From 6170e0b5ddbf636f554ca0109ebe99ba7c5d0c41 Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 19 Jun 2024 23:03:44 -0400 Subject: [PATCH 1/4] Replace Tesla with Req --- config/config.exs | 2 - .../artist_links/automatic_verifier.ex | 2 +- lib/philomena/channels/picarto_channel.ex | 2 +- lib/philomena/channels/piczel_channel.ex | 2 +- lib/philomena_proxy/http.ex | 83 +++++++++++-------- lib/philomena_proxy/scrapers/deviantart.ex | 8 +- lib/philomena_proxy/scrapers/pillowfort.ex | 2 +- lib/philomena_proxy/scrapers/raw.ex | 2 +- lib/philomena_proxy/scrapers/tumblr.ex | 4 +- lib/philomena_proxy/scrapers/twitter.ex | 2 +- .../plugs/compromised_password_check_plug.ex | 2 +- lib/philomena_web/plugs/scraper_plug.ex | 2 +- mix.exs | 4 +- mix.lock | 4 +- 14 files changed, 67 insertions(+), 54 deletions(-) diff --git a/config/config.exs b/config/config.exs index 9d943587..fbfedf78 100644 --- a/config/config.exs +++ b/config/config.exs @@ -46,8 +46,6 @@ config :phoenix, :template_engines, slime: PhoenixSlime.Engine, slimleex: PhoenixSlime.LiveViewEngine -config :tesla, adapter: Tesla.Adapter.Mint - # Configures Elixir's Logger config :logger, :console, format: "$time $metadata[$level] $message\n", diff --git a/lib/philomena/artist_links/automatic_verifier.ex b/lib/philomena/artist_links/automatic_verifier.ex index 57fd8fd2..f2a5bebd 100644 --- a/lib/philomena/artist_links/automatic_verifier.ex +++ b/lib/philomena/artist_links/automatic_verifier.ex @@ -12,7 +12,7 @@ defmodule Philomena.ArtistLinks.AutomaticVerifier do end end - defp contains_verification_code?({:ok, %Tesla.Env{body: body, status: 200}}, code) do + defp contains_verification_code?({:ok, %{body: body, status: 200}}, code) do String.contains?(body, code) end diff --git a/lib/philomena/channels/picarto_channel.ex b/lib/philomena/channels/picarto_channel.ex index a27a3615..1eacb28f 100644 --- a/lib/philomena/channels/picarto_channel.ex +++ b/lib/philomena/channels/picarto_channel.ex @@ -6,7 +6,7 @@ defmodule Philomena.Channels.PicartoChannel do @api_online |> PhilomenaProxy.Http.get() |> case do - {:ok, %Tesla.Env{body: body, status: 200}} -> + {:ok, %{body: body, status: 200}} -> body |> Jason.decode!() |> Map.new(&{&1["name"], fetch(&1, now)}) diff --git a/lib/philomena/channels/piczel_channel.ex b/lib/philomena/channels/piczel_channel.ex index 56da9e34..817dd486 100644 --- a/lib/philomena/channels/piczel_channel.ex +++ b/lib/philomena/channels/piczel_channel.ex @@ -6,7 +6,7 @@ defmodule Philomena.Channels.PiczelChannel do @api_online |> PhilomenaProxy.Http.get() |> case do - {:ok, %Tesla.Env{body: body, status: 200}} -> + {:ok, %{body: body, status: 200}} -> body |> Jason.decode!() |> Map.new(&{&1["slug"], fetch(&1, now)}) diff --git a/lib/philomena_proxy/http.ex b/lib/philomena_proxy/http.ex index 9a5af4ec..70172f0c 100644 --- a/lib/philomena_proxy/http.ex +++ b/lib/philomena_proxy/http.ex @@ -17,9 +17,13 @@ defmodule PhilomenaProxy.Http do @type url :: String.t() @type header_list :: [{String.t(), String.t()}] - @type body :: binary() + @type body :: iodata() + @type result :: {:ok, Req.Response.t()} | {:error, Exception.t()} - @type client_options :: keyword() + @user_agent "Mozilla/5.0 (X11; Philomena; Linux x86_64; rv:126.0) Gecko/20100101 Firefox/126.0" + @max_body 125_000_000 + + @max_body_key :resp_body_size @doc ~S""" Perform a HTTP GET request. @@ -27,15 +31,15 @@ defmodule PhilomenaProxy.Http do ## Example iex> PhilomenaProxy.Http.get("http://example.com", [{"authorization", "Bearer #{token}"}]) - {:ok, %Tesla.Env{...}} + {:ok, %{status: 200, body: ...}} iex> PhilomenaProxy.Http.get("http://nonexistent.example.com") - {:error, %Mint.TransportError{reason: :nxdomain}} + {:error, %Req.TransportError{reason: :nxdomain}} """ - @spec get(url(), header_list(), client_options()) :: Tesla.Env.result() - def get(url, headers \\ [], options \\ []) do - Tesla.get(client(headers), url, opts: [adapter: adapter_opts(options)]) + @spec get(url(), header_list()) :: result() + def get(url, headers \\ []) do + request(:get, url, [], headers) end @doc ~S""" @@ -44,15 +48,15 @@ defmodule PhilomenaProxy.Http do ## Example iex> PhilomenaProxy.Http.head("http://example.com", [{"authorization", "Bearer #{token}"}]) - {:ok, %Tesla.Env{...}} + {:ok, %{status: 200, body: ...}} iex> PhilomenaProxy.Http.head("http://nonexistent.example.com") - {:error, %Mint.TransportError{reason: :nxdomain}} + {:error, %Req.TransportError{reason: :nxdomain}} """ - @spec head(url(), header_list(), client_options()) :: Tesla.Env.result() - def head(url, headers \\ [], options \\ []) do - Tesla.head(client(headers), url, opts: [adapter: adapter_opts(options)]) + @spec head(url(), header_list()) :: result() + def head(url, headers \\ []) do + request(:head, url, [], headers) end @doc ~S""" @@ -61,26 +65,41 @@ defmodule PhilomenaProxy.Http do ## Example iex> PhilomenaProxy.Http.post("http://example.com", "", [{"authorization", "Bearer #{token}"}]) - {:ok, %Tesla.Env{...}} + {:ok, %{status: 200, body: ...}} iex> PhilomenaProxy.Http.post("http://nonexistent.example.com", "") - {:error, %Mint.TransportError{reason: :nxdomain}} + {:error, %Req.TransportError{reason: :nxdomain}} """ - @spec post(url(), body(), header_list(), client_options()) :: Tesla.Env.result() - def post(url, body, headers \\ [], options \\ []) do - Tesla.post(client(headers), url, body, opts: [adapter: adapter_opts(options)]) + @spec post(url(), body(), header_list()) :: result() + def post(url, body, headers \\ []) do + request(:post, url, body, headers) end - defp adapter_opts(opts) do - opts = Keyword.merge(opts, max_body: 125_000_000, inet6: true) + @spec request(atom(), String.t(), iodata(), header_list()) :: result() + defp request(method, url, body, headers) do + Req.new( + method: method, + url: url, + body: body, + headers: [{:user_agent, @user_agent} | headers], + max_redirects: 1, + connect_options: connect_options(), + inet6: true, + into: &stream_response_callback/2, + decode_body: false + ) + |> Req.Request.put_private(@max_body_key, 0) + |> Req.request() + end + defp connect_options do case Application.get_env(:philomena, :proxy_host) do nil -> - opts + [] url -> - Keyword.merge(opts, proxy: proxy_opts(URI.parse(url))) + [proxy: proxy_opts(URI.parse(url))] end end @@ -90,18 +109,14 @@ defmodule PhilomenaProxy.Http do defp proxy_opts(%{host: host, port: port, scheme: "http"}), do: {:http, host, port, [transport_opts: [inet6: true]]} - defp client(headers) do - Tesla.client( - [ - {Tesla.Middleware.FollowRedirects, max_redirects: 1}, - {Tesla.Middleware.Headers, - [ - {"User-Agent", - "Mozilla/5.0 (X11; Philomena; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0"} - | headers - ]} - ], - Tesla.Adapter.Mint - ) + defp stream_response_callback({:data, data}, {req, resp}) do + req = update_in(req.private[@max_body_key], &(&1 + byte_size(data))) + resp = update_in(resp.body, &<<&1::binary, data::binary>>) + + if req.private.resp_body_size < @max_body do + {:cont, {req, resp}} + else + {:halt, {req, RuntimeError.exception("body too big")}} + end end end diff --git a/lib/philomena_proxy/scrapers/deviantart.ex b/lib/philomena_proxy/scrapers/deviantart.ex index 10985133..138d67e1 100644 --- a/lib/philomena_proxy/scrapers/deviantart.ex +++ b/lib/philomena_proxy/scrapers/deviantart.ex @@ -38,7 +38,7 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do |> try_old_hires!() end - defp extract_data!({:ok, %Tesla.Env{body: body, status: 200}}) do + defp extract_data!({:ok, %{body: body, status: 200}}) do [image] = Regex.run(@image_regex, body, capture: :all_but_first) [source] = Regex.run(@source_regex, body, capture: :all_but_first) [artist] = Regex.run(@artist_regex, source, capture: :all_but_first) @@ -60,7 +60,7 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do with [domain, object_uuid, object_name] <- Regex.run(@cdnint_regex, image.url, capture: :all_but_first), built_url <- "#{domain}/intermediary/f/#{object_uuid}/#{object_name}", - {:ok, %Tesla.Env{status: 200}} <- PhilomenaProxy.Http.head(built_url) do + {:ok, %{status: 200}} <- PhilomenaProxy.Http.head(built_url) do # This is the high resolution URL. %{ data @@ -120,7 +120,7 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do built_url = "http://orig01.deviantart.net/x_by_x-d#{base36}.png" case PhilomenaProxy.Http.get(built_url) do - {:ok, %Tesla.Env{status: 301, headers: headers}} -> + {:ok, %{status: 301, headers: headers}} -> # Location header provides URL of high res image. {_location, link} = Enum.find(headers, fn {header, _val} -> header == "location" end) @@ -145,7 +145,7 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do defp follow_redirect(url, max_times) do case PhilomenaProxy.Http.get(url) do - {:ok, %Tesla.Env{headers: headers, status: code}} when code in [301, 302] -> + {:ok, %{headers: headers, status: code}} when code in [301, 302] -> location = Enum.find_value(headers, &location_header/1) follow_redirect(location, max_times - 1) diff --git a/lib/philomena_proxy/scrapers/pillowfort.ex b/lib/philomena_proxy/scrapers/pillowfort.ex index 6e083c9c..91c5a90d 100755 --- a/lib/philomena_proxy/scrapers/pillowfort.ex +++ b/lib/philomena_proxy/scrapers/pillowfort.ex @@ -24,7 +24,7 @@ defmodule PhilomenaProxy.Scrapers.Pillowfort do |> process_response!(url) end - defp json!({:ok, %Tesla.Env{body: body, status: 200}}), + defp json!({:ok, %{body: body, status: 200}}), do: Jason.decode!(body) defp process_response!(post_json, url) do diff --git a/lib/philomena_proxy/scrapers/raw.ex b/lib/philomena_proxy/scrapers/raw.ex index ed31d10b..a8c08d97 100644 --- a/lib/philomena_proxy/scrapers/raw.ex +++ b/lib/philomena_proxy/scrapers/raw.ex @@ -12,7 +12,7 @@ defmodule PhilomenaProxy.Scrapers.Raw do def can_handle?(_uri, url) do PhilomenaProxy.Http.head(url) |> case do - {:ok, %Tesla.Env{status: 200, headers: headers}} -> + {:ok, %{status: 200, headers: headers}} -> headers |> Enum.any?(fn {k, v} -> String.downcase(k) == "content-type" and String.downcase(v) in @mime_types diff --git a/lib/philomena_proxy/scrapers/tumblr.ex b/lib/philomena_proxy/scrapers/tumblr.ex index fe648e66..4863fb39 100644 --- a/lib/philomena_proxy/scrapers/tumblr.ex +++ b/lib/philomena_proxy/scrapers/tumblr.ex @@ -37,7 +37,7 @@ defmodule PhilomenaProxy.Scrapers.Tumblr do |> process_response!() end - defp json!({:ok, %Tesla.Env{body: body, status: 200}}), + defp json!({:ok, %{body: body, status: 200}}), do: Jason.decode!(body) defp process_response!(%{"response" => %{"posts" => [post | _rest]}}), @@ -76,7 +76,7 @@ defmodule PhilomenaProxy.Scrapers.Tumblr do end defp url_ok?(url) do - match?({:ok, %Tesla.Env{status: 200}}, PhilomenaProxy.Http.head(url)) + match?({:ok, %{status: 200}}, PhilomenaProxy.Http.head(url)) end defp add_meta(post, images) do diff --git a/lib/philomena_proxy/scrapers/twitter.ex b/lib/philomena_proxy/scrapers/twitter.ex index def1a374..a3b167f9 100644 --- a/lib/philomena_proxy/scrapers/twitter.ex +++ b/lib/philomena_proxy/scrapers/twitter.ex @@ -18,7 +18,7 @@ defmodule PhilomenaProxy.Scrapers.Twitter do [user, status_id] = Regex.run(@url_regex, url, capture: :all_but_first) api_url = "https://api.fxtwitter.com/#{user}/status/#{status_id}" - {:ok, %Tesla.Env{status: 200, body: body}} = PhilomenaProxy.Http.get(api_url) + {:ok, %{status: 200, body: body}} = PhilomenaProxy.Http.get(api_url) json = Jason.decode!(body) tweet = json["tweet"] diff --git a/lib/philomena_web/plugs/compromised_password_check_plug.ex b/lib/philomena_web/plugs/compromised_password_check_plug.ex index b46e597f..43fe2d4d 100644 --- a/lib/philomena_web/plugs/compromised_password_check_plug.ex +++ b/lib/philomena_web/plugs/compromised_password_check_plug.ex @@ -36,7 +36,7 @@ defmodule PhilomenaWeb.CompromisedPasswordCheckPlug do |> Base.encode16() case PhilomenaProxy.Http.get(make_api_url(prefix)) do - {:ok, %Tesla.Env{body: body, status: 200}} -> String.contains?(body, rest) + {:ok, %{body: body, status: 200}} -> String.contains?(body, rest) _ -> false end end diff --git a/lib/philomena_web/plugs/scraper_plug.ex b/lib/philomena_web/plugs/scraper_plug.ex index 2e4e1769..4694d084 100644 --- a/lib/philomena_web/plugs/scraper_plug.ex +++ b/lib/philomena_web/plugs/scraper_plug.ex @@ -26,7 +26,7 @@ defmodule PhilomenaWeb.ScraperPlug do # Writing the tempfile doesn't allow traversal # sobelow_skip ["Traversal.FileModule"] defp maybe_fixup_params( - {:ok, %Tesla.Env{body: body, status: 200, headers: headers}}, + {:ok, %{body: body, status: 200, headers: headers}}, url, opts, conn diff --git a/mix.exs b/mix.exs index f9cded72..dc6aae6e 100644 --- a/mix.exs +++ b/mix.exs @@ -64,9 +64,7 @@ defmodule Philomena.MixProject do {:redix, "~> 1.2"}, {:remote_ip, "~> 1.1"}, {:briefly, "~> 0.4"}, - {:tesla, "~> 1.5"}, - {:castore, "~> 1.0", override: true}, - {:mint, "~> 1.4"}, + {:req, "~> 0.5"}, {:exq, "~> 0.17"}, {:ex_aws, "~> 2.0", github: "liamwhite/ex_aws", ref: "a340859dd8ac4d63bd7a3948f0994e493e49bda4", override: true}, diff --git a/mix.lock b/mix.lock index 46729250..db8ea4b3 100644 --- a/mix.lock +++ b/mix.lock @@ -31,6 +31,7 @@ "expo": {:hex, :expo, "0.5.2", "beba786aab8e3c5431813d7a44b828e7b922bfa431d6bfbada0904535342efe2", [:mix], [], "hexpm", "8c9bfa06ca017c9cb4020fabe980bc7fdb1aaec059fd004c2ab3bff03b1c599c"}, "exq": {:hex, :exq, "0.19.0", "06eb92944dad39f0954dc8f63190d3e24d11734eef88cf5800883e57ebf74f3c", [:mix], [{:elixir_uuid, ">= 1.2.0", [hex: :elixir_uuid, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:poison, ">= 1.2.0 and < 6.0.0", [hex: :poison, repo: "hexpm", optional: true]}, {:redix, ">= 0.9.0", [hex: :redix, repo: "hexpm", optional: false]}], "hexpm", "24fc0ebdd87cc7406e1034fb46c2419f9c8a362f0ec634d23b6b819514d36390"}, "file_system": {:hex, :file_system, "1.0.0", "b689cc7dcee665f774de94b5a832e578bd7963c8e637ef940cd44327db7de2cd", [:mix], [], "hexpm", "6752092d66aec5a10e662aefeed8ddb9531d79db0bc145bb8c40325ca1d8536d"}, + "finch": {:hex, :finch, "0.18.0", "944ac7d34d0bd2ac8998f79f7a811b21d87d911e77a786bc5810adb75632ada4", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.3", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 0.2.6 or ~> 1.0", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "69f5045b042e531e53edc2574f15e25e735b522c37e2ddb766e15b979e03aa65"}, "gettext": {:hex, :gettext, "0.24.0", "6f4d90ac5f3111673cbefc4ebee96fe5f37a114861ab8c7b7d5b30a1108ce6d8", [:mix], [{:expo, "~> 0.5.1", [hex: :expo, repo: "hexpm", optional: false]}], "hexpm", "bdf75cdfcbe9e4622dd18e034b227d77dd17f0f133853a1c73b97b3d6c770e8b"}, "hackney": {:hex, :hackney, "1.20.1", "8d97aec62ddddd757d128bfd1df6c5861093419f8f7a4223823537bad5d064e2", [:rebar3], [{:certifi, "~> 2.12.0", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "~> 6.1.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "~> 1.0.0", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~> 1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.4.1", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "~> 1.1.0", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}, {:unicode_util_compat, "~> 0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "fe9094e5f1a2a2c0a7d10918fee36bfec0ec2a979994cff8cfe8058cd9af38e3"}, "hpax": {:hex, :hpax, "0.2.0", "5a58219adcb75977b2edce5eb22051de9362f08236220c9e859a47111c194ff5", [:mix], [], "hexpm", "bea06558cdae85bed075e6c036993d43cd54d447f76d8190a8db0dc5893fa2f1"}, @@ -51,6 +52,7 @@ "neotoma": {:hex, :neotoma, "1.7.3", "d8bd5404b73273989946e4f4f6d529e5c2088f5fa1ca790b4dbe81f4be408e61", [:rebar], [], "hexpm", "2da322b9b1567ffa0706a7f30f6bbbde70835ae44a1050615f4b4a3d436e0f28"}, "nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"}, "nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"}, + "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"}, "parse_trans": {:hex, :parse_trans, "3.4.1", "6e6aa8167cb44cc8f39441d05193be6e6f4e7c2946cb2759f015f8c56b76e5ff", [:rebar3], [], "hexpm", "620a406ce75dada827b82e453c19cf06776be266f5a67cff34e1ef2cbb60e49a"}, "pbkdf2": {:git, "https://github.com/basho/erlang-pbkdf2.git", "7e9bd5fcd3cc3062159e4c9214bb628aa6feb5ca", [ref: "7e9bd5fcd3cc3062159e4c9214bb628aa6feb5ca"]}, "phoenix": {:hex, :phoenix, "1.7.12", "1cc589e0eab99f593a8aa38ec45f15d25297dd6187ee801c8de8947090b5a9d3", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix_pubsub, "~> 2.1", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}, {:phoenix_template, "~> 1.0", [hex: :phoenix_template, repo: "hexpm", optional: false]}, {:phoenix_view, "~> 2.0", [hex: :phoenix_view, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.7", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:plug_crypto, "~> 1.2 or ~> 2.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:websock_adapter, "~> 0.5.3", [hex: :websock_adapter, repo: "hexpm", optional: false]}], "hexpm", "d646192fbade9f485b01bc9920c139bfdd19d0f8df3d73fd8eaf2dfbe0d2837c"}, @@ -72,6 +74,7 @@ "ranch": {:hex, :ranch, "2.1.0", "2261f9ed9574dcfcc444106b9f6da155e6e540b2f82ba3d42b339b93673b72a3", [:make, :rebar3], [], "hexpm", "244ee3fa2a6175270d8e1fc59024fd9dbc76294a321057de8f803b1479e76916"}, "redix": {:hex, :redix, "1.5.1", "a2386971e69bf23630fb3a215a831b5478d2ee7dc9ea7ac811ed89186ab5d7b7", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:nimble_options, "~> 0.5.0 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "85224eb2b683c516b80d472eb89b76067d5866913bf0be59d646f550de71f5c4"}, "remote_ip": {:hex, :remote_ip, "1.2.0", "fb078e12a44414f4cef5a75963c33008fe169b806572ccd17257c208a7bc760f", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "2ff91de19c48149ce19ed230a81d377186e4412552a597d6a5137373e5877cb7"}, + "req": {:hex, :req, "0.5.0", "6d8a77c25cfc03e06a439fb12ffb51beade53e3fe0e2c5e362899a18b50298b3", [:mix], [{:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 1.6 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "dda04878c1396eebbfdec6db6f3d4ca609e5c8846b7ee88cc56eb9891406f7a3"}, "retry": {:hex, :retry, "0.18.0", "dc58ebe22c95aa00bc2459f9e0c5400e6005541cf8539925af0aa027dc860543", [:mix], [], "hexpm", "9483959cc7bf69c9e576d9dfb2b678b71c045d3e6f39ab7c9aa1489df4492d73"}, "rustler": {:hex, :rustler, "0.33.0", "4a5b0a7a7b0b51549bea49947beff6fae9bc5d5326104dcd4531261e876b5619", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:toml, "~> 0.6", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "7c4752728fee59a815ffd20c3429c55b644041f25129b29cdeb5c470b80ec5fd"}, "scrivener": {:hex, :scrivener, "2.7.2", "1d913c965ec352650a7f864ad7fd8d80462f76a32f33d57d1e48bc5e9d40aba2", [:mix], [], "hexpm", "7866a0ec4d40274efbee1db8bead13a995ea4926ecd8203345af8f90d2b620d9"}, @@ -83,7 +86,6 @@ "sweet_xml": {:hex, :sweet_xml, "0.7.4", "a8b7e1ce7ecd775c7e8a65d501bc2cd933bff3a9c41ab763f5105688ef485d08", [:mix], [], "hexpm", "e7c4b0bdbf460c928234951def54fe87edf1a170f6896675443279e2dbeba167"}, "swoosh": {:hex, :swoosh, "1.16.9", "20c6a32ea49136a4c19f538e27739bb5070558c0fa76b8a95f4d5d5ca7d319a1", [:mix], [{:bandit, ">= 1.0.0", [hex: :bandit, repo: "hexpm", optional: true]}, {:cowboy, "~> 1.1 or ~> 2.4", [hex: :cowboy, repo: "hexpm", optional: true]}, {:ex_aws, "~> 2.1", [hex: :ex_aws, repo: "hexpm", optional: true]}, {:finch, "~> 0.6", [hex: :finch, repo: "hexpm", optional: true]}, {:gen_smtp, "~> 0.13 or ~> 1.0", [hex: :gen_smtp, repo: "hexpm", optional: true]}, {:hackney, "~> 1.9", [hex: :hackney, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mail, "~> 0.2", [hex: :mail, repo: "hexpm", optional: true]}, {:mime, "~> 1.1 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mua, "~> 0.2.0", [hex: :mua, repo: "hexpm", optional: true]}, {:multipart, "~> 0.4", [hex: :multipart, repo: "hexpm", optional: true]}, {:plug, "~> 1.9", [hex: :plug, repo: "hexpm", optional: true]}, {:plug_cowboy, ">= 1.0.0", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:req, "~> 0.5 or ~> 1.0", [hex: :req, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.2 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "878b1a7a6c10ebbf725a3349363f48f79c5e3d792eb621643b0d276a38acc0a6"}, "telemetry": {:hex, :telemetry, "1.2.1", "68fdfe8d8f05a8428483a97d7aab2f268aaff24b49e0f599faa091f1d4e7f61c", [:rebar3], [], "hexpm", "dad9ce9d8effc621708f99eac538ef1cbe05d6a874dd741de2e689c47feafed5"}, - "tesla": {:hex, :tesla, "1.9.0", "8c22db6a826e56a087eeb8cdef56889731287f53feeb3f361dec5d4c8efb6f14", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:exjsx, ">= 3.0.0", [hex: :exjsx, repo: "hexpm", optional: true]}, {:finch, "~> 0.13", [hex: :finch, repo: "hexpm", optional: true]}, {:fuse, "~> 2.4", [hex: :fuse, repo: "hexpm", optional: true]}, {:gun, ">= 1.0.0", [hex: :gun, repo: "hexpm", optional: true]}, {:hackney, "~> 1.6", [hex: :hackney, repo: "hexpm", optional: true]}, {:ibrowse, "4.4.2", [hex: :ibrowse, repo: "hexpm", optional: true]}, {:jason, ">= 1.0.0", [hex: :jason, repo: "hexpm", optional: true]}, {:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.0", [hex: :mint, repo: "hexpm", optional: true]}, {:msgpax, "~> 2.3", [hex: :msgpax, repo: "hexpm", optional: true]}, {:poison, ">= 1.0.0", [hex: :poison, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: true]}], "hexpm", "7c240c67e855f7e63e795bf16d6b3f5115a81d1f44b7fe4eadbf656bae0fef8a"}, "toml": {:hex, :toml, "0.7.0", "fbcd773caa937d0c7a02c301a1feea25612720ac3fa1ccb8bfd9d30d822911de", [:mix], [], "hexpm", "0690246a2478c1defd100b0c9b89b4ea280a22be9a7b313a8a058a2408a2fa70"}, "unicode_util_compat": {:hex, :unicode_util_compat, "0.7.0", "bc84380c9ab48177092f43ac89e4dfa2c6d62b40b8bd132b1059ecc7232f9a78", [:rebar3], [], "hexpm", "25eee6d67df61960cf6a794239566599b09e17e668d3700247bc498638152521"}, "websock": {:hex, :websock, "0.5.3", "2f69a6ebe810328555b6fe5c831a851f485e303a7c8ce6c5f675abeb20ebdadc", [:mix], [], "hexpm", "6105453d7fac22c712ad66fab1d45abdf049868f253cf719b625151460b8b453"}, From 29dc68c7141666a9a4fa4b18020fec33cc885da2 Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 19 Jun 2024 23:35:56 -0400 Subject: [PATCH 2/4] Add Req backend for ExAws --- config/runtime.exs | 12 +++--------- lib/philomena_media/req.ex | 31 +++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 9 deletions(-) create mode 100644 lib/philomena_media/req.ex diff --git a/config/runtime.exs b/config/runtime.exs index 935ba124..190a1da7 100644 --- a/config/runtime.exs +++ b/config/runtime.exs @@ -74,8 +74,7 @@ config :philomena, :s3_primary_options, host: System.fetch_env!("S3_HOST"), port: System.fetch_env!("S3_PORT"), access_key_id: System.fetch_env!("AWS_ACCESS_KEY_ID"), - secret_access_key: System.fetch_env!("AWS_SECRET_ACCESS_KEY"), - http_opts: [timeout: 180_000, recv_timeout: 180_000] + secret_access_key: System.fetch_env!("AWS_SECRET_ACCESS_KEY") config :philomena, :s3_primary_bucket, System.fetch_env!("S3_BUCKET") @@ -85,8 +84,7 @@ config :philomena, :s3_secondary_options, host: System.get_env("ALT_S3_HOST"), port: System.get_env("ALT_S3_PORT"), access_key_id: System.get_env("ALT_AWS_ACCESS_KEY_ID"), - secret_access_key: System.get_env("ALT_AWS_SECRET_ACCESS_KEY"), - http_opts: [timeout: 180_000, recv_timeout: 180_000] + secret_access_key: System.get_env("ALT_AWS_SECRET_ACCESS_KEY") config :philomena, :s3_secondary_bucket, System.get_env("ALT_S3_BUCKET") @@ -94,11 +92,7 @@ config :philomena, :s3_secondary_bucket, System.get_env("ALT_S3_BUCKET") config :elastix, httpoison_options: [ssl: [verify: :verify_none]] -config :ex_aws, :hackney_opts, - timeout: 180_000, - recv_timeout: 180_000, - use_default_pool: false, - pool: false +config :ex_aws, http_client: PhilomenaMedia.Req config :ex_aws, :retries, max_attempts: 20, diff --git a/lib/philomena_media/req.ex b/lib/philomena_media/req.ex new file mode 100644 index 00000000..ff92d949 --- /dev/null +++ b/lib/philomena_media/req.ex @@ -0,0 +1,31 @@ +defmodule PhilomenaMedia.Req do + @behaviour ExAws.Request.HttpClient + + @moduledoc """ + Configuration for `m:Req`. + + Options can be set for `m:Req` with the following config: + + config :philomena, :req_opts, + receive_timeout: 30_000 + + The default config handles setting the above. + """ + + @default_opts [receive_timeout: 30_000] + + @impl true + def request(method, url, body \\ "", headers \\ [], http_opts \\ []) do + [method: method, url: url, body: body, headers: headers, decode_body: false] + |> Keyword.merge(Application.get_env(:philomena, :req_opts, @default_opts)) + |> Keyword.merge(http_opts) + |> Req.request() + |> case do + {:ok, %{status: status, headers: headers, body: body}} -> + {:ok, %{status_code: status, headers: headers, body: body}} + + {:error, reason} -> + {:error, %{reason: reason}} + end + end +end From 44c160b905cb7e409b1c0cfc11cd4279cea5d848 Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 19 Jun 2024 23:57:00 -0400 Subject: [PATCH 3/4] Remove extremely outdated redirect follower from DA scraper --- lib/philomena_proxy/scrapers/deviantart.ex | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/lib/philomena_proxy/scrapers/deviantart.ex b/lib/philomena_proxy/scrapers/deviantart.ex index 138d67e1..d292e8aa 100644 --- a/lib/philomena_proxy/scrapers/deviantart.ex +++ b/lib/philomena_proxy/scrapers/deviantart.ex @@ -31,7 +31,7 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do @spec scrape(URI.t(), Scrapers.url()) :: Scrapers.scrape_result() def scrape(_uri, url) do url - |> follow_redirect(2) + |> PhilomenaProxy.Http.get() |> extract_data!() |> try_intermediary_hires!() |> try_new_hires!() @@ -139,22 +139,4 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do data end end - - # Workaround for benoitc/hackney#273 - defp follow_redirect(_url, 0), do: nil - - defp follow_redirect(url, max_times) do - case PhilomenaProxy.Http.get(url) do - {:ok, %{headers: headers, status: code}} when code in [301, 302] -> - location = Enum.find_value(headers, &location_header/1) - follow_redirect(location, max_times - 1) - - response -> - response - end - end - - defp location_header({"Location", location}), do: location - defp location_header({"location", location}), do: location - defp location_header(_), do: nil end From a344062d533efebb7f8f51b1d5fa1a8e4edf6117 Mon Sep 17 00:00:00 2001 From: Liam Date: Thu, 20 Jun 2024 19:22:22 -0400 Subject: [PATCH 4/4] Update response header usages for list format --- lib/philomena_proxy/scrapers/deviantart.ex | 34 --------------------- lib/philomena_proxy/scrapers/raw.ex | 12 +++----- lib/philomena_web/plugs/scraper_plug.ex | 35 ++++++++++------------ 3 files changed, 19 insertions(+), 62 deletions(-) diff --git a/lib/philomena_proxy/scrapers/deviantart.ex b/lib/philomena_proxy/scrapers/deviantart.ex index d292e8aa..cf8009d0 100644 --- a/lib/philomena_proxy/scrapers/deviantart.ex +++ b/lib/philomena_proxy/scrapers/deviantart.ex @@ -9,7 +9,6 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do @image_regex ~r|data-rh="true" rel="preload" href="([^"]*)" as="image"| @source_regex ~r|rel="canonical" href="([^"]*)"| @artist_regex ~r|https://www.deviantart.com/([^/]*)/art| - @serial_regex ~r|https://www.deviantart.com/(?:.*?)-(\d+)\z| @cdnint_regex ~r|(https://images-wixmp-[0-9a-f]+.wixmp.com)(?:/intermediary)?/f/([^/]*)/([^/?]*)| @png_regex ~r|(https://[0-9a-z\-\.]+(?:/intermediary)?/f/[0-9a-f\-]+/[0-9a-z\-]+\.png/v1/fill/[0-9a-z_,]+/[0-9a-z_\-]+)(\.png)(.*)| @jpg_regex ~r|(https://[0-9a-z\-\.]+(?:/intermediary)?/f/[0-9a-f\-]+/[0-9a-z\-]+\.jpg/v1/fill/w_[0-9]+,h_[0-9]+,q_)([0-9]+)(,[a-z]+\/[a-z0-6_\-]+\.jpe?g.*)| @@ -35,7 +34,6 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do |> extract_data!() |> try_intermediary_hires!() |> try_new_hires!() - |> try_old_hires!() end defp extract_data!({:ok, %{body: body, status: 200}}) do @@ -107,36 +105,4 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do data end end - - defp try_old_hires!(%{source_url: source, images: [image]} = data) do - [serial] = Regex.run(@serial_regex, source, capture: :all_but_first) - - base36 = - serial - |> String.to_integer() - |> Integer.to_string(36) - |> String.downcase() - - built_url = "http://orig01.deviantart.net/x_by_x-d#{base36}.png" - - case PhilomenaProxy.Http.get(built_url) do - {:ok, %{status: 301, headers: headers}} -> - # Location header provides URL of high res image. - {_location, link} = Enum.find(headers, fn {header, _val} -> header == "location" end) - - %{ - data - | images: [ - %{ - url: link, - camo_url: image.camo_url - } - ] - } - - _ -> - # Nothing to be found here, move along... - data - end - end end diff --git a/lib/philomena_proxy/scrapers/raw.ex b/lib/philomena_proxy/scrapers/raw.ex index a8c08d97..a6985444 100644 --- a/lib/philomena_proxy/scrapers/raw.ex +++ b/lib/philomena_proxy/scrapers/raw.ex @@ -10,14 +10,10 @@ defmodule PhilomenaProxy.Scrapers.Raw do @spec can_handle?(URI.t(), String.t()) :: boolean() def can_handle?(_uri, url) do - PhilomenaProxy.Http.head(url) - |> case do - {:ok, %{status: 200, headers: headers}} -> - headers - |> Enum.any?(fn {k, v} -> - String.downcase(k) == "content-type" and String.downcase(v) in @mime_types - end) - + with {:ok, %{status: 200, headers: headers}} <- PhilomenaProxy.Http.head(url), + [type | _] <- headers["content-type"] do + String.downcase(type) in @mime_types + else _ -> false end diff --git a/lib/philomena_web/plugs/scraper_plug.ex b/lib/philomena_web/plugs/scraper_plug.ex index 4694d084..c8064d69 100644 --- a/lib/philomena_web/plugs/scraper_plug.ex +++ b/lib/philomena_web/plugs/scraper_plug.ex @@ -1,10 +1,12 @@ defmodule PhilomenaWeb.ScraperPlug do @filename_regex ~r/filename="([^"]+)"/ + @spec init(keyword()) :: keyword() def init(opts) do opts end + @spec call(Plug.Conn.t(), keyword()) :: Plug.Conn.t() def call(conn, opts) do params_name = Keyword.get(opts, :params_name, "image") params_key = Keyword.get(opts, :params_key, "image") @@ -25,18 +27,13 @@ defmodule PhilomenaWeb.ScraperPlug do # Writing the tempfile doesn't allow traversal # sobelow_skip ["Traversal.FileModule"] - defp maybe_fixup_params( - {:ok, %{body: body, status: 200, headers: headers}}, - url, - opts, - conn - ) do + defp maybe_fixup_params({:ok, %{status: 200} = resp}, url, opts, conn) do params_name = Keyword.get(opts, :params_name, "image") params_key = Keyword.get(opts, :params_key, "image") - name = extract_filename(url, headers) + name = extract_filename(url, resp.headers) file = Plug.Upload.random_file!(UUID.uuid1()) - File.write!(file, body) + File.write!(file, resp.body) fake_upload = %Plug.Upload{ path: file, @@ -44,22 +41,20 @@ defmodule PhilomenaWeb.ScraperPlug do filename: name } - updated_form = Map.put(conn.params[params_name], params_key, fake_upload) - - updated_params = Map.put(conn.params, params_name, updated_form) - - %Plug.Conn{conn | params: updated_params} + put_in(conn.params[params_name][params_key], fake_upload) end defp maybe_fixup_params(_response, _url, _opts, conn), do: conn - defp extract_filename(url, resp_headers) do - {_, header} = - Enum.find(resp_headers, {nil, "filename=\"#{Path.basename(url)}\""}, fn {key, value} -> - key == "content-disposition" and Regex.match?(@filename_regex, value) - end) - - [name] = Regex.run(@filename_regex, header, capture: :all_but_first) + defp extract_filename(url, headers) do + name = + with [value | _] <- headers["content-disposition"], + [name] <- Regex.run(@filename_regex, value, capture: :all_but_first) do + name + else + _ -> + Path.basename(url) + end String.slice(name, 0, 127) end