From c63bc41d8b15947e1b1d86d650f99b30eb97aaeb Mon Sep 17 00:00:00 2001
From: Liam
Date: Fri, 24 May 2024 21:15:05 -0400
Subject: [PATCH] Split out HTTP client interaction into PhilomenaProxy
 namespace

---
 lib/camo/image.ex                              |   8 --
 .../artist_links/automatic_verifier.ex         |   2 +-
 lib/philomena/channels/picarto_channel.ex      |   2 +-
 lib/philomena/channels/piczel_channel.ex       |   2 +-
 lib/philomena/http.ex                          |  46 --------
 lib/philomena/scrapers.ex                      |  25 ----
 lib/philomena_proxy/camo.ex                    |  24 ++++
 lib/philomena_proxy/http.ex                    | 107 ++++++++++++++++++
 lib/philomena_proxy/scrapers.ex                |  71 ++++++++++++
 .../scrapers/deviantart.ex                     |  21 +++-
 .../scrapers/pillowfort.ex                     |  14 ++-
 .../scrapers/raw.ex                            |  18 ++-
 lib/philomena_proxy/scrapers/scraper.ex        |  11 ++
 .../scrapers/tumblr.ex                         |  18 ++-
 .../scrapers/twitter.ex                        |  16 ++-
 .../controllers/image/scrape_controller.ex     |   2 +-
 lib/philomena_web/plugs/check_captcha_plug.ex  |   2 +-
 .../plugs/compromised_password_check_plug.ex   |   2 +-
 lib/philomena_web/plugs/scraper_plug.ex        |   2 +-
 lib/philomena_web/views/channel_view.ex        |  12 +-
 priv/repo/seeds_development.exs                |   2 +-
 21 files changed, 294 insertions(+), 113 deletions(-)
 delete mode 100644 lib/camo/image.ex
 delete mode 100644 lib/philomena/http.ex
 delete mode 100644 lib/philomena/scrapers.ex
 create mode 100644 lib/philomena_proxy/camo.ex
 create mode 100644 lib/philomena_proxy/http.ex
 create mode 100644 lib/philomena_proxy/scrapers.ex
 rename lib/{philomena => philomena_proxy}/scrapers/deviantart.ex (89%)
 rename lib/{philomena => philomena_proxy}/scrapers/pillowfort.ex (78%)
 rename lib/{philomena => philomena_proxy}/scrapers/raw.ex (56%)
 create mode 100644 lib/philomena_proxy/scrapers/scraper.ex
 rename lib/{philomena => philomena_proxy}/scrapers/tumblr.ex (84%)
 rename lib/{philomena => philomena_proxy}/scrapers/twitter.ex (61%)

diff --git a/lib/camo/image.ex b/lib/camo/image.ex
deleted file mode 100644
index e77f99e6..00000000
--- a/lib/camo/image.ex
+++ /dev/null
@@ -1,8 +0,0 @@
-defmodule Camo.Image do
-  @doc """
-  Convert a potentially untrusted external image URL into a trusted one
-  loaded through a gocamo proxy (specified by the environment).
- """ - @spec image_url(String.t()) :: String.t() - def image_url(input), do: Philomena.Native.camo_image_url(input) -end diff --git a/lib/philomena/artist_links/automatic_verifier.ex b/lib/philomena/artist_links/automatic_verifier.ex index 1fd303a4..57fd8fd2 100644 --- a/lib/philomena/artist_links/automatic_verifier.ex +++ b/lib/philomena/artist_links/automatic_verifier.ex @@ -1,7 +1,7 @@ defmodule Philomena.ArtistLinks.AutomaticVerifier do def check_link(artist_link, recheck_time) do artist_link.uri - |> Philomena.Http.get() + |> PhilomenaProxy.Http.get() |> contains_verification_code?(artist_link.verification_code) |> case do true -> diff --git a/lib/philomena/channels/picarto_channel.ex b/lib/philomena/channels/picarto_channel.ex index cc54cdd6..a27a3615 100644 --- a/lib/philomena/channels/picarto_channel.ex +++ b/lib/philomena/channels/picarto_channel.ex @@ -4,7 +4,7 @@ defmodule Philomena.Channels.PicartoChannel do @spec live_channels(DateTime.t()) :: map() def live_channels(now) do @api_online - |> Philomena.Http.get() + |> PhilomenaProxy.Http.get() |> case do {:ok, %Tesla.Env{body: body, status: 200}} -> body diff --git a/lib/philomena/channels/piczel_channel.ex b/lib/philomena/channels/piczel_channel.ex index 23ce8a0d..56da9e34 100644 --- a/lib/philomena/channels/piczel_channel.ex +++ b/lib/philomena/channels/piczel_channel.ex @@ -4,7 +4,7 @@ defmodule Philomena.Channels.PiczelChannel do @spec live_channels(DateTime.t()) :: map() def live_channels(now) do @api_online - |> Philomena.Http.get() + |> PhilomenaProxy.Http.get() |> case do {:ok, %Tesla.Env{body: body, status: 200}} -> body diff --git a/lib/philomena/http.ex b/lib/philomena/http.ex deleted file mode 100644 index 738d8a11..00000000 --- a/lib/philomena/http.ex +++ /dev/null @@ -1,46 +0,0 @@ -defmodule Philomena.Http do - def get(url, headers \\ [], options \\ []) do - Tesla.get(client(headers), url, opts: [adapter: adapter_opts(options)]) - end - - def head(url, headers \\ [], options \\ []) do - Tesla.head(client(headers), url, opts: [adapter: adapter_opts(options)]) - end - - def post(url, body, headers \\ [], options \\ []) do - Tesla.post(client(headers), url, body, opts: [adapter: adapter_opts(options)]) - end - - defp adapter_opts(opts) do - opts = Keyword.merge(opts, max_body: 125_000_000, inet6: true) - - case Application.get_env(:philomena, :proxy_host) do - nil -> - opts - - url -> - Keyword.merge(opts, proxy: proxy_opts(URI.parse(url))) - end - end - - defp proxy_opts(%{host: host, port: port, scheme: "https"}), - do: {:https, host, port, [transport_opts: [inet6: true]]} - - defp proxy_opts(%{host: host, port: port, scheme: "http"}), - do: {:http, host, port, [transport_opts: [inet6: true]]} - - defp client(headers) do - Tesla.client( - [ - {Tesla.Middleware.FollowRedirects, max_redirects: 1}, - {Tesla.Middleware.Headers, - [ - {"User-Agent", - "Mozilla/5.0 (X11; Philomena; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0"} - | headers - ]} - ], - Tesla.Adapter.Mint - ) - end -end diff --git a/lib/philomena/scrapers.ex b/lib/philomena/scrapers.ex deleted file mode 100644 index da5fd381..00000000 --- a/lib/philomena/scrapers.ex +++ /dev/null @@ -1,25 +0,0 @@ -defmodule Philomena.Scrapers do - @scrapers [ - Philomena.Scrapers.Deviantart, - Philomena.Scrapers.Pillowfort, - Philomena.Scrapers.Twitter, - Philomena.Scrapers.Tumblr, - Philomena.Scrapers.Raw - ] - - def scrape!(url) do - uri = URI.parse(url) - - @scrapers - |> Enum.find(& &1.can_handle?(uri, url)) - |> wrap() - |> Enum.map(& &1.scrape(uri, url)) - |> 
-  end
-
-  defp wrap(nil), do: []
-  defp wrap(res), do: [res]
-
-  defp unwrap([result]), do: result
-  defp unwrap(_result), do: nil
-end
diff --git a/lib/philomena_proxy/camo.ex b/lib/philomena_proxy/camo.ex
new file mode 100644
index 00000000..881b1c39
--- /dev/null
+++ b/lib/philomena_proxy/camo.ex
@@ -0,0 +1,24 @@
+defmodule PhilomenaProxy.Camo do
+  @moduledoc """
+  Image proxying utilities.
+  """
+
+  @doc """
+  Convert a potentially untrusted external image URL into a trusted one
+  loaded through a gocamo proxy (specified by the environment).
+
+  Configuration is read from environment variables at runtime by Philomena.
+
+      config :philomena,
+        camo_host: System.get_env("CAMO_HOST"),
+        camo_key: System.get_env("CAMO_KEY"),
+
+  ## Example
+
+      iex> PhilomenaProxy.Camo.image_url("https://example.org/img/view/2024/1/1/1.png")
+      "https://example.net/L5MqSmYq1ZEqiBGGvsvSDpILyJI/aHR0cHM6Ly9leGFtcGxlLm9yZy9pbWcvdmlldy8yMDI0LzEvMS8xLnBuZwo"
+
+  """
+  @spec image_url(String.t()) :: String.t()
+  def image_url(input), do: Philomena.Native.camo_image_url(input)
+end
diff --git a/lib/philomena_proxy/http.ex b/lib/philomena_proxy/http.ex
new file mode 100644
index 00000000..9a5af4ec
--- /dev/null
+++ b/lib/philomena_proxy/http.ex
@@ -0,0 +1,107 @@
+defmodule PhilomenaProxy.Http do
+  @moduledoc """
+  HTTP client implementation.
+
+  This applies the Philomena User-Agent header, and optionally proxies traffic through a SOCKS5
+  HTTP proxy to allow the application to connect when the local network is restricted.
+
+  If a proxy host is not specified in the configuration, then a proxy is not used and external
+  traffic originates from the same network as the application.
+
+  Proxy options are read from environment variables at runtime by Philomena.
+
+      config :philomena,
+        proxy_host: System.get_env("PROXY_HOST"),
+
+  """
+
+  @type url :: String.t()
+  @type header_list :: [{String.t(), String.t()}]
+  @type body :: binary()
+
+  @type client_options :: keyword()
+
+  @doc ~S"""
+  Perform an HTTP GET request.
+
+  ## Example
+
+      iex> PhilomenaProxy.Http.get("http://example.com", [{"authorization", "Bearer #{token}"}])
+      {:ok, %Tesla.Env{...}}
+
+      iex> PhilomenaProxy.Http.get("http://nonexistent.example.com")
+      {:error, %Mint.TransportError{reason: :nxdomain}}
+
+  """
+  @spec get(url(), header_list(), client_options()) :: Tesla.Env.result()
+  def get(url, headers \\ [], options \\ []) do
+    Tesla.get(client(headers), url, opts: [adapter: adapter_opts(options)])
+  end
+
+  @doc ~S"""
+  Perform an HTTP HEAD request.
+
+  ## Example
+
+      iex> PhilomenaProxy.Http.head("http://example.com", [{"authorization", "Bearer #{token}"}])
+      {:ok, %Tesla.Env{...}}
+
+      iex> PhilomenaProxy.Http.head("http://nonexistent.example.com")
+      {:error, %Mint.TransportError{reason: :nxdomain}}
+
+  """
+  @spec head(url(), header_list(), client_options()) :: Tesla.Env.result()
+  def head(url, headers \\ [], options \\ []) do
+    Tesla.head(client(headers), url, opts: [adapter: adapter_opts(options)])
+  end
+
+  @doc ~S"""
+  Perform an HTTP POST request.
+
+  ## Example
+
+      iex> PhilomenaProxy.Http.post("http://example.com", "", [{"authorization", "Bearer #{token}"}])
+      {:ok, %Tesla.Env{...}}
+
+      iex> PhilomenaProxy.Http.post("http://nonexistent.example.com", "")
+      {:error, %Mint.TransportError{reason: :nxdomain}}
+
+  """
+  @spec post(url(), body(), header_list(), client_options()) :: Tesla.Env.result()
+  def post(url, body, headers \\ [], options \\ []) do
+    Tesla.post(client(headers), url, body, opts: [adapter: adapter_opts(options)])
+  end
+
+  defp adapter_opts(opts) do
+    opts = Keyword.merge(opts, max_body: 125_000_000, inet6: true)
+
+    case Application.get_env(:philomena, :proxy_host) do
+      nil ->
+        opts
+
+      url ->
+        Keyword.merge(opts, proxy: proxy_opts(URI.parse(url)))
+    end
+  end
+
+  defp proxy_opts(%{host: host, port: port, scheme: "https"}),
+    do: {:https, host, port, [transport_opts: [inet6: true]]}
+
+  defp proxy_opts(%{host: host, port: port, scheme: "http"}),
+    do: {:http, host, port, [transport_opts: [inet6: true]]}
+
+  defp client(headers) do
+    Tesla.client(
+      [
+        {Tesla.Middleware.FollowRedirects, max_redirects: 1},
+        {Tesla.Middleware.Headers,
+         [
+           {"User-Agent",
+            "Mozilla/5.0 (X11; Philomena; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0"}
+           | headers
+         ]}
+      ],
+      Tesla.Adapter.Mint
+    )
+  end
+end
diff --git a/lib/philomena_proxy/scrapers.ex b/lib/philomena_proxy/scrapers.ex
new file mode 100644
index 00000000..9a166887
--- /dev/null
+++ b/lib/philomena_proxy/scrapers.ex
@@ -0,0 +1,71 @@
+defmodule PhilomenaProxy.Scrapers do
+  @moduledoc """
+  Scrape utilities to facilitate uploading media from other websites.
+  """
+
+  # The URL to fetch, as a string.
+  @type url :: String.t()
+
+  # An individual image in a list associated with a scrape result.
+  @type image_result :: %{
+          url: url(),
+          camo_url: url()
+        }
+
+  # Result of a successful scrape.
+  @type scrape_result :: %{
+          source_url: url(),
+          description: String.t() | nil,
+          author_name: String.t() | nil,
+          images: [image_result()]
+        }
+
+  @scrapers [
+    PhilomenaProxy.Scrapers.Deviantart,
+    PhilomenaProxy.Scrapers.Pillowfort,
+    PhilomenaProxy.Scrapers.Twitter,
+    PhilomenaProxy.Scrapers.Tumblr,
+    PhilomenaProxy.Scrapers.Raw
+  ]
+
+  @doc """
+  Scrape a URL for content.
+
+  The scrape result is intended for serialization to JSON.
+
+  ## Examples
+
+      iex> PhilomenaProxy.Scrapers.scrape!("http://example.org/image-page")
+      %{
+        source_url: "http://example.org/image-page",
+        description: "Test",
+        author_name: "myself",
+        images: [
+          %{
+            url: "http://example.org/image.png",
+            camo_url: "http://example.net/UT2YIjkWDas6CQBmQcYlcNGmKfQ/aHR0cDovL2V4YW1wbGUub3JnL2ltY"
+          }
+        ]
+      }
+
+      iex> PhilomenaProxy.Scrapers.scrape!("http://example.org/nonexistent-path")
+      nil
+
+  """
+  @spec scrape!(url()) :: scrape_result() | nil
+  def scrape!(url) do
+    uri = URI.parse(url)
+
+    @scrapers
+    |> Enum.find(& &1.can_handle?(uri, url))
+    |> wrap()
+    |> Enum.map(& &1.scrape(uri, url))
+    |> unwrap()
+  end
+
+  defp wrap(nil), do: []
+  defp wrap(res), do: [res]
+
+  defp unwrap([result]), do: result
+  defp unwrap(_result), do: nil
+end
diff --git a/lib/philomena/scrapers/deviantart.ex b/lib/philomena_proxy/scrapers/deviantart.ex
similarity index 89%
rename from lib/philomena/scrapers/deviantart.ex
rename to lib/philomena_proxy/scrapers/deviantart.ex
index 4bc8fdd9..10985133 100644
--- a/lib/philomena/scrapers/deviantart.ex
+++ b/lib/philomena_proxy/scrapers/deviantart.ex
@@ -1,4 +1,11 @@
-defmodule Philomena.Scrapers.Deviantart do
+defmodule PhilomenaProxy.Scrapers.Deviantart do
+  @moduledoc false
+
+  alias PhilomenaProxy.Scrapers.Scraper
+  alias PhilomenaProxy.Scrapers
+
+  @behaviour Scraper
+
   @image_regex ~r|data-rh="true" rel="preload" href="([^"]*)" as="image"|
   @source_regex ~r|rel="canonical" href="([^"]*)"|
   @artist_regex ~r|https://www.deviantart.com/([^/]*)/art|
@@ -7,7 +14,7 @@
   @png_regex ~r|(https://[0-9a-z\-\.]+(?:/intermediary)?/f/[0-9a-f\-]+/[0-9a-z\-]+\.png/v1/fill/[0-9a-z_,]+/[0-9a-z_\-]+)(\.png)(.*)|
   @jpg_regex ~r|(https://[0-9a-z\-\.]+(?:/intermediary)?/f/[0-9a-f\-]+/[0-9a-z\-]+\.jpg/v1/fill/w_[0-9]+,h_[0-9]+,q_)([0-9]+)(,[a-z]+\/[a-z0-6_\-]+\.jpe?g.*)|
 
-  @spec can_handle?(URI.t(), String.t()) :: true | false
+  @spec can_handle?(URI.t(), String.t()) :: boolean()
   def can_handle?(uri, _url) do
     String.ends_with?(uri.host, "deviantart.com")
   end
@@ -21,6 +28,7 @@
   #
   # So, regex it is. Eat dirt, deviantart. You don't deserve the respect
   # artists give you.
+  @spec scrape(URI.t(), Scrapers.url()) :: Scrapers.scrape_result()
   def scrape(_uri, url) do
     url
     |> follow_redirect(2)
@@ -38,10 +46,11 @@
     %{
       source_url: source,
       author_name: artist,
+      description: "",
       images: [
         %{
           url: image,
-          camo_url: Camo.Image.image_url(image)
+          camo_url: PhilomenaProxy.Camo.image_url(image)
         }
       ]
     }
@@ -51,7 +60,7 @@
     with [domain, object_uuid, object_name] <-
            Regex.run(@cdnint_regex, image.url, capture: :all_but_first),
         built_url <- "#{domain}/intermediary/f/#{object_uuid}/#{object_name}",
-         {:ok, %Tesla.Env{status: 200}} <- Philomena.Http.head(built_url) do
+         {:ok, %Tesla.Env{status: 200}} <- PhilomenaProxy.Http.head(built_url) do
       # This is the high resolution URL.
       %{
         data
@@ -110,7 +119,7 @@
     built_url = "http://orig01.deviantart.net/x_by_x-d#{base36}.png"
 
-    case Philomena.Http.get(built_url) do
+    case PhilomenaProxy.Http.get(built_url) do
       {:ok, %Tesla.Env{status: 301, headers: headers}} ->
         # Location header provides URL of high res image.
         {_location, link} = Enum.find(headers, fn {header, _val} -> header == "location" end)
 
@@ -135,7 +144,7 @@
   defp follow_redirect(_url, 0), do: nil
 
   defp follow_redirect(url, max_times) do
-    case Philomena.Http.get(url) do
+    case PhilomenaProxy.Http.get(url) do
       {:ok, %Tesla.Env{headers: headers, status: code}} when code in [301, 302] ->
         location = Enum.find_value(headers, &location_header/1)
         follow_redirect(location, max_times - 1)
diff --git a/lib/philomena/scrapers/pillowfort.ex b/lib/philomena_proxy/scrapers/pillowfort.ex
similarity index 78%
rename from lib/philomena/scrapers/pillowfort.ex
rename to lib/philomena_proxy/scrapers/pillowfort.ex
index b577c819..6e083c9c 100755
--- a/lib/philomena/scrapers/pillowfort.ex
+++ b/lib/philomena_proxy/scrapers/pillowfort.ex
@@ -1,4 +1,11 @@
-defmodule Philomena.Scrapers.Pillowfort do
+defmodule PhilomenaProxy.Scrapers.Pillowfort do
+  @moduledoc false
+
+  alias PhilomenaProxy.Scrapers.Scraper
+  alias PhilomenaProxy.Scrapers
+
+  @behaviour Scraper
+
   @url_regex ~r|\Ahttps?://www\.pillowfort\.social/posts/([0-9]+)|
 
   @spec can_handle?(URI.t(), String.t()) :: boolean()
@@ -5,13 +12,14 @@
   def can_handle?(_uri, url) do
     String.match?(url, @url_regex)
   end
 
+  @spec scrape(URI.t(), Scrapers.url()) :: Scrapers.scrape_result()
   def scrape(_uri, url) do
     [post_id] = Regex.run(@url_regex, url, capture: :all_but_first)
 
     api_url = "https://www.pillowfort.social/posts/#{post_id}/json"
 
-    Philomena.Http.get(api_url)
+    PhilomenaProxy.Http.get(api_url)
     |> json!()
     |> process_response!(url)
   end
@@ -25,7 +33,7 @@
     |> Enum.map(
       &%{
         url: &1["url"],
-        camo_url: Camo.Image.image_url(&1["small_image_url"])
+        camo_url: PhilomenaProxy.Camo.image_url(&1["small_image_url"])
       }
     )
 
diff --git a/lib/philomena/scrapers/raw.ex b/lib/philomena_proxy/scrapers/raw.ex
similarity index 56%
rename from lib/philomena/scrapers/raw.ex
rename to lib/philomena_proxy/scrapers/raw.ex
index 0085f54c..ed31d10b 100644
--- a/lib/philomena/scrapers/raw.ex
+++ b/lib/philomena_proxy/scrapers/raw.ex
@@ -1,9 +1,16 @@
-defmodule Philomena.Scrapers.Raw do
+defmodule PhilomenaProxy.Scrapers.Raw do
+  @moduledoc false
+
+  alias PhilomenaProxy.Scrapers.Scraper
+  alias PhilomenaProxy.Scrapers
+
+  @behaviour Scraper
+
   @mime_types ["image/gif", "image/jpeg", "image/png", "image/svg", "image/svg+xml", "video/webm"]
 
-  @spec can_handle?(URI.t(), String.t()) :: true | false
+  @spec can_handle?(URI.t(), String.t()) :: boolean()
   def can_handle?(_uri, url) do
-    Philomena.Http.head(url)
+    PhilomenaProxy.Http.head(url)
     |> case do
       {:ok, %Tesla.Env{status: 200, headers: headers}} ->
         headers
@@ -16,13 +23,16 @@
     end
   end
 
+  @spec scrape(URI.t(), Scrapers.url()) :: Scrapers.scrape_result()
   def scrape(_uri, url) do
     %{
       source_url: url,
+      author_name: "",
+      description: "",
      images: [
        %{
          url: url,
-          camo_url: Camo.Image.image_url(url)
+          camo_url: PhilomenaProxy.Camo.image_url(url)
        }
      ]
    }
diff --git a/lib/philomena_proxy/scrapers/scraper.ex b/lib/philomena_proxy/scrapers/scraper.ex
new file mode 100644
index 00000000..15cedcea
--- /dev/null
+++ b/lib/philomena_proxy/scrapers/scraper.ex
@@ -0,0 +1,11 @@
+defmodule PhilomenaProxy.Scrapers.Scraper do
+  @moduledoc false
+
+  alias PhilomenaProxy.Scrapers
+
+  # Return whether the given URL can be parsed by the scraper
+  @callback can_handle?(URI.t(), Scrapers.url()) :: boolean()
+
+  # Collect upload information from the URL
+  @callback scrape(URI.t(), Scrapers.url()) :: Scrapers.scrape_result()
+end
diff --git a/lib/philomena/scrapers/tumblr.ex b/lib/philomena_proxy/scrapers/tumblr.ex
similarity index 84%
rename from lib/philomena/scrapers/tumblr.ex
rename to lib/philomena_proxy/scrapers/tumblr.ex
index 61ec1def..fe648e66 100644
--- a/lib/philomena/scrapers/tumblr.ex
+++ b/lib/philomena_proxy/scrapers/tumblr.ex
@@ -1,4 +1,11 @@
-defmodule Philomena.Scrapers.Tumblr do
+defmodule PhilomenaProxy.Scrapers.Tumblr do
+  @moduledoc false
+
+  alias PhilomenaProxy.Scrapers.Scraper
+  alias PhilomenaProxy.Scrapers
+
+  @behaviour Scraper
+
   @url_regex ~r|\Ahttps?://(?:.*)/(?:image\|post)/(\d+)(?:\z\|[/?#])|
   @media_regex ~r|https?://(?:\d+\.)?media\.tumblr\.com/[a-f\d]+/[a-f\d]+-[a-f\d]+/s\d+x\d+/[a-f\d]+\.(?:png\|jpe?g\|gif)|i
   @size_regex ~r|_(\d+)(\..+)\z|
@@ -18,13 +25,14 @@
     String.match?(url, @url_regex) and tumblr_domain?(uri.host)
   end
 
+  @spec scrape(URI.t(), Scrapers.url()) :: Scrapers.scrape_result()
   def scrape(uri, url) do
     [post_id] = Regex.run(@url_regex, url, capture: :all_but_first)
 
     api_url =
       "https://api.tumblr.com/v2/blog/#{uri.host}/posts/photo?id=#{post_id}&api_key=#{tumblr_api_key()}"
 
-    Philomena.Http.get(api_url)
+    PhilomenaProxy.Http.get(api_url)
     |> json!()
     |> process_response!()
   end
@@ -44,7 +52,7 @@
       %{"url" => preview} =
         Enum.find(photo["alt_sizes"], &(&1["width"] == 400)) || %{"url" => image}
 
-      %{url: image, camo_url: Camo.Image.image_url(preview)}
+      %{url: image, camo_url: PhilomenaProxy.Camo.image_url(preview)}
     end)
 
     add_meta(post, images)
@@ -55,7 +63,7 @@
       @media_regex
       |> Regex.scan(post["body"])
      |> Enum.map(fn [url | _captures] ->
-        %{url: url, camo_url: Camo.Image.image_url(url)}
+        %{url: url, camo_url: PhilomenaProxy.Camo.image_url(url)}
      end)
 
     add_meta(post, images)
@@ -68,7 +76,7 @@
   end
 
   defp url_ok?(url) do
-    match?({:ok, %Tesla.Env{status: 200}}, Philomena.Http.head(url))
+    match?({:ok, %Tesla.Env{status: 200}}, PhilomenaProxy.Http.head(url))
   end
 
   defp add_meta(post, images) do
diff --git a/lib/philomena/scrapers/twitter.ex b/lib/philomena_proxy/scrapers/twitter.ex
similarity index 61%
rename from lib/philomena/scrapers/twitter.ex
rename to lib/philomena_proxy/scrapers/twitter.ex
index 0ba64180..def1a374 100644
--- a/lib/philomena/scrapers/twitter.ex
+++ b/lib/philomena_proxy/scrapers/twitter.ex
@@ -1,16 +1,24 @@
-defmodule Philomena.Scrapers.Twitter do
+defmodule PhilomenaProxy.Scrapers.Twitter do
+  @moduledoc false
+
+  alias PhilomenaProxy.Scrapers.Scraper
+  alias PhilomenaProxy.Scrapers
+
+  @behaviour Scraper
+
   @url_regex ~r|\Ahttps?://(?:mobile\.)?(?:twitter\|x).com/([A-Za-z\d_]+)/status/([\d]+)/?|
 
-  @spec can_handle?(URI.t(), String.t()) :: true | false
+  @spec can_handle?(URI.t(), String.t()) :: boolean()
   def can_handle?(_uri, url) do
     String.match?(url, @url_regex)
   end
 
+  @spec scrape(URI.t(), Scrapers.url()) :: Scrapers.scrape_result()
   def scrape(_uri, url) do
     [user, status_id] = Regex.run(@url_regex, url, capture: :all_but_first)
 
     api_url = "https://api.fxtwitter.com/#{user}/status/#{status_id}"
-    {:ok, %Tesla.Env{status: 200, body: body}} = Philomena.Http.get(api_url)
+    {:ok, %Tesla.Env{status: 200, body: body}} = PhilomenaProxy.Http.get(api_url)
 
     json = Jason.decode!(body)
     tweet = json["tweet"]
@@ -19,7 +27,7 @@
     Enum.map(tweet["media"]["photos"], fn p ->
       %{
         url: "#{p["url"]}:orig",
-        camo_url: Camo.Image.image_url(p["url"])
+        camo_url: PhilomenaProxy.Camo.image_url(p["url"])
PhilomenaProxy.Camo.image_url(p["url"]) } end) diff --git a/lib/philomena_web/controllers/image/scrape_controller.ex b/lib/philomena_web/controllers/image/scrape_controller.ex index a46c8733..56e602e9 100644 --- a/lib/philomena_web/controllers/image/scrape_controller.ex +++ b/lib/philomena_web/controllers/image/scrape_controller.ex @@ -1,7 +1,7 @@ defmodule PhilomenaWeb.Image.ScrapeController do use PhilomenaWeb, :controller - alias Philomena.Scrapers + alias PhilomenaProxy.Scrapers def create(conn, params) do result = diff --git a/lib/philomena_web/plugs/check_captcha_plug.ex b/lib/philomena_web/plugs/check_captcha_plug.ex index d411d405..607309ad 100644 --- a/lib/philomena_web/plugs/check_captcha_plug.ex +++ b/lib/philomena_web/plugs/check_captcha_plug.ex @@ -31,7 +31,7 @@ defmodule PhilomenaWeb.CheckCaptchaPlug do defp valid_solution?(%{"h-captcha-response" => captcha_token}) do {:ok, %{body: body, status: 200}} = - Philomena.Http.post( + PhilomenaProxy.Http.post( "https://hcaptcha.com/siteverify", URI.encode_query(%{"response" => captcha_token, "secret" => hcaptcha_secret_key()}), [{"Content-Type", "application/x-www-form-urlencoded"}] diff --git a/lib/philomena_web/plugs/compromised_password_check_plug.ex b/lib/philomena_web/plugs/compromised_password_check_plug.ex index eaeecd2a..b46e597f 100644 --- a/lib/philomena_web/plugs/compromised_password_check_plug.ex +++ b/lib/philomena_web/plugs/compromised_password_check_plug.ex @@ -35,7 +35,7 @@ defmodule PhilomenaWeb.CompromisedPasswordCheckPlug do :crypto.hash(:sha, password) |> Base.encode16() - case Philomena.Http.get(make_api_url(prefix)) do + case PhilomenaProxy.Http.get(make_api_url(prefix)) do {:ok, %Tesla.Env{body: body, status: 200}} -> String.contains?(body, rest) _ -> false end diff --git a/lib/philomena_web/plugs/scraper_plug.ex b/lib/philomena_web/plugs/scraper_plug.ex index 1fcd0db1..2e4e1769 100644 --- a/lib/philomena_web/plugs/scraper_plug.ex +++ b/lib/philomena_web/plugs/scraper_plug.ex @@ -15,7 +15,7 @@ defmodule PhilomenaWeb.ScraperPlug do %{"scraper_cache" => url} when not is_nil(url) and url != "" -> url - |> Philomena.Http.get() + |> PhilomenaProxy.Http.get() |> maybe_fixup_params(url, opts, conn) _ -> diff --git a/lib/philomena_web/views/channel_view.ex b/lib/philomena_web/views/channel_view.ex index b4dc532e..acb4880a 100644 --- a/lib/philomena_web/views/channel_view.ex +++ b/lib/philomena_web/views/channel_view.ex @@ -4,20 +4,24 @@ defmodule PhilomenaWeb.ChannelView do def channel_image(%{type: "LivestreamChannel", short_name: short_name}) do now = DateTime.utc_now() |> DateTime.to_unix(:microsecond) - Camo.Image.image_url( + PhilomenaProxy.Camo.image_url( "https://thumbnail.api.livestream.com/thumbnail?name=#{short_name}&rand=#{now}" ) end def channel_image(%{type: "PicartoChannel", thumbnail_url: thumbnail_url}), - do: Camo.Image.image_url(thumbnail_url || "https://picarto.tv/images/missingthumb.jpg") + do: + PhilomenaProxy.Camo.image_url(thumbnail_url || "https://picarto.tv/images/missingthumb.jpg") def channel_image(%{type: "PiczelChannel", remote_stream_id: remote_stream_id}), - do: Camo.Image.image_url("https://piczel.tv/api/thumbnail/stream_#{remote_stream_id}.jpg") + do: + PhilomenaProxy.Camo.image_url( + "https://piczel.tv/api/thumbnail/stream_#{remote_stream_id}.jpg" + ) def channel_image(%{type: "TwitchChannel", short_name: short_name}), do: - Camo.Image.image_url( + PhilomenaProxy.Camo.image_url( "https://static-cdn.jtvnw.net/previews-ttv/live_user_#{String.downcase(short_name)}-320x180.jpg" ) end 
diff --git a/priv/repo/seeds_development.exs b/priv/repo/seeds_development.exs
index 466b3111..cde0302a 100644
--- a/priv/repo/seeds_development.exs
+++ b/priv/repo/seeds_development.exs
@@ -52,7 +52,7 @@ for image_def <- resources["remote_images"] do
   now = DateTime.utc_now() |> DateTime.to_unix(:microsecond)
 
   IO.puts "Fetching #{image_def["url"]} ..."
-  {:ok, %{body: body}} = Philomena.Http.get(image_def["url"])
+  {:ok, %{body: body}} = PhilomenaProxy.Http.get(image_def["url"])
 
   File.write!(file, body)