diff --git a/lib/philomena_proxy/scrapers/deviantart.ex b/lib/philomena_proxy/scrapers/deviantart.ex index d292e8aa..cf8009d0 100644 --- a/lib/philomena_proxy/scrapers/deviantart.ex +++ b/lib/philomena_proxy/scrapers/deviantart.ex @@ -9,7 +9,6 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do @image_regex ~r|data-rh="true" rel="preload" href="([^"]*)" as="image"| @source_regex ~r|rel="canonical" href="([^"]*)"| @artist_regex ~r|https://www.deviantart.com/([^/]*)/art| - @serial_regex ~r|https://www.deviantart.com/(?:.*?)-(\d+)\z| @cdnint_regex ~r|(https://images-wixmp-[0-9a-f]+.wixmp.com)(?:/intermediary)?/f/([^/]*)/([^/?]*)| @png_regex ~r|(https://[0-9a-z\-\.]+(?:/intermediary)?/f/[0-9a-f\-]+/[0-9a-z\-]+\.png/v1/fill/[0-9a-z_,]+/[0-9a-z_\-]+)(\.png)(.*)| @jpg_regex ~r|(https://[0-9a-z\-\.]+(?:/intermediary)?/f/[0-9a-f\-]+/[0-9a-z\-]+\.jpg/v1/fill/w_[0-9]+,h_[0-9]+,q_)([0-9]+)(,[a-z]+\/[a-z0-6_\-]+\.jpe?g.*)| @@ -35,7 +34,6 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do |> extract_data!() |> try_intermediary_hires!() |> try_new_hires!() - |> try_old_hires!() end defp extract_data!({:ok, %{body: body, status: 200}}) do @@ -107,36 +105,4 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do data end end - - defp try_old_hires!(%{source_url: source, images: [image]} = data) do - [serial] = Regex.run(@serial_regex, source, capture: :all_but_first) - - base36 = - serial - |> String.to_integer() - |> Integer.to_string(36) - |> String.downcase() - - built_url = "http://orig01.deviantart.net/x_by_x-d#{base36}.png" - - case PhilomenaProxy.Http.get(built_url) do - {:ok, %{status: 301, headers: headers}} -> - # Location header provides URL of high res image. - {_location, link} = Enum.find(headers, fn {header, _val} -> header == "location" end) - - %{ - data - | images: [ - %{ - url: link, - camo_url: image.camo_url - } - ] - } - - _ -> - # Nothing to be found here, move along... - data - end - end end diff --git a/lib/philomena_proxy/scrapers/raw.ex b/lib/philomena_proxy/scrapers/raw.ex index a8c08d97..a6985444 100644 --- a/lib/philomena_proxy/scrapers/raw.ex +++ b/lib/philomena_proxy/scrapers/raw.ex @@ -10,14 +10,10 @@ defmodule PhilomenaProxy.Scrapers.Raw do @spec can_handle?(URI.t(), String.t()) :: boolean() def can_handle?(_uri, url) do - PhilomenaProxy.Http.head(url) - |> case do - {:ok, %{status: 200, headers: headers}} -> - headers - |> Enum.any?(fn {k, v} -> - String.downcase(k) == "content-type" and String.downcase(v) in @mime_types - end) - + with {:ok, %{status: 200, headers: headers}} <- PhilomenaProxy.Http.head(url), + [type | _] <- headers["content-type"] do + String.downcase(type) in @mime_types + else _ -> false end diff --git a/lib/philomena_web/plugs/scraper_plug.ex b/lib/philomena_web/plugs/scraper_plug.ex index 4694d084..c8064d69 100644 --- a/lib/philomena_web/plugs/scraper_plug.ex +++ b/lib/philomena_web/plugs/scraper_plug.ex @@ -1,10 +1,12 @@ defmodule PhilomenaWeb.ScraperPlug do @filename_regex ~r/filename="([^"]+)"/ + @spec init(keyword()) :: keyword() def init(opts) do opts end + @spec call(Plug.Conn.t(), keyword()) :: Plug.Conn.t() def call(conn, opts) do params_name = Keyword.get(opts, :params_name, "image") params_key = Keyword.get(opts, :params_key, "image") @@ -25,18 +27,13 @@ defmodule PhilomenaWeb.ScraperPlug do # Writing the tempfile doesn't allow traversal # sobelow_skip ["Traversal.FileModule"] - defp maybe_fixup_params( - {:ok, %{body: body, status: 200, headers: headers}}, - url, - opts, - conn - ) do + defp maybe_fixup_params({:ok, %{status: 200} = resp}, url, opts, conn) do params_name = Keyword.get(opts, :params_name, "image") params_key = Keyword.get(opts, :params_key, "image") - name = extract_filename(url, headers) + name = extract_filename(url, resp.headers) file = Plug.Upload.random_file!(UUID.uuid1()) - File.write!(file, body) + File.write!(file, resp.body) fake_upload = %Plug.Upload{ path: file, @@ -44,22 +41,20 @@ defmodule PhilomenaWeb.ScraperPlug do filename: name } - updated_form = Map.put(conn.params[params_name], params_key, fake_upload) - - updated_params = Map.put(conn.params, params_name, updated_form) - - %Plug.Conn{conn | params: updated_params} + put_in(conn.params[params_name][params_key], fake_upload) end defp maybe_fixup_params(_response, _url, _opts, conn), do: conn - defp extract_filename(url, resp_headers) do - {_, header} = - Enum.find(resp_headers, {nil, "filename=\"#{Path.basename(url)}\""}, fn {key, value} -> - key == "content-disposition" and Regex.match?(@filename_regex, value) - end) - - [name] = Regex.run(@filename_regex, header, capture: :all_but_first) + defp extract_filename(url, headers) do + name = + with [value | _] <- headers["content-disposition"], + [name] <- Regex.run(@filename_regex, value, capture: :all_but_first) do + name + else + _ -> + Path.basename(url) + end String.slice(name, 0, 127) end