Update response header usages for list format

This commit is contained in:
Liam 2024-06-20 19:22:22 -04:00
parent 44c160b905
commit a344062d53
3 changed files with 19 additions and 62 deletions

View file

@ -9,7 +9,6 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do
@image_regex ~r|data-rh="true" rel="preload" href="([^"]*)" as="image"|
@source_regex ~r|rel="canonical" href="([^"]*)"|
@artist_regex ~r|https://www.deviantart.com/([^/]*)/art|
@serial_regex ~r|https://www.deviantart.com/(?:.*?)-(\d+)\z|
@cdnint_regex ~r|(https://images-wixmp-[0-9a-f]+.wixmp.com)(?:/intermediary)?/f/([^/]*)/([^/?]*)|
@png_regex ~r|(https://[0-9a-z\-\.]+(?:/intermediary)?/f/[0-9a-f\-]+/[0-9a-z\-]+\.png/v1/fill/[0-9a-z_,]+/[0-9a-z_\-]+)(\.png)(.*)|
@jpg_regex ~r|(https://[0-9a-z\-\.]+(?:/intermediary)?/f/[0-9a-f\-]+/[0-9a-z\-]+\.jpg/v1/fill/w_[0-9]+,h_[0-9]+,q_)([0-9]+)(,[a-z]+\/[a-z0-6_\-]+\.jpe?g.*)|
@ -35,7 +34,6 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do
|> extract_data!()
|> try_intermediary_hires!()
|> try_new_hires!()
|> try_old_hires!()
end
defp extract_data!({:ok, %{body: body, status: 200}}) do
@ -107,36 +105,4 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do
data
end
end
# Tries to replace the single scraped image with the file served from the
# older "orig01.deviantart.net" host (presumably the legacy full-resolution
# original — confirm against upstream behaviour).
#
# `data` must have a `source_url` and exactly one entry in `images`; other
# shapes do not match this clause.
#
# Returns `data` with `images` replaced by one image whose `url` is the
# redirect target and whose `camo_url` is carried over, or `data` unchanged
# when the request does not answer with a 301.
#
# Raises MatchError when `source_url` does not match @serial_regex, or when a
# 301 response carries no header named exactly "location".
defp try_old_hires!(%{source_url: source, images: [image]} = data) do
# Assertive match: Regex.run/3 returns nil when the URL has no trailing
# "-<digits>" serial, which crashes here rather than being skipped.
[serial] = Regex.run(@serial_regex, source, capture: :all_but_first)
# Re-encode the decimal serial as lowercase base 36 for the legacy URL.
base36 =
serial
|> String.to_integer()
|> Integer.to_string(36)
|> String.downcase()
built_url = "http://orig01.deviantart.net/x_by_x-d#{base36}.png"
case PhilomenaProxy.Http.get(built_url) do
{:ok, %{status: 301, headers: headers}} ->
# Location header provides URL of high res image.
# NOTE(review): this treats `headers` as {name, value} pairs with a
# lowercase string name and a string value. If headers arrive as
# name => [values], `link` would be a list — confirm against the HTTP client.
{_location, link} = Enum.find(headers, fn {header, _val} -> header == "location" end)
%{
data
| images: [
%{
url: link,
camo_url: image.camo_url
}
]
}
_ ->
# Nothing to be found here, move along...
data
end
end
end

View file

@ -10,14 +10,10 @@ defmodule PhilomenaProxy.Scrapers.Raw do
@spec can_handle?(URI.t(), String.t()) :: boolean()
def can_handle?(_uri, url) do
PhilomenaProxy.Http.head(url)
|> case do
{:ok, %{status: 200, headers: headers}} ->
headers
|> Enum.any?(fn {k, v} ->
String.downcase(k) == "content-type" and String.downcase(v) in @mime_types
end)
with {:ok, %{status: 200, headers: headers}} <- PhilomenaProxy.Http.head(url),
[type | _] <- headers["content-type"] do
String.downcase(type) in @mime_types
else
_ ->
false
end

View file

@ -1,10 +1,12 @@
defmodule PhilomenaWeb.ScraperPlug do
@filename_regex ~r/filename="([^"]+)"/
# Plug initialisation: the given options are handed back untouched.
@spec init(keyword()) :: keyword()
def init(opts), do: opts
@spec call(Plug.Conn.t(), keyword()) :: Plug.Conn.t()
def call(conn, opts) do
params_name = Keyword.get(opts, :params_name, "image")
params_key = Keyword.get(opts, :params_key, "image")
@ -25,18 +27,13 @@ defmodule PhilomenaWeb.ScraperPlug do
# Writing the tempfile doesn't allow traversal
# sobelow_skip ["Traversal.FileModule"]
defp maybe_fixup_params(
{:ok, %{body: body, status: 200, headers: headers}},
url,
opts,
conn
) do
defp maybe_fixup_params({:ok, %{status: 200} = resp}, url, opts, conn) do
params_name = Keyword.get(opts, :params_name, "image")
params_key = Keyword.get(opts, :params_key, "image")
name = extract_filename(url, headers)
name = extract_filename(url, resp.headers)
file = Plug.Upload.random_file!(UUID.uuid1())
File.write!(file, body)
File.write!(file, resp.body)
fake_upload = %Plug.Upload{
path: file,
@ -44,22 +41,20 @@ defmodule PhilomenaWeb.ScraperPlug do
filename: name
}
updated_form = Map.put(conn.params[params_name], params_key, fake_upload)
updated_params = Map.put(conn.params, params_name, updated_form)
%Plug.Conn{conn | params: updated_params}
put_in(conn.params[params_name][params_key], fake_upload)
end
defp maybe_fixup_params(_response, _url, _opts, conn), do: conn
defp extract_filename(url, resp_headers) do
{_, header} =
Enum.find(resp_headers, {nil, "filename=\"#{Path.basename(url)}\""}, fn {key, value} ->
key == "content-disposition" and Regex.match?(@filename_regex, value)
end)
[name] = Regex.run(@filename_regex, header, capture: :all_but_first)
defp extract_filename(url, headers) do
name =
with [value | _] <- headers["content-disposition"],
[name] <- Regex.run(@filename_regex, value, capture: :all_but_first) do
name
else
_ ->
Path.basename(url)
end
String.slice(name, 0, 127)
end