mirror of
https://github.com/philomena-dev/philomena.git
synced 2024-11-23 20:18:00 +01:00
Update response header usages for list format
This commit is contained in:
parent
44c160b905
commit
a344062d53
3 changed files with 19 additions and 62 deletions
|
@ -9,7 +9,6 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do
|
|||
@image_regex ~r|data-rh="true" rel="preload" href="([^"]*)" as="image"|
|
||||
@source_regex ~r|rel="canonical" href="([^"]*)"|
|
||||
@artist_regex ~r|https://www.deviantart.com/([^/]*)/art|
|
||||
@serial_regex ~r|https://www.deviantart.com/(?:.*?)-(\d+)\z|
|
||||
@cdnint_regex ~r|(https://images-wixmp-[0-9a-f]+.wixmp.com)(?:/intermediary)?/f/([^/]*)/([^/?]*)|
|
||||
@png_regex ~r|(https://[0-9a-z\-\.]+(?:/intermediary)?/f/[0-9a-f\-]+/[0-9a-z\-]+\.png/v1/fill/[0-9a-z_,]+/[0-9a-z_\-]+)(\.png)(.*)|
|
||||
@jpg_regex ~r|(https://[0-9a-z\-\.]+(?:/intermediary)?/f/[0-9a-f\-]+/[0-9a-z\-]+\.jpg/v1/fill/w_[0-9]+,h_[0-9]+,q_)([0-9]+)(,[a-z]+\/[a-z0-6_\-]+\.jpe?g.*)|
|
||||
|
@ -35,7 +34,6 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do
|
|||
|> extract_data!()
|
||||
|> try_intermediary_hires!()
|
||||
|> try_new_hires!()
|
||||
|> try_old_hires!()
|
||||
end
|
||||
|
||||
defp extract_data!({:ok, %{body: body, status: 200}}) do
|
||||
|
@ -107,36 +105,4 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do
|
|||
data
|
||||
end
|
||||
end
|
||||
|
||||
defp try_old_hires!(%{source_url: source, images: [image]} = data) do
|
||||
[serial] = Regex.run(@serial_regex, source, capture: :all_but_first)
|
||||
|
||||
base36 =
|
||||
serial
|
||||
|> String.to_integer()
|
||||
|> Integer.to_string(36)
|
||||
|> String.downcase()
|
||||
|
||||
built_url = "http://orig01.deviantart.net/x_by_x-d#{base36}.png"
|
||||
|
||||
case PhilomenaProxy.Http.get(built_url) do
|
||||
{:ok, %{status: 301, headers: headers}} ->
|
||||
# Location header provides URL of high res image.
|
||||
{_location, link} = Enum.find(headers, fn {header, _val} -> header == "location" end)
|
||||
|
||||
%{
|
||||
data
|
||||
| images: [
|
||||
%{
|
||||
url: link,
|
||||
camo_url: image.camo_url
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
_ ->
|
||||
# Nothing to be found here, move along...
|
||||
data
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -10,14 +10,10 @@ defmodule PhilomenaProxy.Scrapers.Raw do
|
|||
|
||||
@spec can_handle?(URI.t(), String.t()) :: boolean()
|
||||
def can_handle?(_uri, url) do
|
||||
PhilomenaProxy.Http.head(url)
|
||||
|> case do
|
||||
{:ok, %{status: 200, headers: headers}} ->
|
||||
headers
|
||||
|> Enum.any?(fn {k, v} ->
|
||||
String.downcase(k) == "content-type" and String.downcase(v) in @mime_types
|
||||
end)
|
||||
|
||||
with {:ok, %{status: 200, headers: headers}} <- PhilomenaProxy.Http.head(url),
|
||||
[type | _] <- headers["content-type"] do
|
||||
String.downcase(type) in @mime_types
|
||||
else
|
||||
_ ->
|
||||
false
|
||||
end
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
defmodule PhilomenaWeb.ScraperPlug do
|
||||
@filename_regex ~r/filename="([^"]+)"/
|
||||
|
||||
@spec init(keyword()) :: keyword()
|
||||
def init(opts) do
|
||||
opts
|
||||
end
|
||||
|
||||
@spec call(Plug.Conn.t(), keyword()) :: Plug.Conn.t()
|
||||
def call(conn, opts) do
|
||||
params_name = Keyword.get(opts, :params_name, "image")
|
||||
params_key = Keyword.get(opts, :params_key, "image")
|
||||
|
@ -25,18 +27,13 @@ defmodule PhilomenaWeb.ScraperPlug do
|
|||
|
||||
# Writing the tempfile doesn't allow traversal
|
||||
# sobelow_skip ["Traversal.FileModule"]
|
||||
defp maybe_fixup_params(
|
||||
{:ok, %{body: body, status: 200, headers: headers}},
|
||||
url,
|
||||
opts,
|
||||
conn
|
||||
) do
|
||||
defp maybe_fixup_params({:ok, %{status: 200} = resp}, url, opts, conn) do
|
||||
params_name = Keyword.get(opts, :params_name, "image")
|
||||
params_key = Keyword.get(opts, :params_key, "image")
|
||||
name = extract_filename(url, headers)
|
||||
name = extract_filename(url, resp.headers)
|
||||
file = Plug.Upload.random_file!(UUID.uuid1())
|
||||
|
||||
File.write!(file, body)
|
||||
File.write!(file, resp.body)
|
||||
|
||||
fake_upload = %Plug.Upload{
|
||||
path: file,
|
||||
|
@ -44,22 +41,20 @@ defmodule PhilomenaWeb.ScraperPlug do
|
|||
filename: name
|
||||
}
|
||||
|
||||
updated_form = Map.put(conn.params[params_name], params_key, fake_upload)
|
||||
|
||||
updated_params = Map.put(conn.params, params_name, updated_form)
|
||||
|
||||
%Plug.Conn{conn | params: updated_params}
|
||||
put_in(conn.params[params_name][params_key], fake_upload)
|
||||
end
|
||||
|
||||
defp maybe_fixup_params(_response, _url, _opts, conn), do: conn
|
||||
|
||||
defp extract_filename(url, resp_headers) do
|
||||
{_, header} =
|
||||
Enum.find(resp_headers, {nil, "filename=\"#{Path.basename(url)}\""}, fn {key, value} ->
|
||||
key == "content-disposition" and Regex.match?(@filename_regex, value)
|
||||
end)
|
||||
|
||||
[name] = Regex.run(@filename_regex, header, capture: :all_but_first)
|
||||
defp extract_filename(url, headers) do
|
||||
name =
|
||||
with [value | _] <- headers["content-disposition"],
|
||||
[name] <- Regex.run(@filename_regex, value, capture: :all_but_first) do
|
||||
name
|
||||
else
|
||||
_ ->
|
||||
Path.basename(url)
|
||||
end
|
||||
|
||||
String.slice(name, 0, 127)
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue