mirror of
https://github.com/philomena-dev/philomena.git
synced 2024-11-23 20:18:00 +01:00
Update response header usages for list format
This commit is contained in:
parent
44c160b905
commit
a344062d53
3 changed files with 19 additions and 62 deletions
|
@ -9,7 +9,6 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do
|
||||||
@image_regex ~r|data-rh="true" rel="preload" href="([^"]*)" as="image"|
|
@image_regex ~r|data-rh="true" rel="preload" href="([^"]*)" as="image"|
|
||||||
@source_regex ~r|rel="canonical" href="([^"]*)"|
|
@source_regex ~r|rel="canonical" href="([^"]*)"|
|
||||||
@artist_regex ~r|https://www.deviantart.com/([^/]*)/art|
|
@artist_regex ~r|https://www.deviantart.com/([^/]*)/art|
|
||||||
@serial_regex ~r|https://www.deviantart.com/(?:.*?)-(\d+)\z|
|
|
||||||
@cdnint_regex ~r|(https://images-wixmp-[0-9a-f]+.wixmp.com)(?:/intermediary)?/f/([^/]*)/([^/?]*)|
|
@cdnint_regex ~r|(https://images-wixmp-[0-9a-f]+.wixmp.com)(?:/intermediary)?/f/([^/]*)/([^/?]*)|
|
||||||
@png_regex ~r|(https://[0-9a-z\-\.]+(?:/intermediary)?/f/[0-9a-f\-]+/[0-9a-z\-]+\.png/v1/fill/[0-9a-z_,]+/[0-9a-z_\-]+)(\.png)(.*)|
|
@png_regex ~r|(https://[0-9a-z\-\.]+(?:/intermediary)?/f/[0-9a-f\-]+/[0-9a-z\-]+\.png/v1/fill/[0-9a-z_,]+/[0-9a-z_\-]+)(\.png)(.*)|
|
||||||
@jpg_regex ~r|(https://[0-9a-z\-\.]+(?:/intermediary)?/f/[0-9a-f\-]+/[0-9a-z\-]+\.jpg/v1/fill/w_[0-9]+,h_[0-9]+,q_)([0-9]+)(,[a-z]+\/[a-z0-6_\-]+\.jpe?g.*)|
|
@jpg_regex ~r|(https://[0-9a-z\-\.]+(?:/intermediary)?/f/[0-9a-f\-]+/[0-9a-z\-]+\.jpg/v1/fill/w_[0-9]+,h_[0-9]+,q_)([0-9]+)(,[a-z]+\/[a-z0-6_\-]+\.jpe?g.*)|
|
||||||
|
@ -35,7 +34,6 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do
|
||||||
|> extract_data!()
|
|> extract_data!()
|
||||||
|> try_intermediary_hires!()
|
|> try_intermediary_hires!()
|
||||||
|> try_new_hires!()
|
|> try_new_hires!()
|
||||||
|> try_old_hires!()
|
|
||||||
end
|
end
|
||||||
|
|
||||||
defp extract_data!({:ok, %{body: body, status: 200}}) do
|
defp extract_data!({:ok, %{body: body, status: 200}}) do
|
||||||
|
@ -107,36 +105,4 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do
|
||||||
data
|
data
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
defp try_old_hires!(%{source_url: source, images: [image]} = data) do
|
|
||||||
[serial] = Regex.run(@serial_regex, source, capture: :all_but_first)
|
|
||||||
|
|
||||||
base36 =
|
|
||||||
serial
|
|
||||||
|> String.to_integer()
|
|
||||||
|> Integer.to_string(36)
|
|
||||||
|> String.downcase()
|
|
||||||
|
|
||||||
built_url = "http://orig01.deviantart.net/x_by_x-d#{base36}.png"
|
|
||||||
|
|
||||||
case PhilomenaProxy.Http.get(built_url) do
|
|
||||||
{:ok, %{status: 301, headers: headers}} ->
|
|
||||||
# Location header provides URL of high res image.
|
|
||||||
{_location, link} = Enum.find(headers, fn {header, _val} -> header == "location" end)
|
|
||||||
|
|
||||||
%{
|
|
||||||
data
|
|
||||||
| images: [
|
|
||||||
%{
|
|
||||||
url: link,
|
|
||||||
camo_url: image.camo_url
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
_ ->
|
|
||||||
# Nothing to be found here, move along...
|
|
||||||
data
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -10,14 +10,10 @@ defmodule PhilomenaProxy.Scrapers.Raw do
|
||||||
|
|
||||||
@spec can_handle?(URI.t(), String.t()) :: boolean()
|
@spec can_handle?(URI.t(), String.t()) :: boolean()
|
||||||
def can_handle?(_uri, url) do
|
def can_handle?(_uri, url) do
|
||||||
PhilomenaProxy.Http.head(url)
|
with {:ok, %{status: 200, headers: headers}} <- PhilomenaProxy.Http.head(url),
|
||||||
|> case do
|
[type | _] <- headers["content-type"] do
|
||||||
{:ok, %{status: 200, headers: headers}} ->
|
String.downcase(type) in @mime_types
|
||||||
headers
|
else
|
||||||
|> Enum.any?(fn {k, v} ->
|
|
||||||
String.downcase(k) == "content-type" and String.downcase(v) in @mime_types
|
|
||||||
end)
|
|
||||||
|
|
||||||
_ ->
|
_ ->
|
||||||
false
|
false
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,10 +1,12 @@
|
||||||
defmodule PhilomenaWeb.ScraperPlug do
|
defmodule PhilomenaWeb.ScraperPlug do
|
||||||
@filename_regex ~r/filename="([^"]+)"/
|
@filename_regex ~r/filename="([^"]+)"/
|
||||||
|
|
||||||
|
@spec init(keyword()) :: keyword()
|
||||||
def init(opts) do
|
def init(opts) do
|
||||||
opts
|
opts
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@spec call(Plug.Conn.t(), keyword()) :: Plug.Conn.t()
|
||||||
def call(conn, opts) do
|
def call(conn, opts) do
|
||||||
params_name = Keyword.get(opts, :params_name, "image")
|
params_name = Keyword.get(opts, :params_name, "image")
|
||||||
params_key = Keyword.get(opts, :params_key, "image")
|
params_key = Keyword.get(opts, :params_key, "image")
|
||||||
|
@ -25,18 +27,13 @@ defmodule PhilomenaWeb.ScraperPlug do
|
||||||
|
|
||||||
# Writing the tempfile doesn't allow traversal
|
# Writing the tempfile doesn't allow traversal
|
||||||
# sobelow_skip ["Traversal.FileModule"]
|
# sobelow_skip ["Traversal.FileModule"]
|
||||||
defp maybe_fixup_params(
|
defp maybe_fixup_params({:ok, %{status: 200} = resp}, url, opts, conn) do
|
||||||
{:ok, %{body: body, status: 200, headers: headers}},
|
|
||||||
url,
|
|
||||||
opts,
|
|
||||||
conn
|
|
||||||
) do
|
|
||||||
params_name = Keyword.get(opts, :params_name, "image")
|
params_name = Keyword.get(opts, :params_name, "image")
|
||||||
params_key = Keyword.get(opts, :params_key, "image")
|
params_key = Keyword.get(opts, :params_key, "image")
|
||||||
name = extract_filename(url, headers)
|
name = extract_filename(url, resp.headers)
|
||||||
file = Plug.Upload.random_file!(UUID.uuid1())
|
file = Plug.Upload.random_file!(UUID.uuid1())
|
||||||
|
|
||||||
File.write!(file, body)
|
File.write!(file, resp.body)
|
||||||
|
|
||||||
fake_upload = %Plug.Upload{
|
fake_upload = %Plug.Upload{
|
||||||
path: file,
|
path: file,
|
||||||
|
@ -44,22 +41,20 @@ defmodule PhilomenaWeb.ScraperPlug do
|
||||||
filename: name
|
filename: name
|
||||||
}
|
}
|
||||||
|
|
||||||
updated_form = Map.put(conn.params[params_name], params_key, fake_upload)
|
put_in(conn.params[params_name][params_key], fake_upload)
|
||||||
|
|
||||||
updated_params = Map.put(conn.params, params_name, updated_form)
|
|
||||||
|
|
||||||
%Plug.Conn{conn | params: updated_params}
|
|
||||||
end
|
end
|
||||||
|
|
||||||
defp maybe_fixup_params(_response, _url, _opts, conn), do: conn
|
defp maybe_fixup_params(_response, _url, _opts, conn), do: conn
|
||||||
|
|
||||||
defp extract_filename(url, resp_headers) do
|
defp extract_filename(url, headers) do
|
||||||
{_, header} =
|
name =
|
||||||
Enum.find(resp_headers, {nil, "filename=\"#{Path.basename(url)}\""}, fn {key, value} ->
|
with [value | _] <- headers["content-disposition"],
|
||||||
key == "content-disposition" and Regex.match?(@filename_regex, value)
|
[name] <- Regex.run(@filename_regex, value, capture: :all_but_first) do
|
||||||
end)
|
name
|
||||||
|
else
|
||||||
[name] = Regex.run(@filename_regex, header, capture: :all_but_first)
|
_ ->
|
||||||
|
Path.basename(url)
|
||||||
|
end
|
||||||
|
|
||||||
String.slice(name, 0, 127)
|
String.slice(name, 0, 127)
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue