2019-11-29 00:19:47 +01:00
|
|
|
defmodule PhilomenaWeb.ScraperPlug do
  @moduledoc """
  Plug that converts a `"scraper_cache"` URL parameter into a `Plug.Upload`.

  When the request does not already carry an uploaded file but does carry a
  non-empty `"scraper_cache"` URL, the URL is fetched with
  `Philomena.Http.get/1`, the response body is written to a temporary file and
  a `%Plug.Upload{}` pointing at that file is placed into the request params,
  so that later steps see it in the same position as a regular file upload.

  ## Options

    * `:params_name` - top-level params key holding the form map
      (default `"image"`).
    * `:params_key` - key inside the form map that holds the upload
      (default `"image"`).
  """

  # Captures the quoted filename from a Content-Disposition header value.
  @filename_regex ~r/filename="([^"]+)"/

  @doc """
  Returns the options unchanged; they are read on every `call/2`.
  """
  @spec init(keyword()) :: keyword()
  def init(opts), do: opts

  @doc """
  Injects a downloaded file into `conn.params` when a scraper URL is present.

  The connection is returned untouched when

    * the params already contain a `%Plug.Upload{}` at
      `params[params_name][params_key]`,
    * there is no usable `"scraper_cache"` value, or
    * the HTTP request does not produce a 200 response.
  """
  @spec call(Plug.Conn.t(), keyword()) :: Plug.Conn.t()
  def call(conn, opts) do
    params_name = Keyword.get(opts, :params_name, "image")
    params_key = Keyword.get(opts, :params_key, "image")

    case conn.params do
      # A real upload always wins over the scraper URL.
      %{^params_name => %{^params_key => %Plug.Upload{}}} ->
        conn

      # Only binaries are forwarded to the HTTP client: query strings such as
      # `scraper_cache[]=x` decode to lists/maps, which are not URLs.
      %{"scraper_cache" => url} when is_binary(url) and url != "" ->
        url
        |> Philomena.Http.get()
        |> maybe_fixup_params(url, opts, conn)

      _ ->
        conn
    end
  end

  # Writing the tempfile doesn't allow traversal
  # sobelow_skip ["Traversal.FileModule"]
  defp maybe_fixup_params(
         {:ok, %Tesla.Env{body: body, status: 200, headers: headers}},
         url,
         opts,
         conn
       ) do
    params_name = Keyword.get(opts, :params_name, "image")
    params_key = Keyword.get(opts, :params_key, "image")
    name = extract_filename(url, headers)

    # The path comes from Plug.Upload, never from user input.
    file = Plug.Upload.random_file!(UUID.uuid1())
    File.write!(file, body)

    fake_upload = %Plug.Upload{
      path: file,
      content_type: "application/octet-stream",
      filename: name
    }

    # The request may carry "scraper_cache" without any nested form map (or
    # with a scalar in its place); start from an empty form in that case
    # instead of raising BadMapError from Map.put/3.
    form =
      case conn.params do
        %{^params_name => %{} = existing} -> existing
        _ -> %{}
      end

    updated_form = Map.put(form, params_key, fake_upload)
    updated_params = Map.put(conn.params, params_name, updated_form)

    %Plug.Conn{conn | params: updated_params}
  end

  # Any other outcome (error tuple, non-200 status) leaves the request as is.
  defp maybe_fixup_params(_response, _url, _opts, conn), do: conn

  # Picks the filename for the fake upload: the quoted `filename="..."` of the
  # first matching Content-Disposition header, otherwise the last path segment
  # of the URL. The result is limited to 127 characters.
  defp extract_filename(url, resp_headers) do
    from_header =
      Enum.find_value(resp_headers, fn {key, value} ->
        # HTTP header names are case-insensitive, so normalise before comparing.
        if String.downcase(key) == "content-disposition" do
          case Regex.run(@filename_regex, value, capture: :all_but_first) do
            [found] -> found
            _ -> nil
          end
        end
      end)

    # Use the basename directly rather than round-tripping it through the
    # header regex, which raised MatchError for basenames starting with `"`.
    name = from_header || Path.basename(url)

    String.slice(name, 0, 127)
  end
end
|