philomena/lib/philomena_web/plugs/scraper_plug.ex

62 lines
1.6 KiB
Elixir
Raw Permalink Normal View History

2019-11-29 00:19:47 +01:00
defmodule PhilomenaWeb.ScraperPlug do
  @moduledoc """
  Plug that turns a `"scraper_cache"` URL parameter into a synthetic upload.

  If the request does not already carry a `Plug.Upload` at
  `conn.params[params_name][params_key]` and `"scraper_cache"` holds a
  non-empty URL string, the URL is fetched with `PhilomenaProxy.Http.get`.
  On a 200 response the body is written to a temporary file obtained from
  `Plug.Upload.random_file!/1`, and a `%Plug.Upload{}` pointing at that file
  is stored at `conn.params[params_name][params_key]`. In every other case
  the connection is returned unchanged.

  ## Options

    * `:params_name` - top-level params key (default `"image"`)
    * `:params_key` - key nested under `:params_name` (default `"image"`)
  """

  @behaviour Plug

  # Captures the quoted filename from a Content-Disposition header value.
  @filename_regex ~r/filename="([^"]+)"/

  @doc """
  Returns the options unchanged; they are read on every `call/2`.
  """
  @impl true
  @spec init(keyword()) :: keyword()
  def init(opts) do
    opts
  end

  @doc """
  Replaces a `"scraper_cache"` URL with a fetched `Plug.Upload`, when applicable.

  The connection is returned untouched when an upload is already present at
  the configured params location, or when `"scraper_cache"` is missing, empty,
  or not a string.
  """
  @impl true
  @spec call(Plug.Conn.t(), keyword()) :: Plug.Conn.t()
  def call(conn, opts) do
    params_name = Keyword.get(opts, :params_name, "image")
    params_key = Keyword.get(opts, :params_key, "image")

    case conn.params do
      # A real upload is already present; nothing to fetch.
      %{^params_name => %{^params_key => %Plug.Upload{}}} ->
        conn

      # Request params are user-controlled and may decode to maps or lists;
      # only a non-empty string can be a URL.
      %{"scraper_cache" => url} when is_binary(url) and url != "" ->
        url
        |> PhilomenaProxy.Http.get()
        |> maybe_fixup_params(url, opts, conn)

      _ ->
        conn
    end
  end

  # Writing the tempfile doesn't allow traversal
  # sobelow_skip ["Traversal.FileModule"]
  defp maybe_fixup_params({:ok, %{status: 200} = resp}, url, opts, conn) do
    params_name = Keyword.get(opts, :params_name, "image")
    params_key = Keyword.get(opts, :params_key, "image")
    name = extract_filename(url, resp.headers)

    # The on-disk path is generated by Plug.Upload; the remote-supplied name is
    # only carried along as the upload's `filename` field.
    file = Plug.Upload.random_file!(UUID.uuid1())
    File.write!(file, resp.body)

    fake_upload = %Plug.Upload{
      path: file,
      content_type: "application/octet-stream",
      filename: name
    }

    put_upload(conn, params_name, params_key, fake_upload)
  end

  # Any other outcome (error tuple, non-200 status) leaves the request as-is.
  defp maybe_fixup_params(_response, _url, _opts, conn), do: conn

  # Stores `upload` at conn.params[params_name][params_key]. The container map
  # is created when the request did not send one (or sent a non-map value), so
  # a request carrying only "scraper_cache" does not raise here.
  defp put_upload(conn, params_name, params_key, upload) do
    nested =
      case conn.params do
        %{^params_name => existing} when is_map(existing) and not is_struct(existing) ->
          Map.put(existing, params_key, upload)

        _ ->
          %{params_key => upload}
      end

    %{conn | params: Map.put(conn.params, params_name, nested)}
  end

  # Picks a filename for the fetched resource: the quoted `filename="..."` from
  # the first content-disposition header value when present, otherwise the
  # basename of the URL. The result is cut to at most 127 characters.
  # NOTE(review): assumes `headers["content-disposition"]` yields a list of
  # values (or nil) — confirm against PhilomenaProxy.Http's response shape.
  defp extract_filename(url, headers) do
    name =
      with [value | _] <- headers["content-disposition"],
           [name] <- Regex.run(@filename_regex, value, capture: :all_but_first) do
        name
      else
        _ ->
          Path.basename(url)
      end

    String.slice(name, 0, 127)
  end
end