philomena/lib/philomena_web/plugs/scraper_plug.ex
2024-06-19 23:35:44 -04:00

66 lines
1.8 KiB
Elixir

defmodule PhilomenaWeb.ScraperPlug do
  @moduledoc """
  Plug that turns a remote URL into a synthetic file upload.

  If the request already carries a `Plug.Upload` at
  `params[params_name][params_key]`, the connection is passed through
  untouched. Otherwise, when the params contain a non-empty string under
  `"scraper_cache"`, that URL is passed to `PhilomenaProxy.Http.get/1`. On a
  `200` response the body is written to a temporary file and a `Plug.Upload`
  pointing at that file is stored at `params[params_name][params_key]`.

  Any other response leaves the connection unchanged.

  ## Options

    * `:params_name` - top-level params key holding the form (default `"image"`).
    * `:params_key` - key inside the form that receives the upload
      (default `"image"`).
  """

  @behaviour Plug

  # Captures the quoted filename from a Content-Disposition header value.
  @filename_regex ~r/filename="([^"]+)"/

  # Maximum number of graphemes kept from the detected filename.
  @max_filename_length 127

  @doc """
  Returns `opts` unchanged; they are read on every request by `call/2`.
  """
  @impl Plug
  def init(opts) do
    opts
  end

  @doc """
  Replaces the `"scraper_cache"` URL with a fake upload when no real upload
  is present.

  Returns the (possibly updated) `conn`. The connection is never halted.
  """
  @impl Plug
  def call(conn, opts) do
    {params_name, params_key} = param_names(opts)

    case conn.params do
      # A real upload was submitted; it takes precedence over the URL.
      %{^params_name => %{^params_key => %Plug.Upload{}}} ->
        conn

      # Only a non-empty string is a usable URL. Nested params such as
      # `scraper_cache[x]=y` decode to maps/lists and are ignored rather than
      # handed to the HTTP client.
      %{"scraper_cache" => url} when is_binary(url) and url != "" ->
        url
        |> PhilomenaProxy.Http.get()
        |> maybe_fixup_params(url, opts, conn)

      _ ->
        conn
    end
  end

  # Writing the tempfile doesn't allow traversal
  # sobelow_skip ["Traversal.FileModule"]
  defp maybe_fixup_params(
         {:ok, %{body: body, status: 200, headers: headers}},
         url,
         opts,
         conn
       ) do
    {params_name, params_key} = param_names(opts)

    name = extract_filename(url, headers)
    path = Plug.Upload.random_file!(UUID.uuid1())
    File.write!(path, body)

    fake_upload = %Plug.Upload{
      path: path,
      content_type: "application/octet-stream",
      filename: name
    }

    # The form may be absent (only "scraper_cache" was sent) or not a map;
    # start from an empty form in that case instead of raising BadMapError.
    form =
      case Map.get(conn.params, params_name) do
        existing when is_map(existing) -> existing
        _ -> %{}
      end

    updated_form = Map.put(form, params_key, fake_upload)
    updated_params = Map.put(conn.params, params_name, updated_form)

    %Plug.Conn{conn | params: updated_params}
  end

  # Anything other than a successful 200 response leaves the request as-is.
  defp maybe_fixup_params(_response, _url, _opts, conn), do: conn

  # Reads the two param-name options, applying the "image" defaults.
  defp param_names(opts) do
    {Keyword.get(opts, :params_name, "image"), Keyword.get(opts, :params_key, "image")}
  end

  # Picks the filename from the first Content-Disposition header carrying a
  # quoted `filename="..."`, falling back to the last path segment of the URL.
  # The result is truncated to @max_filename_length graphemes.
  defp extract_filename(url, resp_headers) do
    name = filename_from_headers(resp_headers) || Path.basename(url)

    String.slice(name, 0, @max_filename_length)
  end

  # Returns the filename from the first matching header, or nil when no
  # header supplies one.
  defp filename_from_headers(resp_headers) do
    Enum.find_value(resp_headers, fn
      {key, value} when is_binary(key) and is_binary(value) ->
        # HTTP field names are case-insensitive.
        if String.downcase(key) == "content-disposition" do
          case Regex.run(@filename_regex, value, capture: :all_but_first) do
            [name] -> name
            _ -> nil
          end
        end

      _other ->
        nil
    end)
  end
end