mirror of
https://github.com/philomena-dev/philomena.git
synced 2025-01-19 22:27:59 +01:00
replace use of hackney in scraper with mint
This commit is contained in:
parent
68102bdddd
commit
7fca37741b
11 changed files with 58 additions and 55 deletions
|
@ -65,7 +65,7 @@ config :philomena, PhilomenaWeb.Endpoint,
|
|||
|
||||
# Do not include metadata nor timestamps in development logs
|
||||
config :logger, :console, format: "[$level] $message\n"
|
||||
config :logger, compile_time_purge_matching: [[application: :remote_ip]]
|
||||
config :logger, compile_time_purge_matching: [[application: :remote_ip], [application: :mint]]
|
||||
|
||||
# Set up mailer
|
||||
config :philomena, PhilomenaWeb.Mailer, adapter: Bamboo.LocalAdapter
|
||||
|
|
|
@ -1,31 +1,38 @@
|
|||
defmodule Philomena.Http do
|
||||
@user_agent [
|
||||
"User-Agent":
|
||||
"Mozilla/5.0 (X11; Philomena; Linux x86_64; rv:70.0) Gecko/20100101 Firefox/70.0"
|
||||
]
|
||||
|
||||
def get!(url, headers \\ [], options \\ []) do
|
||||
headers = Keyword.merge(@user_agent, headers) |> add_host(url)
|
||||
options = Keyword.merge(options, proxy: proxy_host(), ssl: [insecure: true])
|
||||
|
||||
HTTPoison.get!(url, headers, options)
|
||||
Tesla.get!(client(headers), url, opts: [adapter: adapter_opts(options)])
|
||||
end
|
||||
|
||||
def head!(url, headers \\ [], options \\ []) do
|
||||
headers = Keyword.merge(@user_agent, headers) |> add_host(url)
|
||||
options = Keyword.merge(options, proxy: proxy_host(), ssl: [insecure: true])
|
||||
|
||||
HTTPoison.head!(url, headers, options)
|
||||
Tesla.head!(client(headers), url, opts: [adapter: adapter_opts(options)])
|
||||
end
|
||||
|
||||
# Add host for caching proxies, since hackney doesn't do it for us
|
||||
defp add_host(headers, url) do
|
||||
%{host: host} = URI.parse(url)
|
||||
defp adapter_opts(opts) do
|
||||
opts = Keyword.merge(opts, max_body: 30_000_000)
|
||||
|
||||
Keyword.merge([Host: host, Connection: "close"], headers)
|
||||
case Application.get_env(:philomena, :proxy_host) do
|
||||
nil ->
|
||||
opts
|
||||
|
||||
url ->
|
||||
Keyword.merge(opts, proxy: proxy_opts(URI.parse(url)))
|
||||
end
|
||||
end
|
||||
|
||||
defp proxy_host do
|
||||
Application.get_env(:philomena, :proxy_host)
|
||||
defp proxy_opts(%{host: host, port: port, scheme: "https"}), do: {:https, host, port, []}
|
||||
defp proxy_opts(%{host: host, port: port, scheme: "http"}), do: {:http, host, port, []}
|
||||
|
||||
defp client(headers) do
|
||||
Tesla.client(
|
||||
[
|
||||
{Tesla.Middleware.Headers,
|
||||
[
|
||||
{"User-Agent",
|
||||
"Mozilla/5.0 (X11; Philomena; Linux x86_64; rv:70.0) Gecko/20100101 Firefox/76.0"}
|
||||
| headers
|
||||
]}
|
||||
],
|
||||
Tesla.Adapter.Mint
|
||||
)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -30,7 +30,7 @@ defmodule Philomena.Scrapers.Deviantart do
|
|||
|> try_old_hires!()
|
||||
end
|
||||
|
||||
defp extract_data!(%HTTPoison.Response{body: body, status_code: 200}) do
|
||||
defp extract_data!(%Tesla.Env{body: body, status: 200}) do
|
||||
[image] = Regex.run(@image_regex, body, capture: :all_but_first)
|
||||
[source] = Regex.run(@source_regex, body, capture: :all_but_first)
|
||||
[artist] = Regex.run(@artist_regex, source, capture: :all_but_first)
|
||||
|
@ -48,25 +48,21 @@ defmodule Philomena.Scrapers.Deviantart do
|
|||
end
|
||||
|
||||
defp try_intermediary_hires!(%{images: [image]} = data) do
|
||||
[domain, object_uuid, object_name] =
|
||||
Regex.run(@cdnint_regex, image.url, capture: :all_but_first)
|
||||
|
||||
built_url = "#{domain}/intermediary/f/#{object_uuid}/#{object_name}"
|
||||
|
||||
case Philomena.Http.head!(built_url) do
|
||||
%HTTPoison.Response{status_code: 200} ->
|
||||
# This is the high resolution URL.
|
||||
|
||||
%{
|
||||
data
|
||||
| images: [
|
||||
%{
|
||||
url: built_url,
|
||||
camo_url: image.camo_url
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
with [domain, object_uuid, object_name] <-
|
||||
Regex.run(@cdnint_regex, image.url, capture: :all_but_first),
|
||||
built_url <- "#{domain}/intermediary/f/#{object_uuid}/#{object_name}",
|
||||
%Tesla.Env{status: 200} <- Philomena.Http.head!(built_url) do
|
||||
# This is the high resolution URL.
|
||||
%{
|
||||
data
|
||||
| images: [
|
||||
%{
|
||||
url: built_url,
|
||||
camo_url: image.camo_url
|
||||
}
|
||||
]
|
||||
}
|
||||
else
|
||||
_ ->
|
||||
# Nothing to be found here, move along...
|
||||
data
|
||||
|
@ -115,9 +111,9 @@ defmodule Philomena.Scrapers.Deviantart do
|
|||
built_url = "http://orig01.deviantart.net/x_by_x-d#{base36}.png"
|
||||
|
||||
case Philomena.Http.get!(built_url) do
|
||||
%HTTPoison.Response{status_code: 301, headers: headers} ->
|
||||
%Tesla.Env{status: 301, headers: headers} ->
|
||||
# Location header provides URL of high res image.
|
||||
{_location, link} = Enum.find(headers, fn {header, _val} -> header == "Location" end)
|
||||
{_location, link} = Enum.find(headers, fn {header, _val} -> header == "location" end)
|
||||
|
||||
%{
|
||||
data
|
||||
|
@ -140,7 +136,7 @@ defmodule Philomena.Scrapers.Deviantart do
|
|||
|
||||
defp follow_redirect(url, max_times) do
|
||||
case Philomena.Http.get!(url) do
|
||||
%HTTPoison.Response{headers: headers, status_code: code} when code in [301, 302] ->
|
||||
%Tesla.Env{headers: headers, status: code} when code in [301, 302] ->
|
||||
location = Enum.find_value(headers, &location_header/1)
|
||||
follow_redirect(location, max_times - 1)
|
||||
|
||||
|
|
|
@ -3,9 +3,9 @@ defmodule Philomena.Scrapers.Raw do
|
|||
|
||||
@spec can_handle?(URI.t(), String.t()) :: true | false
|
||||
def can_handle?(_uri, url) do
|
||||
Philomena.Http.head!(url, [], max_body_length: 30_000_000)
|
||||
Philomena.Http.head!(url)
|
||||
|> case do
|
||||
%HTTPoison.Response{status_code: 200, headers: headers} ->
|
||||
%Tesla.Env{status: 200, headers: headers} ->
|
||||
headers
|
||||
|> Enum.any?(fn {k, v} ->
|
||||
String.downcase(k) == "content-type" and String.downcase(v) in @mime_types
|
||||
|
|
|
@ -31,7 +31,7 @@ defmodule Philomena.Scrapers.Tumblr do
|
|||
|> process_response!()
|
||||
end
|
||||
|
||||
defp json!(%HTTPoison.Response{body: body, status_code: 200}),
|
||||
defp json!(%Tesla.Env{body: body, status: 200}),
|
||||
do: Jason.decode!(body)
|
||||
|
||||
defp process_response!(%{"response" => %{"posts" => [post | _rest]}}),
|
||||
|
@ -70,7 +70,7 @@ defmodule Philomena.Scrapers.Tumblr do
|
|||
end
|
||||
|
||||
defp url_ok?(url) do
|
||||
match?(%HTTPoison.Response{status_code: 200}, Philomena.Http.head!(url))
|
||||
match?(%Tesla.Env{status: 200}, Philomena.Http.head!(url))
|
||||
end
|
||||
|
||||
defp add_meta(post, images) do
|
||||
|
|
|
@ -50,7 +50,7 @@ defmodule Philomena.Scrapers.Twitter do
|
|||
|> Map.get(:body)
|
||||
|> extract_guest_token_and_bearer()
|
||||
|
||||
Philomena.Http.get!(api_url, Authorization: "Bearer #{bearer}", "x-guest-token": gt)
|
||||
Philomena.Http.get!(api_url, [{"Authorization", "Bearer #{bearer}"}, {"x-guest-token", gt}])
|
||||
|> Map.get(:body)
|
||||
|> Jason.decode!()
|
||||
|> Map.get("globalObjects")
|
||||
|
|
|
@ -28,7 +28,7 @@ defmodule Philomena.Servers.PicartoChannelUpdater do
|
|||
run()
|
||||
end
|
||||
|
||||
defp handle_response(%HTTPoison.Response{body: body, status_code: 200}, now) do
|
||||
defp handle_response(%Tesla.Env{body: body, status: 200}, now) do
|
||||
resp =
|
||||
body
|
||||
|> Jason.decode!()
|
||||
|
|
|
@ -28,7 +28,7 @@ defmodule Philomena.Servers.PiczelChannelUpdater do
|
|||
run()
|
||||
end
|
||||
|
||||
defp handle_response(%HTTPoison.Response{body: body, status_code: 200}, now) do
|
||||
defp handle_response(%Tesla.Env{body: body, status: 200}, now) do
|
||||
resp =
|
||||
body
|
||||
|> Jason.decode!()
|
||||
|
|
|
@ -63,7 +63,7 @@ defmodule Philomena.Servers.UserLinkUpdater do
|
|||
|> handle_response(user_link)
|
||||
end
|
||||
|
||||
defp handle_response(%HTTPoison.Response{body: body, status_code: 200}, user_link) do
|
||||
defp handle_response(%Tesla.Env{body: body, status: 200}, user_link) do
|
||||
case :binary.match(body, user_link.verification_code) do
|
||||
:nomatch ->
|
||||
nil
|
||||
|
|
|
@ -32,8 +32,8 @@ defmodule PhilomenaWeb.CompromisedPasswordCheckPlug do
|
|||
:crypto.hash(:sha, password)
|
||||
|> Base.encode16()
|
||||
|
||||
case HTTPoison.get(make_api_url(prefix)) do
|
||||
{:ok, %HTTPoison.Response{body: body, status_code: 200}} -> String.contains?(body, rest)
|
||||
case Philomena.Http.get!(make_api_url(prefix)) do
|
||||
%Tesla.Env{body: body, status: 200} -> String.contains?(body, rest)
|
||||
_ -> false
|
||||
end
|
||||
end
|
||||
|
|
|
@ -10,7 +10,7 @@ defmodule PhilomenaWeb.ScraperPlug do
|
|||
conn
|
||||
|
||||
%{"scraper_cache" => url} when not is_nil(url) ->
|
||||
Philomena.Http.get!(url, [], max_body_length: 30_000_000)
|
||||
Philomena.Http.get!(url)
|
||||
|> maybe_fixup_params(opts, conn)
|
||||
|
||||
_ ->
|
||||
|
@ -18,7 +18,7 @@ defmodule PhilomenaWeb.ScraperPlug do
|
|||
end
|
||||
end
|
||||
|
||||
defp maybe_fixup_params(%HTTPoison.Response{body: body, status_code: 200}, opts, conn) do
|
||||
defp maybe_fixup_params(%Tesla.Env{body: body, status: 200}, opts, conn) do
|
||||
params_name = Keyword.get(opts, :params_name, "image")
|
||||
params_key = Keyword.get(opts, :params_key, "image")
|
||||
file = Briefly.create!()
|
||||
|
|
Loading…
Reference in a new issue