mirror of
https://github.com/philomena-dev/philomena.git
synced 2024-11-23 20:18:00 +01:00
Require HTTP clients to handle errors
This commit is contained in:
parent
11276b2339
commit
39ce0ae4fd
11 changed files with 33 additions and 30 deletions
|
@ -1,10 +1,10 @@
|
||||||
defmodule Philomena.Http do
|
defmodule Philomena.Http do
|
||||||
def get!(url, headers \\ [], options \\ []) do
|
def get(url, headers \\ [], options \\ []) do
|
||||||
Tesla.get!(client(headers), url, opts: [adapter: adapter_opts(options)])
|
Tesla.get(client(headers), url, opts: [adapter: adapter_opts(options)])
|
||||||
end
|
end
|
||||||
|
|
||||||
def head!(url, headers \\ [], options \\ []) do
|
def head(url, headers \\ [], options \\ []) do
|
||||||
Tesla.head!(client(headers), url, opts: [adapter: adapter_opts(options)])
|
Tesla.head(client(headers), url, opts: [adapter: adapter_opts(options)])
|
||||||
end
|
end
|
||||||
|
|
||||||
defp adapter_opts(opts) do
|
defp adapter_opts(opts) do
|
||||||
|
|
|
@ -30,7 +30,7 @@ defmodule Philomena.Scrapers.Deviantart do
|
||||||
|> try_old_hires!()
|
|> try_old_hires!()
|
||||||
end
|
end
|
||||||
|
|
||||||
defp extract_data!(%Tesla.Env{body: body, status: 200}) do
|
defp extract_data!({:ok, %Tesla.Env{body: body, status: 200}}) do
|
||||||
[image] = Regex.run(@image_regex, body, capture: :all_but_first)
|
[image] = Regex.run(@image_regex, body, capture: :all_but_first)
|
||||||
[source] = Regex.run(@source_regex, body, capture: :all_but_first)
|
[source] = Regex.run(@source_regex, body, capture: :all_but_first)
|
||||||
[artist] = Regex.run(@artist_regex, source, capture: :all_but_first)
|
[artist] = Regex.run(@artist_regex, source, capture: :all_but_first)
|
||||||
|
@ -51,7 +51,7 @@ defmodule Philomena.Scrapers.Deviantart do
|
||||||
with [domain, object_uuid, object_name] <-
|
with [domain, object_uuid, object_name] <-
|
||||||
Regex.run(@cdnint_regex, image.url, capture: :all_but_first),
|
Regex.run(@cdnint_regex, image.url, capture: :all_but_first),
|
||||||
built_url <- "#{domain}/intermediary/f/#{object_uuid}/#{object_name}",
|
built_url <- "#{domain}/intermediary/f/#{object_uuid}/#{object_name}",
|
||||||
%Tesla.Env{status: 200} <- Philomena.Http.head!(built_url) do
|
{:ok, %Tesla.Env{status: 200}} <- Philomena.Http.head(built_url) do
|
||||||
# This is the high resolution URL.
|
# This is the high resolution URL.
|
||||||
%{
|
%{
|
||||||
data
|
data
|
||||||
|
@ -110,8 +110,8 @@ defmodule Philomena.Scrapers.Deviantart do
|
||||||
|
|
||||||
built_url = "http://orig01.deviantart.net/x_by_x-d#{base36}.png"
|
built_url = "http://orig01.deviantart.net/x_by_x-d#{base36}.png"
|
||||||
|
|
||||||
case Philomena.Http.get!(built_url) do
|
case Philomena.Http.get(built_url) do
|
||||||
%Tesla.Env{status: 301, headers: headers} ->
|
{:ok, %Tesla.Env{status: 301, headers: headers}} ->
|
||||||
# Location header provides URL of high res image.
|
# Location header provides URL of high res image.
|
||||||
{_location, link} = Enum.find(headers, fn {header, _val} -> header == "location" end)
|
{_location, link} = Enum.find(headers, fn {header, _val} -> header == "location" end)
|
||||||
|
|
||||||
|
@ -135,8 +135,8 @@ defmodule Philomena.Scrapers.Deviantart do
|
||||||
defp follow_redirect(_url, 0), do: nil
|
defp follow_redirect(_url, 0), do: nil
|
||||||
|
|
||||||
defp follow_redirect(url, max_times) do
|
defp follow_redirect(url, max_times) do
|
||||||
case Philomena.Http.get!(url) do
|
case Philomena.Http.get(url) do
|
||||||
%Tesla.Env{headers: headers, status: code} when code in [301, 302] ->
|
{:ok, %Tesla.Env{headers: headers, status: code}} when code in [301, 302] ->
|
||||||
location = Enum.find_value(headers, &location_header/1)
|
location = Enum.find_value(headers, &location_header/1)
|
||||||
follow_redirect(location, max_times - 1)
|
follow_redirect(location, max_times - 1)
|
||||||
|
|
||||||
|
|
|
@ -3,9 +3,9 @@ defmodule Philomena.Scrapers.Raw do
|
||||||
|
|
||||||
@spec can_handle?(URI.t(), String.t()) :: true | false
|
@spec can_handle?(URI.t(), String.t()) :: true | false
|
||||||
def can_handle?(_uri, url) do
|
def can_handle?(_uri, url) do
|
||||||
Philomena.Http.head!(url)
|
Philomena.Http.head(url)
|
||||||
|> case do
|
|> case do
|
||||||
%Tesla.Env{status: 200, headers: headers} ->
|
{:ok, %Tesla.Env{status: 200, headers: headers}} ->
|
||||||
headers
|
headers
|
||||||
|> Enum.any?(fn {k, v} ->
|
|> Enum.any?(fn {k, v} ->
|
||||||
String.downcase(k) == "content-type" and String.downcase(v) in @mime_types
|
String.downcase(k) == "content-type" and String.downcase(v) in @mime_types
|
||||||
|
|
|
@ -26,12 +26,12 @@ defmodule Philomena.Scrapers.Tumblr do
|
||||||
tumblr_api_key()
|
tumblr_api_key()
|
||||||
}"
|
}"
|
||||||
|
|
||||||
Philomena.Http.get!(api_url)
|
Philomena.Http.get(api_url)
|
||||||
|> json!()
|
|> json!()
|
||||||
|> process_response!()
|
|> process_response!()
|
||||||
end
|
end
|
||||||
|
|
||||||
defp json!(%Tesla.Env{body: body, status: 200}),
|
defp json!({:ok, %Tesla.Env{body: body, status: 200}}),
|
||||||
do: Jason.decode!(body)
|
do: Jason.decode!(body)
|
||||||
|
|
||||||
defp process_response!(%{"response" => %{"posts" => [post | _rest]}}),
|
defp process_response!(%{"response" => %{"posts" => [post | _rest]}}),
|
||||||
|
@ -70,7 +70,7 @@ defmodule Philomena.Scrapers.Tumblr do
|
||||||
end
|
end
|
||||||
|
|
||||||
defp url_ok?(url) do
|
defp url_ok?(url) do
|
||||||
match?(%Tesla.Env{status: 200}, Philomena.Http.head!(url))
|
match?({:ok, %Tesla.Env{status: 200}}, Philomena.Http.head(url))
|
||||||
end
|
end
|
||||||
|
|
||||||
defp add_meta(post, images) do
|
defp add_meta(post, images) do
|
||||||
|
|
|
@ -46,10 +46,13 @@ defmodule Philomena.Scrapers.Twitter do
|
||||||
url = "https://twitter.com/#{user}/status/#{status_id}"
|
url = "https://twitter.com/#{user}/status/#{status_id}"
|
||||||
|
|
||||||
{gt, bearer} =
|
{gt, bearer} =
|
||||||
Philomena.Http.get!(page_url)
|
Philomena.Http.get(page_url)
|
||||||
|> extract_guest_token_and_bearer()
|
|> extract_guest_token_and_bearer()
|
||||||
|
|
||||||
Philomena.Http.get!(api_url, [{"Authorization", "Bearer #{bearer}"}, {"x-guest-token", gt}])
|
{:ok, api_resp} =
|
||||||
|
Philomena.Http.get(api_url, [{"Authorization", "Bearer #{bearer}"}, {"x-guest-token", gt}])
|
||||||
|
|
||||||
|
api_resp
|
||||||
|> Map.get(:body)
|
|> Map.get(:body)
|
||||||
|> Jason.decode!()
|
|> Jason.decode!()
|
||||||
|> Map.get("globalObjects")
|
|> Map.get("globalObjects")
|
||||||
|
@ -59,11 +62,11 @@ defmodule Philomena.Scrapers.Twitter do
|
||||||
|> Map.put("url", url)
|
|> Map.put("url", url)
|
||||||
end
|
end
|
||||||
|
|
||||||
defp extract_guest_token_and_bearer(%Tesla.Env{body: page}) do
|
defp extract_guest_token_and_bearer({:ok, %Tesla.Env{body: page}}) do
|
||||||
[gt] = Regex.run(@gt_regex, page, capture: :all_but_first)
|
[gt] = Regex.run(@gt_regex, page, capture: :all_but_first)
|
||||||
[script] = Regex.run(@script_regex, page, capture: :all_but_first)
|
[script] = Regex.run(@script_regex, page, capture: :all_but_first)
|
||||||
|
|
||||||
%{body: body} = Philomena.Http.get!(script)
|
{:ok, %{body: body}} = Philomena.Http.get(script)
|
||||||
|
|
||||||
[bearer] = Regex.run(@bearer_regex, body, capture: :all_but_first)
|
[bearer] = Regex.run(@bearer_regex, body, capture: :all_but_first)
|
||||||
|
|
||||||
|
|
|
@ -22,13 +22,13 @@ defmodule Philomena.Servers.PicartoChannelUpdater do
|
||||||
now = DateTime.utc_now() |> DateTime.truncate(:second)
|
now = DateTime.utc_now() |> DateTime.truncate(:second)
|
||||||
|
|
||||||
@api_online
|
@api_online
|
||||||
|> Philomena.Http.get!()
|
|> Philomena.Http.get()
|
||||||
|> handle_response(now)
|
|> handle_response(now)
|
||||||
|
|
||||||
run()
|
run()
|
||||||
end
|
end
|
||||||
|
|
||||||
defp handle_response(%Tesla.Env{body: body, status: 200}, now) do
|
defp handle_response({:ok, %Tesla.Env{body: body, status: 200}}, now) do
|
||||||
resp =
|
resp =
|
||||||
body
|
body
|
||||||
|> Jason.decode!()
|
|> Jason.decode!()
|
||||||
|
|
|
@ -22,13 +22,13 @@ defmodule Philomena.Servers.PiczelChannelUpdater do
|
||||||
now = DateTime.utc_now() |> DateTime.truncate(:second)
|
now = DateTime.utc_now() |> DateTime.truncate(:second)
|
||||||
|
|
||||||
@api_online
|
@api_online
|
||||||
|> Philomena.Http.get!()
|
|> Philomena.Http.get()
|
||||||
|> handle_response(now)
|
|> handle_response(now)
|
||||||
|
|
||||||
run()
|
run()
|
||||||
end
|
end
|
||||||
|
|
||||||
defp handle_response(%Tesla.Env{body: body, status: 200}, now) do
|
defp handle_response({:ok, %Tesla.Env{body: body, status: 200}}, now) do
|
||||||
resp =
|
resp =
|
||||||
body
|
body
|
||||||
|> Jason.decode!()
|
|> Jason.decode!()
|
||||||
|
|
|
@ -59,11 +59,11 @@ defmodule Philomena.Servers.UserLinkUpdater do
|
||||||
|
|
||||||
user_link
|
user_link
|
||||||
|> Map.get(:uri)
|
|> Map.get(:uri)
|
||||||
|> Philomena.Http.get!()
|
|> Philomena.Http.get()
|
||||||
|> handle_response(user_link)
|
|> handle_response(user_link)
|
||||||
end
|
end
|
||||||
|
|
||||||
defp handle_response(%Tesla.Env{body: body, status: 200}, user_link) do
|
defp handle_response({:ok, %Tesla.Env{body: body, status: 200}}, user_link) do
|
||||||
case :binary.match(body, user_link.verification_code) do
|
case :binary.match(body, user_link.verification_code) do
|
||||||
:nomatch ->
|
:nomatch ->
|
||||||
nil
|
nil
|
||||||
|
|
|
@ -35,8 +35,8 @@ defmodule PhilomenaWeb.CompromisedPasswordCheckPlug do
|
||||||
:crypto.hash(:sha, password)
|
:crypto.hash(:sha, password)
|
||||||
|> Base.encode16()
|
|> Base.encode16()
|
||||||
|
|
||||||
case Philomena.Http.get!(make_api_url(prefix)) do
|
case Philomena.Http.get(make_api_url(prefix)) do
|
||||||
%Tesla.Env{body: body, status: 200} -> String.contains?(body, rest)
|
{:ok, %Tesla.Env{body: body, status: 200}} -> String.contains?(body, rest)
|
||||||
_ -> false
|
_ -> false
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -13,7 +13,7 @@ defmodule PhilomenaWeb.ScraperPlug do
|
||||||
|
|
||||||
%{"scraper_cache" => url} when not is_nil(url) ->
|
%{"scraper_cache" => url} when not is_nil(url) ->
|
||||||
url
|
url
|
||||||
|> Philomena.Http.get!()
|
|> Philomena.Http.get()
|
||||||
|> maybe_fixup_params(opts, conn)
|
|> maybe_fixup_params(opts, conn)
|
||||||
|
|
||||||
_ ->
|
_ ->
|
||||||
|
@ -21,7 +21,7 @@ defmodule PhilomenaWeb.ScraperPlug do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
defp maybe_fixup_params(%Tesla.Env{body: body, status: 200}, opts, conn) do
|
defp maybe_fixup_params({:ok, %Tesla.Env{body: body, status: 200}}, opts, conn) do
|
||||||
params_name = Keyword.get(opts, :params_name, "image")
|
params_name = Keyword.get(opts, :params_name, "image")
|
||||||
params_key = Keyword.get(opts, :params_key, "image")
|
params_key = Keyword.get(opts, :params_key, "image")
|
||||||
file = Briefly.create!()
|
file = Briefly.create!()
|
||||||
|
|
|
@ -52,7 +52,7 @@ for image_def <- resources["remote_images"] do
|
||||||
now = DateTime.utc_now() |> DateTime.to_unix(:microsecond)
|
now = DateTime.utc_now() |> DateTime.to_unix(:microsecond)
|
||||||
|
|
||||||
IO.puts "Fetching #{image_def["url"]} ..."
|
IO.puts "Fetching #{image_def["url"]} ..."
|
||||||
%{body: body} = Philomena.Http.get!(image_def["url"])
|
{:ok, %{body: body}} = Philomena.Http.get(image_def["url"])
|
||||||
|
|
||||||
File.write!(file, body)
|
File.write!(file, body)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue