mirror of
https://github.com/philomena-dev/philomena.git
synced 2024-11-23 20:18:00 +01:00
Split out HTTP client interaction into PhilomenaProxy namespace
This commit is contained in:
parent
3a2cb05995
commit
c63bc41d8b
21 changed files with 294 additions and 113 deletions
|
@ -1,8 +0,0 @@
|
||||||
defmodule Camo.Image do
|
|
||||||
@doc """
|
|
||||||
Convert a potentially untrusted external image URL into a trusted one
|
|
||||||
loaded through a gocamo proxy (specified by the environment).
|
|
||||||
"""
|
|
||||||
@spec image_url(String.t()) :: String.t()
|
|
||||||
def image_url(input), do: Philomena.Native.camo_image_url(input)
|
|
||||||
end
|
|
|
@ -1,7 +1,7 @@
|
||||||
defmodule Philomena.ArtistLinks.AutomaticVerifier do
|
defmodule Philomena.ArtistLinks.AutomaticVerifier do
|
||||||
def check_link(artist_link, recheck_time) do
|
def check_link(artist_link, recheck_time) do
|
||||||
artist_link.uri
|
artist_link.uri
|
||||||
|> Philomena.Http.get()
|
|> PhilomenaProxy.Http.get()
|
||||||
|> contains_verification_code?(artist_link.verification_code)
|
|> contains_verification_code?(artist_link.verification_code)
|
||||||
|> case do
|
|> case do
|
||||||
true ->
|
true ->
|
||||||
|
|
|
@ -4,7 +4,7 @@ defmodule Philomena.Channels.PicartoChannel do
|
||||||
@spec live_channels(DateTime.t()) :: map()
|
@spec live_channels(DateTime.t()) :: map()
|
||||||
def live_channels(now) do
|
def live_channels(now) do
|
||||||
@api_online
|
@api_online
|
||||||
|> Philomena.Http.get()
|
|> PhilomenaProxy.Http.get()
|
||||||
|> case do
|
|> case do
|
||||||
{:ok, %Tesla.Env{body: body, status: 200}} ->
|
{:ok, %Tesla.Env{body: body, status: 200}} ->
|
||||||
body
|
body
|
||||||
|
|
|
@ -4,7 +4,7 @@ defmodule Philomena.Channels.PiczelChannel do
|
||||||
@spec live_channels(DateTime.t()) :: map()
|
@spec live_channels(DateTime.t()) :: map()
|
||||||
def live_channels(now) do
|
def live_channels(now) do
|
||||||
@api_online
|
@api_online
|
||||||
|> Philomena.Http.get()
|
|> PhilomenaProxy.Http.get()
|
||||||
|> case do
|
|> case do
|
||||||
{:ok, %Tesla.Env{body: body, status: 200}} ->
|
{:ok, %Tesla.Env{body: body, status: 200}} ->
|
||||||
body
|
body
|
||||||
|
|
|
@ -1,46 +0,0 @@
|
||||||
defmodule Philomena.Http do
|
|
||||||
def get(url, headers \\ [], options \\ []) do
|
|
||||||
Tesla.get(client(headers), url, opts: [adapter: adapter_opts(options)])
|
|
||||||
end
|
|
||||||
|
|
||||||
def head(url, headers \\ [], options \\ []) do
|
|
||||||
Tesla.head(client(headers), url, opts: [adapter: adapter_opts(options)])
|
|
||||||
end
|
|
||||||
|
|
||||||
def post(url, body, headers \\ [], options \\ []) do
|
|
||||||
Tesla.post(client(headers), url, body, opts: [adapter: adapter_opts(options)])
|
|
||||||
end
|
|
||||||
|
|
||||||
defp adapter_opts(opts) do
|
|
||||||
opts = Keyword.merge(opts, max_body: 125_000_000, inet6: true)
|
|
||||||
|
|
||||||
case Application.get_env(:philomena, :proxy_host) do
|
|
||||||
nil ->
|
|
||||||
opts
|
|
||||||
|
|
||||||
url ->
|
|
||||||
Keyword.merge(opts, proxy: proxy_opts(URI.parse(url)))
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
defp proxy_opts(%{host: host, port: port, scheme: "https"}),
|
|
||||||
do: {:https, host, port, [transport_opts: [inet6: true]]}
|
|
||||||
|
|
||||||
defp proxy_opts(%{host: host, port: port, scheme: "http"}),
|
|
||||||
do: {:http, host, port, [transport_opts: [inet6: true]]}
|
|
||||||
|
|
||||||
defp client(headers) do
|
|
||||||
Tesla.client(
|
|
||||||
[
|
|
||||||
{Tesla.Middleware.FollowRedirects, max_redirects: 1},
|
|
||||||
{Tesla.Middleware.Headers,
|
|
||||||
[
|
|
||||||
{"User-Agent",
|
|
||||||
"Mozilla/5.0 (X11; Philomena; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0"}
|
|
||||||
| headers
|
|
||||||
]}
|
|
||||||
],
|
|
||||||
Tesla.Adapter.Mint
|
|
||||||
)
|
|
||||||
end
|
|
||||||
end
|
|
|
@ -1,25 +0,0 @@
|
||||||
defmodule Philomena.Scrapers do
|
|
||||||
@scrapers [
|
|
||||||
Philomena.Scrapers.Deviantart,
|
|
||||||
Philomena.Scrapers.Pillowfort,
|
|
||||||
Philomena.Scrapers.Twitter,
|
|
||||||
Philomena.Scrapers.Tumblr,
|
|
||||||
Philomena.Scrapers.Raw
|
|
||||||
]
|
|
||||||
|
|
||||||
def scrape!(url) do
|
|
||||||
uri = URI.parse(url)
|
|
||||||
|
|
||||||
@scrapers
|
|
||||||
|> Enum.find(& &1.can_handle?(uri, url))
|
|
||||||
|> wrap()
|
|
||||||
|> Enum.map(& &1.scrape(uri, url))
|
|
||||||
|> unwrap()
|
|
||||||
end
|
|
||||||
|
|
||||||
defp wrap(nil), do: []
|
|
||||||
defp wrap(res), do: [res]
|
|
||||||
|
|
||||||
defp unwrap([result]), do: result
|
|
||||||
defp unwrap(_result), do: nil
|
|
||||||
end
|
|
24
lib/philomena_proxy/camo.ex
Normal file
24
lib/philomena_proxy/camo.ex
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
defmodule PhilomenaProxy.Camo do
|
||||||
|
@moduledoc """
|
||||||
|
Image proxying utilities.
|
||||||
|
"""
|
||||||
|
|
||||||
|
@doc """
|
||||||
|
Convert a potentially untrusted external image URL into a trusted one
|
||||||
|
loaded through a gocamo proxy (specified by the environment).
|
||||||
|
|
||||||
|
Configuration is read from environment variables at runtime by Philomena.
|
||||||
|
|
||||||
|
config :philomena,
|
||||||
|
camo_host: System.get_env("CAMO_HOST"),
|
||||||
|
camo_key: System.get_env("CAMO_KEY"),
|
||||||
|
|
||||||
|
## Example
|
||||||
|
|
||||||
|
iex> PhilomenaProxy.Camo.image_url("https://example.org/img/view/2024/1/1/1.png")
|
||||||
|
"https://example.net/L5MqSmYq1ZEqiBGGvsvSDpILyJI/aHR0cHM6Ly9leGFtcGxlLm9yZy9pbWcvdmlldy8yMDI0LzEvMS8xLnBuZwo"
|
||||||
|
|
||||||
|
"""
|
||||||
|
@spec image_url(String.t()) :: String.t()
|
||||||
|
def image_url(input), do: Philomena.Native.camo_image_url(input)
|
||||||
|
end
|
107
lib/philomena_proxy/http.ex
Normal file
107
lib/philomena_proxy/http.ex
Normal file
|
@ -0,0 +1,107 @@
|
||||||
|
defmodule PhilomenaProxy.Http do
|
||||||
|
@moduledoc """
|
||||||
|
HTTP client implementation.
|
||||||
|
|
||||||
|
This applies the Philomena User-Agent header, and optionally proxies traffic through an
|
||||||
|
HTTP proxy to allow the application to connect when the local network is restricted.
|
||||||
|
|
||||||
|
If a proxy host is not specified in the configuration, then a proxy is not used and external
|
||||||
|
traffic originates from the same network as the application.
|
||||||
|
|
||||||
|
Proxy options are read from environment variables at runtime by Philomena.
|
||||||
|
|
||||||
|
config :philomena,
|
||||||
|
proxy_host: System.get_env("PROXY_HOST"),
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
@type url :: String.t()
|
||||||
|
@type header_list :: [{String.t(), String.t()}]
|
||||||
|
@type body :: binary()
|
||||||
|
|
||||||
|
@type client_options :: keyword()
|
||||||
|
|
||||||
|
@doc ~S"""
|
||||||
|
Perform an HTTP GET request.
|
||||||
|
|
||||||
|
## Example
|
||||||
|
|
||||||
|
iex> PhilomenaProxy.Http.get("http://example.com", [{"authorization", "Bearer #{token}"}])
|
||||||
|
{:ok, %Tesla.Env{...}}
|
||||||
|
|
||||||
|
iex> PhilomenaProxy.Http.get("http://nonexistent.example.com")
|
||||||
|
{:error, %Mint.TransportError{reason: :nxdomain}}
|
||||||
|
|
||||||
|
"""
|
||||||
|
@spec get(url(), header_list(), client_options()) :: Tesla.Env.result()
|
||||||
|
def get(url, headers \\ [], options \\ []) do
|
||||||
|
Tesla.get(client(headers), url, opts: [adapter: adapter_opts(options)])
|
||||||
|
end
|
||||||
|
|
||||||
|
@doc ~S"""
|
||||||
|
Perform an HTTP HEAD request.
|
||||||
|
|
||||||
|
## Example
|
||||||
|
|
||||||
|
iex> PhilomenaProxy.Http.head("http://example.com", [{"authorization", "Bearer #{token}"}])
|
||||||
|
{:ok, %Tesla.Env{...}}
|
||||||
|
|
||||||
|
iex> PhilomenaProxy.Http.head("http://nonexistent.example.com")
|
||||||
|
{:error, %Mint.TransportError{reason: :nxdomain}}
|
||||||
|
|
||||||
|
"""
|
||||||
|
@spec head(url(), header_list(), client_options()) :: Tesla.Env.result()
|
||||||
|
def head(url, headers \\ [], options \\ []) do
|
||||||
|
Tesla.head(client(headers), url, opts: [adapter: adapter_opts(options)])
|
||||||
|
end
|
||||||
|
|
||||||
|
@doc ~S"""
|
||||||
|
Perform an HTTP POST request.
|
||||||
|
|
||||||
|
## Example
|
||||||
|
|
||||||
|
iex> PhilomenaProxy.Http.post("http://example.com", "", [{"authorization", "Bearer #{token}"}])
|
||||||
|
{:ok, %Tesla.Env{...}}
|
||||||
|
|
||||||
|
iex> PhilomenaProxy.Http.post("http://nonexistent.example.com", "")
|
||||||
|
{:error, %Mint.TransportError{reason: :nxdomain}}
|
||||||
|
|
||||||
|
"""
|
||||||
|
@spec post(url(), body(), header_list(), client_options()) :: Tesla.Env.result()
|
||||||
|
def post(url, body, headers \\ [], options \\ []) do
|
||||||
|
Tesla.post(client(headers), url, body, opts: [adapter: adapter_opts(options)])
|
||||||
|
end
|
||||||
|
|
||||||
|
defp adapter_opts(opts) do
|
||||||
|
opts = Keyword.merge(opts, max_body: 125_000_000, inet6: true)
|
||||||
|
|
||||||
|
case Application.get_env(:philomena, :proxy_host) do
|
||||||
|
nil ->
|
||||||
|
opts
|
||||||
|
|
||||||
|
url ->
|
||||||
|
Keyword.merge(opts, proxy: proxy_opts(URI.parse(url)))
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
defp proxy_opts(%{host: host, port: port, scheme: "https"}),
|
||||||
|
do: {:https, host, port, [transport_opts: [inet6: true]]}
|
||||||
|
|
||||||
|
defp proxy_opts(%{host: host, port: port, scheme: "http"}),
|
||||||
|
do: {:http, host, port, [transport_opts: [inet6: true]]}
|
||||||
|
|
||||||
|
defp client(headers) do
|
||||||
|
Tesla.client(
|
||||||
|
[
|
||||||
|
{Tesla.Middleware.FollowRedirects, max_redirects: 1},
|
||||||
|
{Tesla.Middleware.Headers,
|
||||||
|
[
|
||||||
|
{"User-Agent",
|
||||||
|
"Mozilla/5.0 (X11; Philomena; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0"}
|
||||||
|
| headers
|
||||||
|
]}
|
||||||
|
],
|
||||||
|
Tesla.Adapter.Mint
|
||||||
|
)
|
||||||
|
end
|
||||||
|
end
|
71
lib/philomena_proxy/scrapers.ex
Normal file
71
lib/philomena_proxy/scrapers.ex
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
defmodule PhilomenaProxy.Scrapers do
|
||||||
|
@moduledoc """
|
||||||
|
Scrape utilities to facilitate uploading media from other websites.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# The URL to fetch, as a string.
|
||||||
|
@type url :: String.t()
|
||||||
|
|
||||||
|
# An individual image in a list associated with a scrape result.
|
||||||
|
@type image_result :: %{
|
||||||
|
url: url(),
|
||||||
|
camo_url: url()
|
||||||
|
}
|
||||||
|
|
||||||
|
# Result of a successful scrape.
|
||||||
|
@type scrape_result :: %{
|
||||||
|
source_url: url(),
|
||||||
|
description: String.t() | nil,
|
||||||
|
author_name: String.t() | nil,
|
||||||
|
images: [image_result()]
|
||||||
|
}
|
||||||
|
|
||||||
|
@scrapers [
|
||||||
|
PhilomenaProxy.Scrapers.Deviantart,
|
||||||
|
PhilomenaProxy.Scrapers.Pillowfort,
|
||||||
|
PhilomenaProxy.Scrapers.Twitter,
|
||||||
|
PhilomenaProxy.Scrapers.Tumblr,
|
||||||
|
PhilomenaProxy.Scrapers.Raw
|
||||||
|
]
|
||||||
|
|
||||||
|
@doc """
|
||||||
|
Scrape a URL for content.
|
||||||
|
|
||||||
|
The scrape result is intended for serialization to JSON.
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
iex> PhilomenaProxy.Scrapers.scrape!("http://example.org/image-page")
|
||||||
|
%{
|
||||||
|
source_url: "http://example.org/image-page",
|
||||||
|
description: "Test",
|
||||||
|
author_name: "myself",
|
||||||
|
images: [
|
||||||
|
%{
|
||||||
|
url: "http://example.org/image.png",
|
||||||
|
camo_url: "http://example.net/UT2YIjkWDas6CQBmQcYlcNGmKfQ/aHR0cDovL2V4YW1wbGUub3JnL2ltY"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
iex> PhilomenaProxy.Scrapers.scrape!("http://example.org/nonexistent-path")
|
||||||
|
nil
|
||||||
|
|
||||||
|
"""
|
||||||
|
@spec scrape!(url()) :: scrape_result() | nil
|
||||||
|
def scrape!(url) do
|
||||||
|
uri = URI.parse(url)
|
||||||
|
|
||||||
|
@scrapers
|
||||||
|
|> Enum.find(& &1.can_handle?(uri, url))
|
||||||
|
|> wrap()
|
||||||
|
|> Enum.map(& &1.scrape(uri, url))
|
||||||
|
|> unwrap()
|
||||||
|
end
|
||||||
|
|
||||||
|
defp wrap(nil), do: []
|
||||||
|
defp wrap(res), do: [res]
|
||||||
|
|
||||||
|
defp unwrap([result]), do: result
|
||||||
|
defp unwrap(_result), do: nil
|
||||||
|
end
|
|
@ -1,4 +1,11 @@
|
||||||
defmodule Philomena.Scrapers.Deviantart do
|
defmodule PhilomenaProxy.Scrapers.Deviantart do
|
||||||
|
@moduledoc false
|
||||||
|
|
||||||
|
alias PhilomenaProxy.Scrapers.Scraper
|
||||||
|
alias PhilomenaProxy.Scrapers
|
||||||
|
|
||||||
|
@behaviour Scraper
|
||||||
|
|
||||||
@image_regex ~r|data-rh="true" rel="preload" href="([^"]*)" as="image"|
|
@image_regex ~r|data-rh="true" rel="preload" href="([^"]*)" as="image"|
|
||||||
@source_regex ~r|rel="canonical" href="([^"]*)"|
|
@source_regex ~r|rel="canonical" href="([^"]*)"|
|
||||||
@artist_regex ~r|https://www.deviantart.com/([^/]*)/art|
|
@artist_regex ~r|https://www.deviantart.com/([^/]*)/art|
|
||||||
|
@ -7,7 +14,7 @@ defmodule Philomena.Scrapers.Deviantart do
|
||||||
@png_regex ~r|(https://[0-9a-z\-\.]+(?:/intermediary)?/f/[0-9a-f\-]+/[0-9a-z\-]+\.png/v1/fill/[0-9a-z_,]+/[0-9a-z_\-]+)(\.png)(.*)|
|
@png_regex ~r|(https://[0-9a-z\-\.]+(?:/intermediary)?/f/[0-9a-f\-]+/[0-9a-z\-]+\.png/v1/fill/[0-9a-z_,]+/[0-9a-z_\-]+)(\.png)(.*)|
|
||||||
@jpg_regex ~r|(https://[0-9a-z\-\.]+(?:/intermediary)?/f/[0-9a-f\-]+/[0-9a-z\-]+\.jpg/v1/fill/w_[0-9]+,h_[0-9]+,q_)([0-9]+)(,[a-z]+\/[a-z0-6_\-]+\.jpe?g.*)|
|
@jpg_regex ~r|(https://[0-9a-z\-\.]+(?:/intermediary)?/f/[0-9a-f\-]+/[0-9a-z\-]+\.jpg/v1/fill/w_[0-9]+,h_[0-9]+,q_)([0-9]+)(,[a-z]+\/[a-z0-6_\-]+\.jpe?g.*)|
|
||||||
|
|
||||||
@spec can_handle?(URI.t(), String.t()) :: true | false
|
@spec can_handle?(URI.t(), String.t()) :: boolean()
|
||||||
def can_handle?(uri, _url) do
|
def can_handle?(uri, _url) do
|
||||||
String.ends_with?(uri.host, "deviantart.com")
|
String.ends_with?(uri.host, "deviantart.com")
|
||||||
end
|
end
|
||||||
|
@ -21,6 +28,7 @@ defmodule Philomena.Scrapers.Deviantart do
|
||||||
#
|
#
|
||||||
# So, regex it is. Eat dirt, deviantart. You don't deserve the respect
|
# So, regex it is. Eat dirt, deviantart. You don't deserve the respect
|
||||||
# artists give you.
|
# artists give you.
|
||||||
|
@spec scrape(URI.t(), Scrapers.url()) :: Scrapers.scrape_result()
|
||||||
def scrape(_uri, url) do
|
def scrape(_uri, url) do
|
||||||
url
|
url
|
||||||
|> follow_redirect(2)
|
|> follow_redirect(2)
|
||||||
|
@ -38,10 +46,11 @@ defmodule Philomena.Scrapers.Deviantart do
|
||||||
%{
|
%{
|
||||||
source_url: source,
|
source_url: source,
|
||||||
author_name: artist,
|
author_name: artist,
|
||||||
|
description: "",
|
||||||
images: [
|
images: [
|
||||||
%{
|
%{
|
||||||
url: image,
|
url: image,
|
||||||
camo_url: Camo.Image.image_url(image)
|
camo_url: PhilomenaProxy.Camo.image_url(image)
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
@ -51,7 +60,7 @@ defmodule Philomena.Scrapers.Deviantart do
|
||||||
with [domain, object_uuid, object_name] <-
|
with [domain, object_uuid, object_name] <-
|
||||||
Regex.run(@cdnint_regex, image.url, capture: :all_but_first),
|
Regex.run(@cdnint_regex, image.url, capture: :all_but_first),
|
||||||
built_url <- "#{domain}/intermediary/f/#{object_uuid}/#{object_name}",
|
built_url <- "#{domain}/intermediary/f/#{object_uuid}/#{object_name}",
|
||||||
{:ok, %Tesla.Env{status: 200}} <- Philomena.Http.head(built_url) do
|
{:ok, %Tesla.Env{status: 200}} <- PhilomenaProxy.Http.head(built_url) do
|
||||||
# This is the high resolution URL.
|
# This is the high resolution URL.
|
||||||
%{
|
%{
|
||||||
data
|
data
|
||||||
|
@ -110,7 +119,7 @@ defmodule Philomena.Scrapers.Deviantart do
|
||||||
|
|
||||||
built_url = "http://orig01.deviantart.net/x_by_x-d#{base36}.png"
|
built_url = "http://orig01.deviantart.net/x_by_x-d#{base36}.png"
|
||||||
|
|
||||||
case Philomena.Http.get(built_url) do
|
case PhilomenaProxy.Http.get(built_url) do
|
||||||
{:ok, %Tesla.Env{status: 301, headers: headers}} ->
|
{:ok, %Tesla.Env{status: 301, headers: headers}} ->
|
||||||
# Location header provides URL of high res image.
|
# Location header provides URL of high res image.
|
||||||
{_location, link} = Enum.find(headers, fn {header, _val} -> header == "location" end)
|
{_location, link} = Enum.find(headers, fn {header, _val} -> header == "location" end)
|
||||||
|
@ -135,7 +144,7 @@ defmodule Philomena.Scrapers.Deviantart do
|
||||||
defp follow_redirect(_url, 0), do: nil
|
defp follow_redirect(_url, 0), do: nil
|
||||||
|
|
||||||
defp follow_redirect(url, max_times) do
|
defp follow_redirect(url, max_times) do
|
||||||
case Philomena.Http.get(url) do
|
case PhilomenaProxy.Http.get(url) do
|
||||||
{:ok, %Tesla.Env{headers: headers, status: code}} when code in [301, 302] ->
|
{:ok, %Tesla.Env{headers: headers, status: code}} when code in [301, 302] ->
|
||||||
location = Enum.find_value(headers, &location_header/1)
|
location = Enum.find_value(headers, &location_header/1)
|
||||||
follow_redirect(location, max_times - 1)
|
follow_redirect(location, max_times - 1)
|
|
@ -1,4 +1,11 @@
|
||||||
defmodule Philomena.Scrapers.Pillowfort do
|
defmodule PhilomenaProxy.Scrapers.Pillowfort do
|
||||||
|
@moduledoc false
|
||||||
|
|
||||||
|
alias PhilomenaProxy.Scrapers.Scraper
|
||||||
|
alias PhilomenaProxy.Scrapers
|
||||||
|
|
||||||
|
@behaviour Scraper
|
||||||
|
|
||||||
@url_regex ~r|\Ahttps?://www\.pillowfort\.social/posts/([0-9]+)|
|
@url_regex ~r|\Ahttps?://www\.pillowfort\.social/posts/([0-9]+)|
|
||||||
|
|
||||||
@spec can_handle?(URI.t(), String.t()) :: boolean()
|
@spec can_handle?(URI.t(), String.t()) :: boolean()
|
||||||
|
@ -6,12 +13,13 @@ defmodule Philomena.Scrapers.Pillowfort do
|
||||||
String.match?(url, @url_regex)
|
String.match?(url, @url_regex)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@spec scrape(URI.t(), Scrapers.url()) :: Scrapers.scrape_result()
|
||||||
def scrape(_uri, url) do
|
def scrape(_uri, url) do
|
||||||
[post_id] = Regex.run(@url_regex, url, capture: :all_but_first)
|
[post_id] = Regex.run(@url_regex, url, capture: :all_but_first)
|
||||||
|
|
||||||
api_url = "https://www.pillowfort.social/posts/#{post_id}/json"
|
api_url = "https://www.pillowfort.social/posts/#{post_id}/json"
|
||||||
|
|
||||||
Philomena.Http.get(api_url)
|
PhilomenaProxy.Http.get(api_url)
|
||||||
|> json!()
|
|> json!()
|
||||||
|> process_response!(url)
|
|> process_response!(url)
|
||||||
end
|
end
|
||||||
|
@ -25,7 +33,7 @@ defmodule Philomena.Scrapers.Pillowfort do
|
||||||
|> Enum.map(
|
|> Enum.map(
|
||||||
&%{
|
&%{
|
||||||
url: &1["url"],
|
url: &1["url"],
|
||||||
camo_url: Camo.Image.image_url(&1["small_image_url"])
|
camo_url: PhilomenaProxy.Camo.image_url(&1["small_image_url"])
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,9 +1,16 @@
|
||||||
defmodule Philomena.Scrapers.Raw do
|
defmodule PhilomenaProxy.Scrapers.Raw do
|
||||||
|
@moduledoc false
|
||||||
|
|
||||||
|
alias PhilomenaProxy.Scrapers.Scraper
|
||||||
|
alias PhilomenaProxy.Scrapers
|
||||||
|
|
||||||
|
@behaviour Scraper
|
||||||
|
|
||||||
@mime_types ["image/gif", "image/jpeg", "image/png", "image/svg", "image/svg+xml", "video/webm"]
|
@mime_types ["image/gif", "image/jpeg", "image/png", "image/svg", "image/svg+xml", "video/webm"]
|
||||||
|
|
||||||
@spec can_handle?(URI.t(), String.t()) :: true | false
|
@spec can_handle?(URI.t(), String.t()) :: boolean()
|
||||||
def can_handle?(_uri, url) do
|
def can_handle?(_uri, url) do
|
||||||
Philomena.Http.head(url)
|
PhilomenaProxy.Http.head(url)
|
||||||
|> case do
|
|> case do
|
||||||
{:ok, %Tesla.Env{status: 200, headers: headers}} ->
|
{:ok, %Tesla.Env{status: 200, headers: headers}} ->
|
||||||
headers
|
headers
|
||||||
|
@ -16,13 +23,16 @@ defmodule Philomena.Scrapers.Raw do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@spec scrape(URI.t(), Scrapers.url()) :: Scrapers.scrape_result()
|
||||||
def scrape(_uri, url) do
|
def scrape(_uri, url) do
|
||||||
%{
|
%{
|
||||||
source_url: url,
|
source_url: url,
|
||||||
|
author_name: "",
|
||||||
|
description: "",
|
||||||
images: [
|
images: [
|
||||||
%{
|
%{
|
||||||
url: url,
|
url: url,
|
||||||
camo_url: Camo.Image.image_url(url)
|
camo_url: PhilomenaProxy.Camo.image_url(url)
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
11
lib/philomena_proxy/scrapers/scraper.ex
Normal file
11
lib/philomena_proxy/scrapers/scraper.ex
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
defmodule PhilomenaProxy.Scrapers.Scraper do
|
||||||
|
@moduledoc false
|
||||||
|
|
||||||
|
alias PhilomenaProxy.Scrapers
|
||||||
|
|
||||||
|
# Return whether the given URL can be parsed by the scraper
|
||||||
|
@callback can_handle?(URI.t(), Scrapers.url()) :: boolean()
|
||||||
|
|
||||||
|
# Collect upload information from the URL
|
||||||
|
@callback scrape(URI.t(), Scrapers.url()) :: Scrapers.scrape_result()
|
||||||
|
end
|
|
@ -1,4 +1,11 @@
|
||||||
defmodule Philomena.Scrapers.Tumblr do
|
defmodule PhilomenaProxy.Scrapers.Tumblr do
|
||||||
|
@moduledoc false
|
||||||
|
|
||||||
|
alias PhilomenaProxy.Scrapers.Scraper
|
||||||
|
alias PhilomenaProxy.Scrapers
|
||||||
|
|
||||||
|
@behaviour Scraper
|
||||||
|
|
||||||
@url_regex ~r|\Ahttps?://(?:.*)/(?:image\|post)/(\d+)(?:\z\|[/?#])|
|
@url_regex ~r|\Ahttps?://(?:.*)/(?:image\|post)/(\d+)(?:\z\|[/?#])|
|
||||||
@media_regex ~r|https?://(?:\d+\.)?media\.tumblr\.com/[a-f\d]+/[a-f\d]+-[a-f\d]+/s\d+x\d+/[a-f\d]+\.(?:png\|jpe?g\|gif)|i
|
@media_regex ~r|https?://(?:\d+\.)?media\.tumblr\.com/[a-f\d]+/[a-f\d]+-[a-f\d]+/s\d+x\d+/[a-f\d]+\.(?:png\|jpe?g\|gif)|i
|
||||||
@size_regex ~r|_(\d+)(\..+)\z|
|
@size_regex ~r|_(\d+)(\..+)\z|
|
||||||
|
@ -18,13 +25,14 @@ defmodule Philomena.Scrapers.Tumblr do
|
||||||
String.match?(url, @url_regex) and tumblr_domain?(uri.host)
|
String.match?(url, @url_regex) and tumblr_domain?(uri.host)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@spec scrape(URI.t(), Scrapers.url()) :: Scrapers.scrape_result()
|
||||||
def scrape(uri, url) do
|
def scrape(uri, url) do
|
||||||
[post_id] = Regex.run(@url_regex, url, capture: :all_but_first)
|
[post_id] = Regex.run(@url_regex, url, capture: :all_but_first)
|
||||||
|
|
||||||
api_url =
|
api_url =
|
||||||
"https://api.tumblr.com/v2/blog/#{uri.host}/posts/photo?id=#{post_id}&api_key=#{tumblr_api_key()}"
|
"https://api.tumblr.com/v2/blog/#{uri.host}/posts/photo?id=#{post_id}&api_key=#{tumblr_api_key()}"
|
||||||
|
|
||||||
Philomena.Http.get(api_url)
|
PhilomenaProxy.Http.get(api_url)
|
||||||
|> json!()
|
|> json!()
|
||||||
|> process_response!()
|
|> process_response!()
|
||||||
end
|
end
|
||||||
|
@ -44,7 +52,7 @@ defmodule Philomena.Scrapers.Tumblr do
|
||||||
%{"url" => preview} =
|
%{"url" => preview} =
|
||||||
Enum.find(photo["alt_sizes"], &(&1["width"] == 400)) || %{"url" => image}
|
Enum.find(photo["alt_sizes"], &(&1["width"] == 400)) || %{"url" => image}
|
||||||
|
|
||||||
%{url: image, camo_url: Camo.Image.image_url(preview)}
|
%{url: image, camo_url: PhilomenaProxy.Camo.image_url(preview)}
|
||||||
end)
|
end)
|
||||||
|
|
||||||
add_meta(post, images)
|
add_meta(post, images)
|
||||||
|
@ -55,7 +63,7 @@ defmodule Philomena.Scrapers.Tumblr do
|
||||||
@media_regex
|
@media_regex
|
||||||
|> Regex.scan(post["body"])
|
|> Regex.scan(post["body"])
|
||||||
|> Enum.map(fn [url | _captures] ->
|
|> Enum.map(fn [url | _captures] ->
|
||||||
%{url: url, camo_url: Camo.Image.image_url(url)}
|
%{url: url, camo_url: PhilomenaProxy.Camo.image_url(url)}
|
||||||
end)
|
end)
|
||||||
|
|
||||||
add_meta(post, images)
|
add_meta(post, images)
|
||||||
|
@ -68,7 +76,7 @@ defmodule Philomena.Scrapers.Tumblr do
|
||||||
end
|
end
|
||||||
|
|
||||||
defp url_ok?(url) do
|
defp url_ok?(url) do
|
||||||
match?({:ok, %Tesla.Env{status: 200}}, Philomena.Http.head(url))
|
match?({:ok, %Tesla.Env{status: 200}}, PhilomenaProxy.Http.head(url))
|
||||||
end
|
end
|
||||||
|
|
||||||
defp add_meta(post, images) do
|
defp add_meta(post, images) do
|
|
@ -1,16 +1,24 @@
|
||||||
defmodule Philomena.Scrapers.Twitter do
|
defmodule PhilomenaProxy.Scrapers.Twitter do
|
||||||
|
@moduledoc false
|
||||||
|
|
||||||
|
alias PhilomenaProxy.Scrapers.Scraper
|
||||||
|
alias PhilomenaProxy.Scrapers
|
||||||
|
|
||||||
|
@behaviour Scraper
|
||||||
|
|
||||||
@url_regex ~r|\Ahttps?://(?:mobile\.)?(?:twitter\|x).com/([A-Za-z\d_]+)/status/([\d]+)/?|
|
@url_regex ~r|\Ahttps?://(?:mobile\.)?(?:twitter\|x).com/([A-Za-z\d_]+)/status/([\d]+)/?|
|
||||||
|
|
||||||
@spec can_handle?(URI.t(), String.t()) :: true | false
|
@spec can_handle?(URI.t(), String.t()) :: boolean()
|
||||||
def can_handle?(_uri, url) do
|
def can_handle?(_uri, url) do
|
||||||
String.match?(url, @url_regex)
|
String.match?(url, @url_regex)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@spec scrape(URI.t(), Scrapers.url()) :: Scrapers.scrape_result()
|
||||||
def scrape(_uri, url) do
|
def scrape(_uri, url) do
|
||||||
[user, status_id] = Regex.run(@url_regex, url, capture: :all_but_first)
|
[user, status_id] = Regex.run(@url_regex, url, capture: :all_but_first)
|
||||||
|
|
||||||
api_url = "https://api.fxtwitter.com/#{user}/status/#{status_id}"
|
api_url = "https://api.fxtwitter.com/#{user}/status/#{status_id}"
|
||||||
{:ok, %Tesla.Env{status: 200, body: body}} = Philomena.Http.get(api_url)
|
{:ok, %Tesla.Env{status: 200, body: body}} = PhilomenaProxy.Http.get(api_url)
|
||||||
|
|
||||||
json = Jason.decode!(body)
|
json = Jason.decode!(body)
|
||||||
tweet = json["tweet"]
|
tweet = json["tweet"]
|
||||||
|
@ -19,7 +27,7 @@ defmodule Philomena.Scrapers.Twitter do
|
||||||
Enum.map(tweet["media"]["photos"], fn p ->
|
Enum.map(tweet["media"]["photos"], fn p ->
|
||||||
%{
|
%{
|
||||||
url: "#{p["url"]}:orig",
|
url: "#{p["url"]}:orig",
|
||||||
camo_url: Camo.Image.image_url(p["url"])
|
camo_url: PhilomenaProxy.Camo.image_url(p["url"])
|
||||||
}
|
}
|
||||||
end)
|
end)
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
defmodule PhilomenaWeb.Image.ScrapeController do
|
defmodule PhilomenaWeb.Image.ScrapeController do
|
||||||
use PhilomenaWeb, :controller
|
use PhilomenaWeb, :controller
|
||||||
|
|
||||||
alias Philomena.Scrapers
|
alias PhilomenaProxy.Scrapers
|
||||||
|
|
||||||
def create(conn, params) do
|
def create(conn, params) do
|
||||||
result =
|
result =
|
||||||
|
|
|
@ -31,7 +31,7 @@ defmodule PhilomenaWeb.CheckCaptchaPlug do
|
||||||
|
|
||||||
defp valid_solution?(%{"h-captcha-response" => captcha_token}) do
|
defp valid_solution?(%{"h-captcha-response" => captcha_token}) do
|
||||||
{:ok, %{body: body, status: 200}} =
|
{:ok, %{body: body, status: 200}} =
|
||||||
Philomena.Http.post(
|
PhilomenaProxy.Http.post(
|
||||||
"https://hcaptcha.com/siteverify",
|
"https://hcaptcha.com/siteverify",
|
||||||
URI.encode_query(%{"response" => captcha_token, "secret" => hcaptcha_secret_key()}),
|
URI.encode_query(%{"response" => captcha_token, "secret" => hcaptcha_secret_key()}),
|
||||||
[{"Content-Type", "application/x-www-form-urlencoded"}]
|
[{"Content-Type", "application/x-www-form-urlencoded"}]
|
||||||
|
|
|
@ -35,7 +35,7 @@ defmodule PhilomenaWeb.CompromisedPasswordCheckPlug do
|
||||||
:crypto.hash(:sha, password)
|
:crypto.hash(:sha, password)
|
||||||
|> Base.encode16()
|
|> Base.encode16()
|
||||||
|
|
||||||
case Philomena.Http.get(make_api_url(prefix)) do
|
case PhilomenaProxy.Http.get(make_api_url(prefix)) do
|
||||||
{:ok, %Tesla.Env{body: body, status: 200}} -> String.contains?(body, rest)
|
{:ok, %Tesla.Env{body: body, status: 200}} -> String.contains?(body, rest)
|
||||||
_ -> false
|
_ -> false
|
||||||
end
|
end
|
||||||
|
|
|
@ -15,7 +15,7 @@ defmodule PhilomenaWeb.ScraperPlug do
|
||||||
|
|
||||||
%{"scraper_cache" => url} when not is_nil(url) and url != "" ->
|
%{"scraper_cache" => url} when not is_nil(url) and url != "" ->
|
||||||
url
|
url
|
||||||
|> Philomena.Http.get()
|
|> PhilomenaProxy.Http.get()
|
||||||
|> maybe_fixup_params(url, opts, conn)
|
|> maybe_fixup_params(url, opts, conn)
|
||||||
|
|
||||||
_ ->
|
_ ->
|
||||||
|
|
|
@ -4,20 +4,24 @@ defmodule PhilomenaWeb.ChannelView do
|
||||||
def channel_image(%{type: "LivestreamChannel", short_name: short_name}) do
|
def channel_image(%{type: "LivestreamChannel", short_name: short_name}) do
|
||||||
now = DateTime.utc_now() |> DateTime.to_unix(:microsecond)
|
now = DateTime.utc_now() |> DateTime.to_unix(:microsecond)
|
||||||
|
|
||||||
Camo.Image.image_url(
|
PhilomenaProxy.Camo.image_url(
|
||||||
"https://thumbnail.api.livestream.com/thumbnail?name=#{short_name}&rand=#{now}"
|
"https://thumbnail.api.livestream.com/thumbnail?name=#{short_name}&rand=#{now}"
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
def channel_image(%{type: "PicartoChannel", thumbnail_url: thumbnail_url}),
|
def channel_image(%{type: "PicartoChannel", thumbnail_url: thumbnail_url}),
|
||||||
do: Camo.Image.image_url(thumbnail_url || "https://picarto.tv/images/missingthumb.jpg")
|
do:
|
||||||
|
PhilomenaProxy.Camo.image_url(thumbnail_url || "https://picarto.tv/images/missingthumb.jpg")
|
||||||
|
|
||||||
def channel_image(%{type: "PiczelChannel", remote_stream_id: remote_stream_id}),
|
def channel_image(%{type: "PiczelChannel", remote_stream_id: remote_stream_id}),
|
||||||
do: Camo.Image.image_url("https://piczel.tv/api/thumbnail/stream_#{remote_stream_id}.jpg")
|
do:
|
||||||
|
PhilomenaProxy.Camo.image_url(
|
||||||
|
"https://piczel.tv/api/thumbnail/stream_#{remote_stream_id}.jpg"
|
||||||
|
)
|
||||||
|
|
||||||
def channel_image(%{type: "TwitchChannel", short_name: short_name}),
|
def channel_image(%{type: "TwitchChannel", short_name: short_name}),
|
||||||
do:
|
do:
|
||||||
Camo.Image.image_url(
|
PhilomenaProxy.Camo.image_url(
|
||||||
"https://static-cdn.jtvnw.net/previews-ttv/live_user_#{String.downcase(short_name)}-320x180.jpg"
|
"https://static-cdn.jtvnw.net/previews-ttv/live_user_#{String.downcase(short_name)}-320x180.jpg"
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
|
@ -52,7 +52,7 @@ for image_def <- resources["remote_images"] do
|
||||||
now = DateTime.utc_now() |> DateTime.to_unix(:microsecond)
|
now = DateTime.utc_now() |> DateTime.to_unix(:microsecond)
|
||||||
|
|
||||||
IO.puts "Fetching #{image_def["url"]} ..."
|
IO.puts "Fetching #{image_def["url"]} ..."
|
||||||
{:ok, %{body: body}} = Philomena.Http.get(image_def["url"])
|
{:ok, %{body: body}} = PhilomenaProxy.Http.get(image_def["url"])
|
||||||
|
|
||||||
File.write!(file, body)
|
File.write!(file, body)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue