defmodule PhilomenaProxy.Scrapers.Raw do
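  # Scraper for direct links to media files: a URL can be handled when a HEAD
  # request for it returns 200 with one of the content types in @mime_types,
  # and the scrape result simply points back at the URL itself.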
  @moduledoc false

  alias PhilomenaProxy.Scrapers.Scraper
  alias PhilomenaProxy.Scrapers

  @behaviour Scraper
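
  # Content types this scraper is willing to handle; any other content-type
  # (or a non-200 HEAD response) makes can_handle?/2 return false.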
  @mime_types ["image/gif", "image/jpeg", "image/png", "image/svg", "image/svg+xml", "video/webm"]
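
  # Illustrative usage (the URL below is hypothetical; the actual result
  # depends on the HEAD response returned by PhilomenaProxy.Http.head/1):
  #
  #     url = "https://example.com/image.png"
  #     PhilomenaProxy.Scrapers.Raw.can_handle?(URI.parse(url), url)
  #     #=> true if the response is 200 with a content-type in @mime_types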
  @spec can_handle?(URI.t(), String.t()) :: boolean()
  def can_handle?(_uri, url) do
    PhilomenaProxy.Http.head(url)
    |> case do
      {:ok, %{status: 200, headers: headers}} ->
        headers
        |> Enum.any?(fn {k, v} ->
          String.downcase(k) == "content-type" and String.downcase(v) in @mime_types
        end)

      _ ->
        false
    end
  end

  @spec scrape(URI.t(), Scrapers.url()) :: Scrapers.scrape_result()
  def scrape(_uri, url) do
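    # The scrape result for a raw link is just the link itself in the standard
    # result shape, with a Camo-proxied copy of the URL alongside it.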
    %{
      source_url: url,
      author_name: "",
      description: "",
      images: [
        %{
          url: url,
          camo_url: PhilomenaProxy.Camo.image_url(url)
        }
      ]
    }
  end
end