2019-11-28 18:12:10 +01:00
|
|
|
defmodule Philomena.Scrapers.Twitter do
  @moduledoc """
  Scraper for Twitter status (tweet) URLs.

  Recognizes `http`/`https` URLs on `twitter.com` or `mobile.twitter.com` of the
  form `/<user>/status/<status_id>` and produces a single-image scrape result
  whose image URL is built on the `d.fxtwitter.com` host.
  """

  # Captures: 1 = user handle, 2 = numeric status id.
  # The dot in "twitter\.com" is escaped on purpose: a bare `.` matches any
  # character and would accept look-alike hosts such as "twitterxcom".
  @url_regex ~r|\Ahttps?://(?:mobile\.)?twitter\.com/([A-Za-z\d_]+)/status/(\d+)/?|

  @doc """
  Returns `true` when `url` is a Twitter status URL this scraper understands.

  The parsed URI (first argument) is ignored; only the raw URL string is
  checked against the module's URL pattern.
  """
  @spec can_handle?(URI.t(), String.t()) :: boolean()
  def can_handle?(_uri, url) do
    String.match?(url, @url_regex)
  end

  @doc """
  Builds the scrape result for a Twitter status URL.

  Extracts the user handle and status id from `url`, issues a HEAD request to
  the derived image URL, and returns a map with:

    * `:source_url` - the canonical `https://twitter.com/<user>/status/<id>` URL
    * `:author_name` - the user handle taken from the URL
    * `:images` - a one-element list with the image `:url` and its `:camo_url`

  Raises `MatchError` if `url` does not match the URL pattern, or if the HEAD
  request does not return `{:ok, %Tesla.Env{status: 200}}`. Callers are
  expected to check `can_handle?/2` first.
  """
  @spec scrape(URI.t(), String.t()) :: map()
  def scrape(_uri, url) do
    [user, status_id] = Regex.run(@url_regex, url, capture: :all_but_first)

    image_url = "https://d.fxtwitter.com/#{user}/status/#{status_id}.jpg"

    # Assert the image is actually reachable before handing the URL back;
    # any other response crashes here rather than returning a dead link.
    {:ok, %Tesla.Env{status: 200}} = Philomena.Http.head(image_url)

    %{
      source_url: "https://twitter.com/#{user}/status/#{status_id}",
      author_name: user,
      images: [
        %{
          url: image_url,
          camo_url: Camo.Image.image_url(image_url)
        }
      ]
    }
  end
end
|