2024-05-25 03:15:05 +02:00
|
|
|
defmodule PhilomenaProxy.Scrapers.Twitter do
  @moduledoc false

  alias PhilomenaProxy.Scrapers.Scraper
  alias PhilomenaProxy.Scrapers

  @behaviour Scraper

  # Matches twitter.com / x.com / mobile.twitter.com status URLs and captures
  # the poster's username and the numeric status (tweet) id.
  #
  # The dot before "com" is escaped: the previous pattern's bare `.` matched
  # any character, so hosts such as "twitterxcom" were incorrectly accepted.
  @url_regex ~r|\Ahttps?://(?:mobile\.)?(?:twitter\|x)\.com/([A-Za-z\d_]+)/status/([\d]+)/?|

  # Returns true when `url` is a Twitter/X status URL this scraper can handle.
  # The URI argument is unused; matching is done on the raw URL string.
  @impl Scraper
  @spec can_handle?(URI.t(), String.t()) :: boolean()
  def can_handle?(_uri, url) do
    String.match?(url, @url_regex)
  end

  # Fetches tweet metadata from the fxtwitter API and normalizes it into the
  # scraper result map (source URL, author name, description, image list).
  #
  # Raises MatchError if `url` does not match @url_regex or the API call does
  # not return HTTP 200 — callers are expected to gate on can_handle?/2 first.
  @impl Scraper
  @spec scrape(URI.t(), Scrapers.url()) :: Scrapers.scrape_result()
  def scrape(_uri, url) do
    [user, status_id] = Regex.run(@url_regex, url, capture: :all_but_first)

    api_url = "https://api.fxtwitter.com/#{user}/status/#{status_id}"
    {:ok, %Tesla.Env{status: 200, body: body}} = PhilomenaProxy.Http.get(api_url)

    tweet = Jason.decode!(body)["tweet"]

    # Text-only tweets have no photos ("media"/"photos" is absent), so fall
    # back to [] instead of crashing on Enum.map(nil, ...).
    #
    # ":orig" asks Twitter's CDN for the original-resolution file; camo_url
    # routes the preview through the site's Camo image proxy.
    images =
      Enum.map(tweet["media"]["photos"] || [], fn p ->
        %{
          url: "#{p["url"]}:orig",
          camo_url: PhilomenaProxy.Camo.image_url(p["url"])
        }
      end)

    %{
      source_url: tweet["url"],
      author_name: tweet["author"]["screen_name"],
      description: tweet["text"],
      images: images
    }
  end
end
|