philomena/lib/philomena/scrapers/twitter.ex

36 lines
972 B
Elixir
Raw Normal View History

2019-11-28 18:12:10 +01:00
defmodule Philomena.Scrapers.Twitter do
  @moduledoc """
  Scraper for Twitter status URLs.

  Tweet data is requested from the `api.fxtwitter.com` JSON API and the
  photos attached to the tweet are returned as scrape results.
  """

  # Captures the user name and the numeric status id. The dot in the host is
  # escaped so that only the literal host `twitter.com` (optionally prefixed
  # with `mobile.`) is accepted, not any character in that position.
  @url_regex ~r|\Ahttps?://(?:mobile\.)?twitter\.com/([A-Za-z\d_]+)/status/(\d+)/?|

  @doc """
  Returns `true` when `url` is a Twitter status URL this scraper can handle.

  The parsed URI (first argument) is ignored; only the raw `url` string is
  matched.
  """
  @spec can_handle?(URI.t(), String.t()) :: boolean()
  def can_handle?(_uri, url) do
    String.match?(url, @url_regex)
  end

  @doc """
  Scrapes the tweet referenced by `url`.

  Returns a map with the keys:

    * `:source_url` - canonical `https://twitter.com/...` URL of the tweet
    * `:author_name` - the user name taken from the URL
    * `:images` - one map per photo with `:url` and `:camo_url`; empty when
      the API response contains no photos

  Raises a `MatchError` when `url` does not match the expected format or when
  the API request does not return HTTP 200, and a `Jason.DecodeError` when the
  response body is not valid JSON.
  """
  @spec scrape(URI.t(), String.t()) :: map()
  def scrape(_uri, url) do
    [user, status_id] = Regex.run(@url_regex, url, capture: :all_but_first)

    api_url = "https://api.fxtwitter.com/#{user}/status/#{status_id}"
    {:ok, %Tesla.Env{status: 200, body: body}} = Philomena.Http.get(api_url)

    # Any of "tweet", "media" or "photos" may be missing (e.g. a tweet without
    # photos); get_in/2 yields nil in that case, which is treated as "no
    # images" instead of crashing inside Enum.map/2.
    photos =
      body
      |> Jason.decode!()
      |> get_in(["tweet", "media", "photos"])
      |> Kernel.||([])

    images =
      Enum.map(photos, fn photo ->
        %{
          url: large_format(photo["url"]),
          camo_url: Camo.Image.image_url(photo["url"])
        }
      end)

    %{
      source_url: "https://twitter.com/#{user}/status/#{status_id}",
      author_name: user,
      images: images
    }
  end

  # Rewrites a trailing ".jpg" into the "?format=jpg&name=large" query form.
  # Strings that do not end in ".jpg" are returned unchanged.
  defp large_format(str) do
    String.replace_suffix(str, ".jpg", "?format=jpg&name=large")
  end
end