mirror of
https://github.com/philomena-dev/philomena.git
synced 2025-03-28 06:17:46 +01:00
Fixing Baraag scraper
This commit is contained in:
parent
d8798f3075
commit
4d6932ed02
1 changed file with 20 additions and 9 deletions
|
@@ -6,23 +6,34 @@ defmodule Philomena.Scrapers.Baraag do
|
|||
end
|
||||
|
||||
def scrape(_uri, url) do
|
||||
[user, status_id] = Regex.run(@url_regex, url, capture: :all)
|
||||
[_, status_id] = Regex.run(@url_regex, url, capture: :all)
|
||||
|
||||
api_url = "https://baraag.net/api/v1/statuses/#{status_id}"
|
||||
{:ok, %Tesla.Env{status: 200, body: body}} = Philomena.Http.get(api_url)
|
||||
|
||||
toot = Jason.decode!(body)
|
||||
|
||||
images =
|
||||
for x <- toot["media_attachments"] do
|
||||
%{
|
||||
url: "#{x["url"]}",
|
||||
camo_url: Camo.Image.image_url(x["preview_url"])
|
||||
}
|
||||
end
|
||||
|
||||
description =
|
||||
toot["content"]
|
||||
|> HtmlSanitizeEx.strip_tags()
|
||||
|> String.replace(~r/ +/, " ")
|
||||
|> String.replace(~r/\n \n +/, "\n")
|
||||
|> String.replace(~r/\n /, "\n")
|
||||
|> String.trim()
|
||||
|
||||
%{
|
||||
source_url: toot["url"],
|
||||
author_name: toot["account"]["username"],
|
||||
description: toot["content"],
|
||||
images: [
|
||||
%{
|
||||
url: "#{toot["media_attachments"]["url"]}",
|
||||
camo_url: Camo.Image.image_url(toot["media_attachments"]["preview_url"])
|
||||
}
|
||||
]
|
||||
description: description,
|
||||
images: images
|
||||
}
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Add table
Reference in a new issue