Trying to add Pixiv scraper

This commit is contained in:
Chaska 2024-04-23 01:08:00 -05:00
parent f90d07e637
commit f2eec71ec5
2 changed files with 36 additions and 0 deletions

View file

@ -7,6 +7,7 @@ defmodule Philomena.Scrapers do
Philomena.Scrapers.Inkbunny,
Philomena.Scrapers.E621,
Philomena.Scrapers.Furaffinity,
Philomena.Scrapers.Pixiv,
Philomena.Scrapers.Raw
]

View file

@ -0,0 +1,35 @@
defmodule Philomena.Scrapers.Pixiv do
  @moduledoc """
  Scraper for Pixiv artwork pages.

  Recognises Pixiv artwork URLs, fetches the submission through the
  `touch/ajax/illust/details` endpoint and returns a map with the source URL,
  the author's account name, a plain-text description and the list of images.
  """

  # Captures the numeric artwork id. Both the `www.` host prefix and the `/en`
  # locale path prefix are optional, so `https://pixiv.net/en/artworks/1`,
  # `https://www.pixiv.net/en/artworks/1` and `https://www.pixiv.net/artworks/1`
  # are all accepted.
  @url_regex ~r|\Ahttps?://(?:www\.)?pixiv\.net/(?:en/)?artworks/([0-9]+)|

  @doc """
  Returns `true` when `url` is a Pixiv artwork URL this scraper can handle.

  The parsed URI argument is ignored; only the raw URL string is inspected.
  """
  @spec can_handle?(URI.t(), String.t()) :: boolean()
  def can_handle?(_uri, url) do
    String.match?(url, @url_regex)
  end

  @doc """
  Fetches the artwork referenced by `url` and returns its scrape result.

  The result is a map with the keys `:source_url`, `:author_name`,
  `:description` and `:images`, where every image is a map with `:url` and
  `:camo_url`.

  Raises `MatchError` when `url` does not match the artwork URL pattern or
  when the HTTP request does not come back as `{:ok, %Tesla.Env{status: 200}}`.
  """
  @spec scrape(URI.t(), String.t()) :: map()
  def scrape(_uri, url) do
    [_, submission_id] = Regex.run(@url_regex, url, capture: :all)
    api_url = "https://www.pixiv.net/touch/ajax/illust/details?illust_id=#{submission_id}"
    {:ok, %Tesla.Env{status: 200, body: body}} = Philomena.Http.get(api_url)

    submission =
      body
      |> Jason.decode!()
      |> unwrap_envelope()

    illust = submission["illust_details"]

    %{
      source_url: url,
      author_name: submission["author_details"]["user_account"],
      description: plain_description(illust["comment"]),
      images: image_entries(illust)
    }
  end

  # NOTE(review): Pixiv AJAX responses appear to nest the payload under a
  # top-level "body" key - confirm against a live response. When no such
  # envelope is present the decoded document is used unchanged.
  defp unwrap_envelope(%{"body" => %{} = payload}), do: payload
  defp unwrap_envelope(decoded), do: decoded

  # Strips HTML from the artwork comment and normalises the whitespace that is
  # left behind. A missing comment yields an empty description instead of
  # passing `nil` into the sanitizer.
  defp plain_description(nil), do: ""

  defp plain_description(comment) do
    comment
    |> HtmlSanitizeEx.strip_tags()
    |> String.replace(~r/ +/, " ")
    |> String.replace(~r/\n \n +/, "\n")
    |> String.replace(~r/\n /, "\n")
    |> String.trim()
  end

  # Builds the image list from the illustration details.
  #
  # NOTE(review): "manga_a" presumably holds one entry per page for multi-page
  # works - verify. A list must not be indexed with a string key (Access raises
  # ArgumentError for that), so each page becomes its own image. A single map
  # is handled as before, and when "manga_a" is absent or empty the
  # illustration-level "url_big"/"url" keys are tried instead.
  defp image_entries(%{"manga_a" => [_ | _] = pages}), do: Enum.map(pages, &image_entry/1)
  defp image_entries(%{"manga_a" => %{} = page}), do: [image_entry(page)]
  defp image_entries(illust), do: [image_entry(illust)]

  # Converts one page (or the illustration itself) into an image map.
  defp image_entry(page) do
    %{
      url: "#{page["url_big"]}",
      camo_url: Camo.Image.image_url(page["url"])
    }
  end
end