mirror of
https://github.com/philomena-dev/philomena.git
synced 2025-03-28 06:17:46 +01:00
Trying to add Pixiv scraper
This commit is contained in:
parent
f90d07e637
commit
f2eec71ec5
2 changed files with 36 additions and 0 deletions
|
@ -7,6 +7,7 @@ defmodule Philomena.Scrapers do
|
|||
Philomena.Scrapers.Inkbunny,
|
||||
Philomena.Scrapers.E621,
|
||||
Philomena.Scrapers.Furaffinity,
|
||||
Philomena.Scrapers.Pixiv,
|
||||
Philomena.Scrapers.Raw
|
||||
]
|
||||
|
||||
|
|
35
lib/philomena/scrapers/pixiv.ex
Normal file
35
lib/philomena/scrapers/pixiv.ex
Normal file
|
@ -0,0 +1,35 @@
|
|||
defmodule Philomena.Scrapers.Pixiv do
  @moduledoc """
  Scraper for Pixiv artwork pages.

  Recognizes artwork URLs, extracts the numeric submission id and fetches the
  submission details from Pixiv's touch AJAX endpoint.
  """

  # Accepts the artwork URL with or without the "www." host prefix and with or
  # without the "/en/" locale segment. The capture group is the submission id.
  # Only the start is anchored, so trailing query strings/fragments are fine.
  @url_regex ~r|\Ahttps?://(?:www\.)?pixiv\.net/(?:en/)?artworks/([0-9]+)|

  @doc """
  Returns `true` when `url` looks like a Pixiv artwork page.

  The parsed URI is ignored; only the raw `url` string is inspected.
  """
  @spec can_handle?(URI.t(), String.t()) :: boolean()
  def can_handle?(_uri, url) do
    String.match?(url, @url_regex)
  end

  @doc """
  Fetches the submission referenced by `url` and returns a map with the
  `:source_url`, `:author_name`, `:description` and a single-element `:images`
  list (each image carrying `:url` and `:camo_url`).

  Raises a `MatchError` when `url` does not match the artwork pattern or when
  the HTTP request does not come back as `{:ok, %Tesla.Env{status: 200}}`;
  `Jason.decode!/1` raises on a body that is not valid JSON.
  """
  @spec scrape(URI.t(), String.t()) :: map()
  def scrape(_uri, url) do
    [_, submission_id] = Regex.run(@url_regex, url, capture: :all)
    api_url = "https://www.pixiv.net/touch/ajax/illust/details?illust_id=#{submission_id}"
    {:ok, %Tesla.Env{status: 200, body: body}} = Philomena.Http.get(api_url)

    submission = Jason.decode!(body)

    # NOTE(review): assumes "manga_a" is a map carrying "url" and "url_big" --
    # confirm against the actual API response shape.
    image = submission["illust_details"]["manga_a"]

    %{
      source_url: url,
      author_name: submission["author_details"]["user_account"],
      description: clean_description(submission["illust_details"]["comment"]),
      images: [
        %{
          # Interpolation keeps the original behaviour of yielding "" for a
          # missing value instead of nil.
          url: "#{image["url_big"]}",
          camo_url: Camo.Image.image_url(image["url"])
        }
      ]
    }
  end

  # Strips HTML tags from the submission comment and normalizes the whitespace
  # left behind: collapses runs of spaces, then removes space-led blank lines
  # and leading spaces after newlines, and finally trims both ends.
  defp clean_description(comment) do
    comment
    |> HtmlSanitizeEx.strip_tags()
    |> String.replace(~r/ +/, " ")
    |> String.replace(~r/\n \n +/, "\n")
    |> String.replace(~r/\n /, "\n")
    |> String.trim()
  end
end
|
Loading…
Add table
Reference in a new issue