mirror of
https://github.com/philomena-dev/philomena.git
synced 2025-03-17 09:00:05 +01:00
Add CivitAI post scraper
Added a scraper for CivitAI posts. It only supports post URLs (not single images) due to API limitations. The API also does not provide the "description" visible on the site itself. Meta information like model, prompt, seed, etc. is not used, as we do not (yet) have a way to store or show this neatly.
This commit is contained in:
parent
088b958718
commit
1a90f8cdc4
2 changed files with 55 additions and 0 deletions
|
@ -22,6 +22,7 @@ defmodule PhilomenaProxy.Scrapers do
|
|||
|
||||
@scrapers [
|
||||
PhilomenaProxy.Scrapers.Bluesky,
|
||||
PhilomenaProxy.Scrapers.Civitai,
|
||||
PhilomenaProxy.Scrapers.Deviantart,
|
||||
PhilomenaProxy.Scrapers.Pillowfort,
|
||||
PhilomenaProxy.Scrapers.Twitter,
|
||||
|
|
54
lib/philomena_proxy/scrapers/civitai.ex
Normal file
54
lib/philomena_proxy/scrapers/civitai.ex
Normal file
|
@ -0,0 +1,54 @@
|
|||
defmodule PhilomenaProxy.Scrapers.Civitai do
  @moduledoc false

  # Scraper for CivitAI post pages (`https://civitai.com/posts/<id>`).
  #
  # Only post URLs are recognised; single-image URLs are not matched by
  # `@url_regex`. The images of a post are fetched from the public CivitAI
  # REST API, and the result's description is always left empty.

  alias PhilomenaProxy.Scrapers.Scraper
  alias PhilomenaProxy.Scrapers

  @behaviour Scraper

  # Anchored at the start of the URL only; the numeric post ID is the single
  # capture group, so trailing query strings or fragments are tolerated.
  @url_regex ~r|\Ahttps?://(?:www\.)?civitai\.com/posts/([\d]+)/?|

  # Returns true when `url` looks like a CivitAI post URL. The parsed URI is
  # not consulted; matching is done on the raw URL string.
  @spec can_handle?(URI.t(), String.t()) :: boolean()
  def can_handle?(_uri, url) do
    String.match?(url, @url_regex)
  end

  # Fetches every image attached to the post referenced by `url`.
  #
  # Returns a map with `:source_url`, `:author_name`, `:description` (always
  # "") and `:images` (a list of `%{url: ..., camo_url: ...}` maps).
  #
  # Raises `MatchError` if `url` does not match `@url_regex` or if the API
  # request does not come back as `{:ok, %{status: 200}}`, and raises from
  # `Jason.decode!/1` if the body is not valid JSON.
  @spec scrape(URI.t(), Scrapers.url()) :: Scrapers.scrape_result()
  def scrape(_uri, url) do
    [post_id] = Regex.run(@url_regex, url, capture: :all_but_first)

    # NOTE(review): `nsfw=X` presumably widens the API's content filter so
    # that mature images are included -- confirm against the CivitAI API docs.
    api_url = "https://api.civitai.com/v1/images?postId=#{post_id}&nsfw=X"
    {:ok, %{status: 200, body: body}} = PhilomenaProxy.Http.get(api_url)

    items = body |> Jason.decode!() |> extract_items()

    %{
      source_url: url,
      author_name: author_name(items),
      description: "",
      images: Enum.map(items, &image_entry/1)
    }
  end

  # Pulls the "items" list out of the decoded response. A response without a
  # list under "items" (missing key, null, non-object JSON) is treated like an
  # empty post instead of crashing later in `hd/1`.
  defp extract_items(%{"items" => items}) when is_list(items), do: items
  defp extract_items(_json), do: []

  # The username is read from the first item only; an empty post yields "".
  defp author_name([first | _rest]), do: first["username"]
  defp author_name([]), do: ""

  # Builds one image entry: the original URL plus its Camo-proxied URL.
  defp image_entry(item) do
    image_url = item["url"]

    %{
      url: image_url,
      camo_url: PhilomenaProxy.Camo.image_url(image_url)
    }
  end
end
|
Loading…
Add table
Reference in a new issue