defmodule PhilomenaProxy.Scrapers.Civitai do
  @moduledoc false

  alias PhilomenaProxy.Scrapers.Scraper
  alias PhilomenaProxy.Scrapers

  @behaviour Scraper

  # Matches CivitAI post URLs and captures the numeric post id.
  # Only post URLs (not single-image URLs) are supported, due to API limitations.
  @url_regex ~r|\Ahttps?://(?:www\.)?civitai\.com/posts/(\d+)/?|

  # Returns true when `url` is a CivitAI post URL this scraper can handle.
  @spec can_handle?(URI.t(), String.t()) :: boolean()
  def can_handle?(_uri, url) do
    String.match?(url, @url_regex)
  end

  # Fetches image metadata for a CivitAI post via the public v1 API.
  #
  # The author name is taken from the first image item; the API does not
  # expose the post description visible on the site, so it is always "".
  @spec scrape(URI.t(), Scrapers.url()) :: Scrapers.scrape_result()
  def scrape(_uri, url) do
    [post_id] = Regex.run(@url_regex, url, capture: :all_but_first)

    # nsfw=X requests every content rating so no image of the post is omitted.
    api_url = "https://api.civitai.com/v1/images?postId=#{post_id}&nsfw=X"

    # Assertive match: any non-200 response is unexpected and should crash.
    {:ok, %{status: 200, body: body}} = PhilomenaProxy.Http.get(api_url)

    body
    |> Jason.decode!()
    |> Map.get("items")
    |> scrape_result(url)
  end

  # Missing or empty "items" (the API returns null/absent for unknown posts):
  # produce a result with no images instead of crashing on hd(nil).
  defp scrape_result(items, url) when items in [nil, []] do
    %{
      source_url: url,
      author_name: "",
      description: "",
      images: []
    }
  end

  defp scrape_result([first | _] = items, url) do
    images =
      Enum.map(items, fn %{"url" => image_url} ->
        %{
          url: image_url,
          camo_url: PhilomenaProxy.Camo.image_url(image_url)
        }
      end)

    %{
      source_url: url,
      author_name: first["username"],
      description: "",
      images: images
    }
  end
end