mirror of
https://github.com/philomena-dev/philomena.git
synced 2024-11-30 14:57:59 +01:00
Add Bluesky scraper (#290)
* add bluesky scraper
* use com.atproto.sync.getBlob to get original bluesky image
* ignore data after bluesky post id
* fix json access and missing function
* fix bluesky fullsize image regex
This commit is contained in:
parent
34c9f76330
commit
a08510f21e
2 changed files with 49 additions and 0 deletions
|
@ -21,6 +21,7 @@ defmodule PhilomenaProxy.Scrapers do
|
|||
}
|
||||
|
||||
@scrapers [
|
||||
PhilomenaProxy.Scrapers.Bluesky,
|
||||
PhilomenaProxy.Scrapers.Deviantart,
|
||||
PhilomenaProxy.Scrapers.Pillowfort,
|
||||
PhilomenaProxy.Scrapers.Twitter,
|
||||
|
|
48
lib/philomena_proxy/scrapers/bluesky.ex
Normal file
48
lib/philomena_proxy/scrapers/bluesky.ex
Normal file
|
@ -0,0 +1,48 @@
|
|||
defmodule PhilomenaProxy.Scrapers.Bluesky do
  @moduledoc false

  # Scraper for Bluesky post URLs of the form
  # `https://bsky.app/profile/<handle-or-did>/post/<post-id>`.
  #
  # The profile identifier is resolved to a DID, the post is fetched through
  # the public `app.bsky.feed.getPosts` endpoint, and every embedded image is
  # rewritten to a `com.atproto.sync.getBlob` URL so that the original blob is
  # referenced rather than the resized feed rendition.

  alias PhilomenaProxy.Scrapers.Scraper
  alias PhilomenaProxy.Scrapers

  @behaviour Scraper

  # Capture 1: profile identifier. Capture 2: post id, stopping at `/`, `?`
  # or `#` so that trailing path/query/fragment data is ignored.
  @url_regex ~r|https://bsky\.app/profile/([^/]+)/post/([^/?#]+)|

  # Capture 1: DID path segment. Capture 2: blob CID (everything before `@`).
  @fullsize_image_regex ~r|.*/img/feed_fullsize/plain/([^/]+)/([^@]+).*|

  # Replacement template for `String.replace/3`; `\\1` and `\\2` refer to the
  # captures of `@fullsize_image_regex`.
  @blob_image_url_pattern "https://bsky.social/xrpc/com.atproto.sync.getBlob/?did=\\1&cid=\\2"

  # Returns true when `url` contains a Bluesky post URL. The parsed URI is
  # not consulted.
  @spec can_handle?(URI.t(), String.t()) :: boolean()
  def can_handle?(_uri, url) do
    String.match?(url, @url_regex)
  end

  # Fetches the post behind `url` and returns a map with `:source_url`,
  # `:author_name`, `:description` and `:images` (one `%{url:, camo_url:}`
  # entry per embedded image).
  #
  # Raises when `url` does not match the post URL pattern, when an API request
  # does not come back as `{:ok, %{status: 200}}`, when the expected JSON keys
  # are missing, or when the post has no image embed (`Enum.map/2` on `nil`).
  @spec scrape(URI.t(), Scrapers.url()) :: Scrapers.scrape_result()
  def scrape(_uri, url) do
    [handle, id] = Regex.run(@url_regex, url, capture: :all_but_first)

    did = fetch_did(handle)

    api_url_get_posts =
      "https://public.api.bsky.app/xrpc/app.bsky.feed.getPosts?uris=at://#{did}/app.bsky.feed.post/#{id}"

    # Jason decodes object keys as strings by default, so the response must be
    # accessed with string keys.
    post_json =
      api_url_get_posts
      |> PhilomenaProxy.Http.get()
      |> json!()
      |> Map.fetch!("posts")
      |> hd()

    %{
      source_url: url,
      author_name: post_json["author"]["handle"],
      description: post_json["record"]["text"],
      images: Enum.map(post_json["embed"]["images"], &image_entry/1)
    }
  end

  # Resolves the profile identifier from the URL to a DID. An identifier that
  # already is a DID (`did:...`) is used as-is; anything else is treated as a
  # handle and resolved through `com.atproto.identity.resolveHandle`.
  defp fetch_did("did:" <> _rest = did), do: did

  defp fetch_did(handle) do
    api_url_resolve_handle =
      "https://public.api.bsky.app/xrpc/com.atproto.identity.resolveHandle?handle=#{handle}"

    api_url_resolve_handle
    |> PhilomenaProxy.Http.get()
    |> json!()
    |> Map.fetch!("did")
  end

  # Builds the scrape entry for one embedded image: the full-size URL is
  # rewritten to the blob endpoint, the thumbnail is routed through Camo.
  defp image_entry(image) do
    %{
      url: String.replace(image["fullsize"], @fullsize_image_regex, @blob_image_url_pattern),
      camo_url: PhilomenaProxy.Camo.image_url(image["thumb"])
    }
  end

  # Decodes the body of a successful (HTTP 200) response; any other result
  # fails to match and raises `FunctionClauseError`.
  defp json!({:ok, %{body: body, status: 200}}), do: Jason.decode!(body)
end
|
Loading…
Reference in a new issue