mirror of
https://github.com/philomena-dev/philomena.git
synced 2025-01-19 22:27:59 +01:00
Feature/main/246 scrape pillowfort (#139)
This commit is contained in:
parent
faa9b0784a
commit
12ce4f82e1
2 changed files with 51 additions and 0 deletions
|
@ -1,6 +1,7 @@
|
|||
defmodule Philomena.Scrapers do
|
||||
@scrapers [
|
||||
Philomena.Scrapers.Deviantart,
|
||||
Philomena.Scrapers.Pillowfort,
|
||||
Philomena.Scrapers.Twitter,
|
||||
Philomena.Scrapers.Tumblr,
|
||||
Philomena.Scrapers.Raw
|
||||
|
|
50
lib/philomena/scrapers/pillowfort.ex
Executable file
50
lib/philomena/scrapers/pillowfort.ex
Executable file
|
@ -0,0 +1,50 @@
|
|||
defmodule Philomena.Scrapers.Pillowfort do
  @moduledoc """
  Scraper for Pillowfort post URLs.

  Recognizes `http(s)://www.pillowfort.social/posts/<id>` URLs, requests the
  post's `/json` endpoint and converts the decoded response into a map with
  `:source_url`, `:author_name`, `:description` and `:images` keys.
  """

  @url_regex ~r|\Ahttps?://www\.pillowfort\.social/posts/([0-9]+)|

  @doc """
  Returns `true` when `url` starts with a Pillowfort post URL.

  The parsed URI argument is ignored; only the raw URL string is matched.
  """
  @spec can_handle?(URI.t(), String.t()) :: boolean()
  def can_handle?(_uri, url) do
    String.match?(url, @url_regex)
  end

  @doc """
  Fetches the post referenced by `url` and returns its scraped data as a map.

  The returned map contains:

    * `:source_url` - the `url` that was passed in
    * `:author_name` - the `"username"` field of the post JSON
    * `:description` - the post title and content with HTML tags removed,
      joined by a horizontal-rule separator (empty parts are omitted)
    * `:images` - a list of `%{url: ..., camo_url: ...}` maps, one per entry
      of the post's `"media"` list

  Raises if `url` does not match the Pillowfort post pattern, if the HTTP
  request does not succeed with status 200, or if the body is not valid JSON.
  """
  @spec scrape(URI.t(), String.t()) :: map()
  def scrape(_uri, url) do
    # Assertive match: callers are expected to have checked can_handle?/2 first.
    [post_id] = Regex.run(@url_regex, url, capture: :all_but_first)

    api_url = "https://www.pillowfort.social/posts/#{post_id}/json"

    Philomena.Http.get(api_url)
    |> json!()
    |> process_response!(url)
  end

  # Decodes the body of a successful (status 200) response. Any other result
  # intentionally has no matching clause and therefore raises.
  defp json!({:ok, %Tesla.Env{body: body, status: 200}}),
    do: Jason.decode!(body)

  # Builds the scrape result map from the decoded post JSON.
  defp process_response!(post_json, url) do
    # A missing or null "media" key is treated as "no images" instead of
    # crashing inside the enumeration.
    images =
      for media <- post_json["media"] || [] do
        %{
          url: media["url"],
          camo_url: Camo.Image.image_url(media["small_image_url"])
        }
      end

    %{
      source_url: url,
      author_name: post_json["username"],
      description: Enum.join(title(post_json) ++ content(post_json), "\n\n---\n\n"),
      images: images
    }
  end

  # Returns the sanitized title wrapped in a list, or [] when it is absent/empty.
  defp title(%{"title" => title}) when title not in [nil, ""] do
    [remove_html_tags(title)]
  end

  defp title(_), do: []

  # Returns the sanitized content wrapped in a list, or [] when it is absent/empty.
  defp content(%{"content" => content}) when content not in [nil, ""] do
    [remove_html_tags(content)]
  end

  defp content(_), do: []

  defp remove_html_tags(text) do
    # The markup parser won't render these tags, so remove them.
    # The `s` (dotall) modifier lets `.` match newlines, so a tag whose
    # attributes are wrapped onto several lines is stripped as well.
    String.replace(text, ~r|<.+?>|s, "")
  end
end
|
Loading…
Reference in a new issue