mirror of
https://github.com/philomena-dev/philomena.git
synced 2025-02-20 04:14:23 +01:00
fix tumblr scrapes for text posts
This commit is contained in:
parent
6a3d292983
commit
2347d018ca
1 changed file with 5 additions and 5 deletions
|
@@ -1,6 +1,6 @@
|
||||||
defmodule Philomena.Scrapers.Tumblr do
|
defmodule Philomena.Scrapers.Tumblr do
|
||||||
@url_regex ~r|\Ahttps?://(?:.*)/(?:image\|post)/(\d+)(?:\z\|[/?#])|
|
@url_regex ~r|\Ahttps?://(?:.*)/(?:image\|post)/(\d+)(?:\z\|[/?#])|
|
||||||
@inline_regex ~r|https?://(?:\d+\.)?media\.tumblr\.com\/[a-f\d]+\/tumblr(?:_inline)?_[a-z\d]+_\d+\.(?:png\|jpe?g\|gif)|i
|
@media_regex ~r|https?://(?:\d+\.)?media\.tumblr\.com/[a-f\d]+/[a-f\d]+-[a-f\d]+/s\d+x\d+/[a-f\d]+\.(?:png\|jpe?g\|gif)|i
|
||||||
@size_regex ~r|_(\d+)(\..+)\z|
|
@size_regex ~r|_(\d+)(\..+)\z|
|
||||||
@sizes [1280, 540, 500, 400, 250, 100, 75]
|
@sizes [1280, 540, 500, 400, 250, 100, 75]
|
||||||
@tumblr_ranges [
|
@tumblr_ranges [
|
||||||
|
@@ -54,10 +54,10 @@ defmodule Philomena.Scrapers.Tumblr do
|
||||||
|
|
||||||
defp process_post!(%{"type" => "text"} = post) do
|
defp process_post!(%{"type" => "text"} = post) do
|
||||||
images =
|
images =
|
||||||
@inline_regex
|
@media_regex
|
||||||
|> Regex.scan(post["text"])
|
|> Regex.scan(post["body"])
|
||||||
|> Enum.map(fn url ->
|
|> Enum.map(fn [url | _captures] ->
|
||||||
%{url: upsize(url), camo_url: Camo.Image.image_url(url)}
|
%{url: url, camo_url: Camo.Image.image_url(url)}
|
||||||
end)
|
end)
|
||||||
|
|
||||||
add_meta(post, images)
|
add_meta(post, images)
|
||||||
|
|
Loading…
Reference in a new issue