mirror of
https://github.com/philomena-dev/philomena.git
synced 2024-11-27 13:47:58 +01:00
fix tumblr scrapes for text posts
This commit is contained in:
parent
6a3d292983
commit
2347d018ca
1 changed file with 5 additions and 5 deletions
|
@@ -1,6 +1,6 @@
|
|||
defmodule Philomena.Scrapers.Tumblr do
|
||||
@url_regex ~r|\Ahttps?://(?:.*)/(?:image\|post)/(\d+)(?:\z\|[/?#])|
|
||||
@inline_regex ~r|https?://(?:\d+\.)?media\.tumblr\.com\/[a-f\d]+\/tumblr(?:_inline)?_[a-z\d]+_\d+\.(?:png\|jpe?g\|gif)|i
|
||||
@media_regex ~r|https?://(?:\d+\.)?media\.tumblr\.com/[a-f\d]+/[a-f\d]+-[a-f\d]+/s\d+x\d+/[a-f\d]+\.(?:png\|jpe?g\|gif)|i
|
||||
@size_regex ~r|_(\d+)(\..+)\z|
|
||||
@sizes [1280, 540, 500, 400, 250, 100, 75]
|
||||
@tumblr_ranges [
|
||||
|
@@ -54,10 +54,10 @@ defmodule Philomena.Scrapers.Tumblr do
|
|||
|
||||
defp process_post!(%{"type" => "text"} = post) do
|
||||
images =
|
||||
@inline_regex
|
||||
|> Regex.scan(post["text"])
|
||||
|> Enum.map(fn url ->
|
||||
%{url: upsize(url), camo_url: Camo.Image.image_url(url)}
|
||||
@media_regex
|
||||
|> Regex.scan(post["body"])
|
||||
|> Enum.map(fn [url | _captures] ->
|
||||
%{url: url, camo_url: Camo.Image.image_url(url)}
|
||||
end)
|
||||
|
||||
add_meta(post, images)
|
||||
|
|
Loading…
Reference in a new issue