Mirror of https://github.com/philomena-dev/philomena.git (synced 2024-11-23 20:18:00 +01:00)
Fix scraper error with invalid hostname
parent 363e27f063
commit 1eed44aa95
1 changed file with 12 additions and 11 deletions
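For context: Elixir's URI.parse/1 does not reject malformed or host-less input; it simply returns a %URI{} with host: nil. If a scraper's can_handle?/2 then inspects the hostname with a String function (an assumption about the scraper internals, illustrated with String.ends_with?/2 below), the nil host raises. A minimal sketch of that failure mode:

uri = URI.parse("not a url")
uri.host
#=> nil

# Hypothetical hostname check of the kind a scraper might perform:
String.ends_with?(uri.host, "example.com")
#=> ** (FunctionClauseError) no function clause matching in String.ends_with?/2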
@@ -56,16 +56,17 @@ defmodule PhilomenaProxy.Scrapers do
   def scrape!(url) do
     uri = URI.parse(url)
 
-    @scrapers
-    |> Enum.find(& &1.can_handle?(uri, url))
-    |> wrap()
-    |> Enum.map(& &1.scrape(uri, url))
-    |> unwrap()
+    cond do
+      is_nil(uri.host) ->
+        # Scraping without a hostname doesn't make sense because the proxy cannot fetch it, and
+        # some scrapers may test properties of the hostname.
+        nil
+
+      true ->
+        # Find the first scraper which can handle the URL and process, or return nil
+        Enum.find_value(@scrapers, nil, fn scraper ->
+          scraper.can_handle?(uri, url) && scraper.scrape(uri, url)
+        end)
+    end
   end
-
-  defp wrap(nil), do: []
-  defp wrap(res), do: [res]
-
-  defp unwrap([result]), do: result
-  defp unwrap(_result), do: nil
 end
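The patched dispatch can be exercised on its own. The sketch below mirrors the control flow of the new scrape!/1; ExampleScraper is a hypothetical stand-in for the real modules behind @scrapers, not code from this repository:

defmodule ScrapeSketch do
  # Hypothetical stub standing in for a real scraper module.
  defmodule ExampleScraper do
    def can_handle?(uri, _url), do: uri.host == "example.com"
    def scrape(_uri, url), do: {:ok, url}
  end

  @scrapers [ExampleScraper]

  def scrape!(url) do
    uri = URI.parse(url)

    cond do
      is_nil(uri.host) ->
        # No hostname: nothing to fetch and nothing safe to match against.
        nil

      true ->
        # First scraper that can handle the URL wins; nil if none match.
        Enum.find_value(@scrapers, nil, fn scraper ->
          scraper.can_handle?(uri, url) && scraper.scrape(uri, url)
        end)
    end
  end
end

ScrapeSketch.scrape!("https://example.com/a.png")
#=> {:ok, "https://example.com/a.png"}
ScrapeSketch.scrape!("not a url")
#=> nil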