mirror of
https://github.com/philomena-dev/philomena.git
synced 2024-11-23 20:18:00 +01:00
Fix scraper error with invalid hostname
This commit is contained in:
parent
363e27f063
commit
1eed44aa95
1 changed file with 12 additions and 11 deletions
|
@ -56,16 +56,17 @@ defmodule PhilomenaProxy.Scrapers do
|
|||
# Parses `url` and hands it to the first module in `@scrapers` that both reports it can
# handle the URL and produces a truthy scrape result.
#
# Returns the scraper's result, or `nil` when the URL has no hostname or no scraper
# yields a result.
def scrape!(url) do
  case URI.parse(url) do
    %URI{host: nil} ->
      # Scraping without a hostname doesn't make sense because the proxy cannot fetch it,
      # and some scrapers may test properties of the hostname. This guard must run before
      # any scraper is consulted.
      nil

    uri ->
      # `Enum.find_value/3` stops at the first truthy return, so `scrape/2` is invoked
      # only on scrapers whose `can_handle?/2` passed. A scraper that returns `nil` or
      # `false` from `scrape/2` lets the search continue with the next scraper.
      Enum.find_value(@scrapers, nil, fn scraper ->
        scraper.can_handle?(uri, url) && scraper.scrape(uri, url)
      end)
  end
end
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue