Fix scraper error with invalid hostname

This commit is contained in:
Liam 2024-06-09 12:40:44 -04:00
parent 363e27f063
commit 1eed44aa95

View file

@@ -56,16 +56,17 @@ defmodule PhilomenaProxy.Scrapers do
# Scrapes `url` with the first registered scraper that can handle it.
#
# Returns the scraper's result, or `nil` when the URL has no hostname
# (the proxy cannot fetch a host-less URL, and some scrapers inspect
# properties of the hostname) or when no scraper can handle the URL.
def scrape!(url) do
  uri = URI.parse(url)

  if is_nil(uri.host) do
    # Scraping without a hostname doesn't make sense because the proxy cannot fetch it, and
    # some scrapers may test properties of the hostname.
    nil
  else
    # Find the first scraper which can handle the URL and process, or return nil
    Enum.find_value(@scrapers, nil, fn scraper ->
      scraper.can_handle?(uri, url) && scraper.scrape(uri, url)
    end)
  end
end
# Normalizes a possibly-nil lookup result into a list: `nil` becomes the
# empty list, any other value becomes a one-element list.
defp wrap(result) do
  case result do
    nil -> []
    found -> [found]
  end
end
# Extracts the sole element of a one-element list; every other shape
# (empty list, longer list, or any non-matching value) yields `nil`.
defp unwrap(results) do
  case results do
    [single] -> single
    _other -> nil
  end
end
end