mirror of
https://github.com/philomena-dev/philomena.git
synced 2024-11-23 20:18:00 +01:00
fix twitter scraper
This commit is contained in:
parent
e581bc2d4e
commit
4e70124f36
1 changed file with 3 additions and 6 deletions
|
@@ -1,5 +1,5 @@
|
|||
defmodule Philomena.Scrapers.Twitter do
|
||||
@gt_regex ~r|gt=(\d+?);|
|
||||
@gt_regex ~r|document.cookie = decodeURIComponent\("gt=(\d+);|
|
||||
@url_regex ~r|\Ahttps?://(?:mobile\.)?twitter.com/([A-Za-z\d_]+)/status/([\d]+)/?|
|
||||
@script_regex ~r|<script type="text/javascript" .*? src="(https://abs.twimg.com/responsive-web/web/main\.[\da-z]+\.js)">|
|
||||
@bearer_regex ~r|"(AAAAAAAAAAAAA[^"]*)"|
|
||||
|
@@ -59,11 +59,8 @@ defmodule Philomena.Scrapers.Twitter do
|
|||
|> Map.put("url", url)
|
||||
end
|
||||
|
||||
defp extract_guest_token_and_bearer(%Tesla.Env{body: page, headers: headers}) do
|
||||
[{_, gt}] =
|
||||
Enum.filter(headers, fn {k, v} -> k == "set-cookie" and String.starts_with?(v, "gt=") end)
|
||||
|
||||
[gt] = Regex.run(@gt_regex, gt, capture: :all_but_first)
|
||||
defp extract_guest_token_and_bearer(%Tesla.Env{body: page}) do
|
||||
[gt] = Regex.run(@gt_regex, page, capture: :all_but_first)
|
||||
[script] = Regex.run(@script_regex, page, capture: :all_but_first)
|
||||
|
||||
%{body: body} = Philomena.Http.get!(script)
|
||||
|
|
Loading…
Reference in a new issue