mirror of
https://github.com/philomena-dev/philomena.git
synced 2024-11-30 14:57:59 +01:00
fix twitter scraper
This commit is contained in:
parent
26c1297c16
commit
cc51981b05
2 changed files with 4 additions and 4 deletions
|
@ -33,7 +33,7 @@ defmodule Philomena.Http do
|
||||||
{Tesla.Middleware.Headers,
|
{Tesla.Middleware.Headers,
|
||||||
[
|
[
|
||||||
{"User-Agent",
|
{"User-Agent",
|
||||||
"Mozilla/5.0 (X11; Philomena; Linux x86_64; rv:70.0) Gecko/20100101 Firefox/76.0"}
|
"Mozilla/5.0 (X11; Philomena; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0"}
|
||||||
| headers
|
| headers
|
||||||
]}
|
]}
|
||||||
],
|
],
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
defmodule Philomena.Scrapers.Twitter do
|
defmodule Philomena.Scrapers.Twitter do
|
||||||
@gt_regex ~r|document.cookie = decodeURIComponent\("gt=(\d+);|
|
@gt_regex ~r|decodeURIComponent\("gt=(\d+);|
|
||||||
@url_regex ~r|\Ahttps?://(?:mobile\.)?twitter.com/([A-Za-z\d_]+)/status/([\d]+)/?|
|
@url_regex ~r|\Ahttps?://(?:mobile\.)?twitter.com/([A-Za-z\d_]+)/status/([\d]+)/?|
|
||||||
@script_regex ~r|<script type="text/javascript" .*? src="(https://abs.twimg.com/responsive-web/client-web/main\.[\da-z]+\.js)">|
|
@script_regex ~r|="(https://abs.twimg.com/responsive-web/client-web(?:-legacy)?/main\.[\da-z]+\.js)"|
|
||||||
@bearer_regex ~r|"(AAAAAAAAAAAAA[^"]*)"|
|
@bearer_regex ~r|"(AAAAAAAAAAAAA[^"]*)"|
|
||||||
|
|
||||||
@spec can_handle?(URI.t(), String.t()) :: true | false
|
@spec can_handle?(URI.t(), String.t()) :: true | false
|
||||||
|
@ -64,7 +64,7 @@ defmodule Philomena.Scrapers.Twitter do
|
||||||
|
|
||||||
defp extract_guest_token_and_bearer({:ok, %Tesla.Env{body: page}}) do
|
defp extract_guest_token_and_bearer({:ok, %Tesla.Env{body: page}}) do
|
||||||
[gt] = Regex.run(@gt_regex, page, capture: :all_but_first)
|
[gt] = Regex.run(@gt_regex, page, capture: :all_but_first)
|
||||||
[script] = Regex.run(@script_regex, page, capture: :all_but_first)
|
[script | _] = Regex.run(@script_regex, page, capture: :all_but_first)
|
||||||
|
|
||||||
{:ok, %{body: body}} = Philomena.Http.get(script)
|
{:ok, %{body: body}} = Philomena.Http.get(script)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue