From f358b824cbbf7e7a34cdc48e1bac658b42ce0f48 Mon Sep 17 00:00:00 2001 From: "byte[]" Date: Thu, 30 Dec 2021 16:03:31 -0500 Subject: [PATCH] Update twitter scraper (fixes derpibooru/philomena#261) --- lib/philomena/scrapers/twitter.ex | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/philomena/scrapers/twitter.ex b/lib/philomena/scrapers/twitter.ex index bc250654..77a51b84 100644 --- a/lib/philomena/scrapers/twitter.ex +++ b/lib/philomena/scrapers/twitter.ex @@ -1,8 +1,8 @@ defmodule Philomena.Scrapers.Twitter do - @gt_regex ~r|decodeURIComponent\("gt=(\d+);| @url_regex ~r|\Ahttps?://(?:mobile\.)?twitter.com/([A-Za-z\d_]+)/status/([\d]+)/?| @script_regex ~r|="(https://abs.twimg.com/responsive-web/client-web(?:-legacy)?/main\.[\da-z]+\.js)"| @bearer_regex ~r|"(AAAAAAAAAAAAA[^"]*)"| + @activate_url "https://api.twitter.com/1.1/guest/activate.json" @spec can_handle?(URI.t(), String.t()) :: true | false def can_handle?(_uri, url) do @@ -63,13 +63,14 @@ defmodule Philomena.Scrapers.Twitter do end defp extract_guest_token_and_bearer({:ok, %Tesla.Env{body: page}}) do - [gt] = Regex.run(@gt_regex, page, capture: :all_but_first) [script | _] = Regex.run(@script_regex, page, capture: :all_but_first) - {:ok, %{body: body}} = Philomena.Http.get(script) [bearer] = Regex.run(@bearer_regex, body, capture: :all_but_first) + {:ok, %{body: body}} = Philomena.Http.post(@activate_url, nil, [{"Authorization", "Bearer #{bearer}"}]) + gt = Map.fetch!(Jason.decode!(body), "guest_token") + {gt, bearer} end end