Skip to content

Commit

Permalink
Switched to Zendriver
Browse files Browse the repository at this point in the history
  • Loading branch information
Xewdy444 committed Feb 11, 2025
1 parent fb1d064 commit 23313c1
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 75 deletions.
9 changes: 3 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# CF-Clearance-Scraper

## nodriver Version
## Zendriver Version
A simple program for scraping Cloudflare clearance (cf_clearance) cookies from websites that issue Cloudflare challenges to visitors. This program works with all Cloudflare challenge types (JavaScript, managed, and interactive). If you would prefer to use Playwright, you can check out the [Playwright version](https://github.com/Xewdy444/CF-Clearance-Scraper/tree/playwright).


Expand All @@ -9,7 +9,7 @@ In order to bypass Cloudflare challenges with the clearance cookies, you must ma

- The user agent used to fetch the clearance cookie must match the user agent being used within the requests that use the clearance cookie
> [!NOTE]
> The default user agent used by the scraper is `Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36`.
> The default user agent used by the scraper is `Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36`.
- The IP address used to fetch the clearance cookie must match the IP address being used to make the requests that use the clearance cookie

```mermaid
Expand All @@ -29,9 +29,6 @@ Then, install the Python dependencies:
$ pip install -r requirements.txt

## Usage
> [!NOTE]
> Headless mode is not supported on Windows.
> [!WARNING]
> The user agent you choose may affect your ability to solve the Cloudflare challenge.
Expand Down Expand Up @@ -67,5 +64,5 @@ options:
[14:24:27] [INFO] Going to https://sergiodemo.com/security/challenge/legacy-challenge...
[14:24:28] [INFO] Solving Cloudflare challenge [Interactive]...
[14:24:31] [INFO] Cookie: cf_clearance=SkyEdEGvKp1BBA2NpRW3Azsw5neMD6sEEqJd6jOCCfs-1736886257-1.2.1.1-cam47ywp3q_yKE1bw0lZ2YS83dnh_BsIHtS7earbsYE.AxQDBtZiifiHvp1nZGRhABaSdjU7XRQpUCVwUSrlJGH8DXr50YR18pNLxBvcEJFO2gPMxr.ZjKze8rWgM9H4rPeET67jzAo_ZRpNP6hGCvdyO62VVCtqDBQDKhKZz9yZQp7YEHK7tchQIteVgu.dUxYdan5_D.R0zewnS382BP0w1AoTf2p40.lQwbhgildEiKG14xACd13V4EEthkZV0dnliwcn35rT3h32ODf50MABQNSQ8WjhZhbLSNOPO_zEhrK9R0Yn4eBuRKvnL9_x9jKvaBPDPAgyiZv_VzFP_g
[14:24:31] [INFO] User agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36
[14:24:31] [INFO] User agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36
[14:24:31] [INFO] Writing Cloudflare clearance cookie information to cookies.json...
88 changes: 22 additions & 66 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,43 +2,24 @@

import argparse
import asyncio
import io
import json
import logging
import sys
from datetime import datetime, timedelta, timezone
from enum import Enum
from typing import Any, Dict, Final, Iterable, List, Optional, TypedDict
from typing import Any, Dict, Final, Iterable, List, Optional

import nest_asyncio
import nodriver
from nodriver.cdp.network import Cookie
from nodriver.core.element import Element
import zendriver
from selenium_authenticated_proxy import SeleniumAuthenticatedProxy

if sys.platform != "win32":
from xvfbwrapper import Xvfb
from zendriver.cdp.network import T_JSON_DICT, Cookie
from zendriver.core.element import Element

COMMAND: Final[str] = (
'{name}: {binary} --header "Cookie: {cookies}" --header "User-Agent: {user_agent}" {url}'
)


class JSONCookie(TypedDict):
"""A type for representing a JSON cookie."""

name: str
value: str
domain: str
path: str
expires: float
httpOnly: bool
secure: bool
sameSite: str


class NodriverOptions(list):
"""A class for managing nodriver options."""
class ZendriverOptions(list):
"""A class for managing zendriver options."""

def add_argument(self, arg: str) -> None:
"""
Expand Down Expand Up @@ -90,7 +71,7 @@ def __init__(
headless: bool,
proxy: Optional[str],
) -> None:
options = NodriverOptions()
options = ZendriverOptions()
options.add_argument(f"--user-agent={user_agent}")

if not http2:
Expand All @@ -99,34 +80,23 @@ def __init__(
if not http3:
options.add_argument("--disable-quic")

if headless and sys.platform == "win32":
raise Exception("Headless mode is not supported on Windows.")

self._virtual_display = Xvfb() if headless else None

if proxy is not None:
auth_proxy = SeleniumAuthenticatedProxy(proxy, use_legacy_extension=True)
auth_proxy.enrich_chrome_options(options)

config = nodriver.Config(browser_args=options, sandbox=False)
self.driver = nodriver.Browser(config)
config = zendriver.Config(headless=headless, browser_args=options)
self.driver = zendriver.Browser(config)
self._timeout = timeout

async def __aenter__(self) -> CloudflareSolver:
if self._virtual_display is not None:
self._virtual_display.start()

await self.driver.start()
return self

async def __aexit__(self, *_: Any) -> None:
self.driver.stop()

if self._virtual_display is not None:
self._virtual_display.stop()
await self.driver.stop()

@staticmethod
def _format_cookies(cookies: Iterable[Cookie]) -> List[JSONCookie]:
def _format_cookies(cookies: Iterable[Cookie]) -> List[T_JSON_DICT]:
"""
Format cookies into a list of JSON cookies.
Expand All @@ -137,38 +107,26 @@ def _format_cookies(cookies: Iterable[Cookie]) -> List[JSONCookie]:
Returns
-------
List[JSONCookie]
List[T_JSON_DICT]
List of JSON cookies.
"""
return [
JSONCookie(
name=cookie["name"],
value=cookie["value"],
domain=cookie["domain"],
path=cookie["path"],
expires=cookie["expires"],
httpOnly=cookie["httpOnly"],
secure=cookie["secure"],
sameSite=cookie["sameSite"],
)
for cookie in [cookie.to_json() for cookie in cookies]
]
return [cookie.to_json() for cookie in cookies]

@staticmethod
def extract_clearance_cookie(
cookies: Iterable[JSONCookie],
) -> Optional[JSONCookie]:
cookies: Iterable[T_JSON_DICT],
) -> Optional[T_JSON_DICT]:
"""
Extract the Cloudflare clearance cookie from a list of cookies.
Parameters
----------
cookies : Iterable[JSONCookie]
cookies : Iterable[T_JSON_DICT]
List of cookies.
Returns
-------
Optional[JSONCookie]
Optional[T_JSON_DICT]
The Cloudflare clearance cookie. Returns None if the cookie is not found.
"""

Expand All @@ -178,13 +136,13 @@ def extract_clearance_cookie(

return None

async def get_cookies(self) -> List[JSONCookie]:
async def get_cookies(self) -> List[T_JSON_DICT]:
"""
Get all cookies from the current page.
Returns
-------
List[JSONCookie]
List[T_JSON_DICT]
List of cookies.
"""
return self._format_cookies(await self.driver.cookies.get_all())
Expand All @@ -198,7 +156,7 @@ async def detect_challenge(self) -> Optional[ChallengePlatform]:
Optional[ChallengePlatform]
The Cloudflare challenge platform.
"""
html: str = await self.driver.main_tab.get_content()
html = await self.driver.main_tab.get_content()

for platform in ChallengePlatform:
if f"cType: '{platform.value}'" in html:
Expand Down Expand Up @@ -282,7 +240,7 @@ async def main() -> None:
parser.add_argument(
"-ua",
"--user-agent",
default="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
default="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
help="The user agent to use for the browser requests",
type=str,
)
Expand Down Expand Up @@ -334,16 +292,14 @@ async def main() -> None:
)

args = parser.parse_args()
sys.stdout = io.StringIO()
nest_asyncio.apply()

logging.basicConfig(
format="[%(asctime)s] [%(levelname)s] %(message)s",
datefmt="%H:%M:%S",
level=logging.INFO,
)

logging.getLogger("nodriver").setLevel(logging.WARNING)
logging.getLogger("zendriver").setLevel(logging.WARNING)
logging.info("Launching %s browser...", "headed" if args.headed else "headless")

challenge_messages = {
Expand Down
4 changes: 1 addition & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
nest_asyncio==1.6.0
nodriver==0.39
selenium_authenticated_proxy==1.1.2
xvfbwrapper==0.2.9; platform_system != "Windows"
zendriver==0.4.1

0 comments on commit 23313c1

Please sign in to comment.