From 543c75245a74c49d7e9b4205bdff0d04c7654df8 Mon Sep 17 00:00:00 2001 From: vladkens Date: Fri, 22 May 2026 02:02:48 +0300 Subject: [PATCH 1/9] =?UTF-8?q?feat!:=20pluggable=20HTTP=20backend=20?= =?UTF-8?q?=E2=80=93=20httpx=20or=20curl-cffi=20(#269)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../20260521-http-backend-migration.md | 292 ++++++++++++++++++ pyproject.toml | 8 +- readme.md | 11 +- scripts/update_gql_ops.py | 10 +- tests/test_http.py | 224 ++++++++++++++ twscrape/account.py | 32 +- twscrape/accounts_pool.py | 4 +- twscrape/api.py | 3 +- twscrape/cli.py | 5 +- twscrape/http.py | 215 +++++++++++++ twscrape/login.py | 8 +- twscrape/models.py | 23 +- twscrape/queue_client.py | 18 +- twscrape/xclid.py | 15 +- uv.lock | 173 ++++++++++- 15 files changed, 967 insertions(+), 74 deletions(-) create mode 100644 docs/plans/completed/20260521-http-backend-migration.md create mode 100644 tests/test_http.py create mode 100644 twscrape/http.py diff --git a/docs/plans/completed/20260521-http-backend-migration.md b/docs/plans/completed/20260521-http-backend-migration.md new file mode 100644 index 00000000..6f741a87 --- /dev/null +++ b/docs/plans/completed/20260521-http-backend-migration.md @@ -0,0 +1,292 @@ +# HTTP Backend Abstraction + +## Overview + +Add a thin HTTP client abstraction layer (`twscrape/http.py`) so twscrape can use either +`httpx` or `curl_cffi` as the HTTP backend. `curl_cffi` enables browser-level TLS fingerprint +spoofing (JA3/SNI), which bypasses Cloudflare bot detection that httpx fails against. + +Both libraries become **optional** extras. Auto-detection picks `curl_cffi` if installed, +falls back to `httpx`. User can override with `TWS_HTTP_BACKEND=curl|httpx` env var. +If env var is set but the specified backend is not installed → `ImportError` immediately. + +Existing users who have `httpx` installed see zero behavior change. 
+ +**Behavior change (intentional):** `WriteTimeout` and `PoolTimeout` previously fell through +to the "unknown error / 15 min timeout" path. After this change they map to `NetworkError` +and trigger silent retry with the same account — which is the correct behavior. + +Reference: [issue #269](https://github.com/vladkens/twscrape/issues/269), +example impl: `delta-farmer/lib/http.py`. + +## Context + +- **Language/framework**: Python 3.10+, async throughout (no sync variants) +- **Relevant files**: + - `twscrape/http.py` — new file (core of this task) + - `twscrape/account.py` — `make_client()` creates `httpx.AsyncClient` + - `twscrape/accounts_pool.py` — `from httpx import HTTPStatusError`; accesses `e.response.status_code` / `e.response.text` + - `twscrape/queue_client.py` — wraps client, catches 5 httpx-specific exceptions + - `twscrape/login.py` — uses `AsyncClient` + `Response`; mutates `client.headers` and reads `client.cookies` after construction + - `twscrape/xclid.py` — creates its own `httpx.AsyncClient` for public page fetching + - `twscrape/models.py`, `twscrape/api.py`, `twscrape/cli.py` — `httpx.Response` type annotations + - `pyproject.toml` — `httpx>=0.26.0` in required `dependencies` +- **Test mocking**: `tests/test_queue_client.py` uses `pytest-httpx`/`HTTPXMock` — still works + since `HttpxClient` wraps `httpx.AsyncClient` internally so `pytest-httpx` intercepts there + +## Development Approach + +- **Testing approach**: Regular (code first, then tests alongside) +- Complete each task fully before moving to the next +- Run `uv run pytest` after each task — all tests must pass before proceeding + +## Solution Overview + +Single new file `twscrape/http.py` contains everything: + +``` +HttpError +├── HttpStatusError(response: Response) ← raise_for_status() failures; carries .response +├── NetworkError ← timeout, read/write/pool/proxy (retry silently) +└── ConnectError ← connection failures (retry 3x then raise) + +Response ← thin wrapper; delegates 
to httpx.Response or curl_cffi.Response + overrides raise_for_status() → HttpStatusError(response=self) + no __slots__ (allows arbitrary setattr for __username) + +HttpClient ← plain base class; async context manager +├── HttpxClient ← wraps httpx.AsyncClient; maps httpx errors → common types +└── CurlClient ← wraps curl_cffi.AsyncSession(impersonate="chrome") + +_detect_backend() ← reads TWS_HTTP_BACKEND; strict fail if set but not installed +make_client() ← factory; calls _detect_backend() if backend=None +``` + +**HttpClient full API:** +```python +async def request(method, url, *, params=None, headers=None, json=None, data=None) -> Response +async def get(url, *, params=None, headers=None) -> Response +async def post(url, *, params=None, headers=None, json=None, data=None) -> Response +async def aclose() -> None +async def __aenter__(self) -> HttpClient +async def __aexit__(self, *args) -> None # calls aclose() +@property cookies # mutable dict-like: __contains__, __getitem__, __setitem__, .get(), .update(), .items() +@property headers # mutable dict-like: __setitem__, .update() +``` + +**Error mapping:** + +| httpx | → | +|---|---| +| `ReadTimeout, WriteTimeout, PoolTimeout, ProxyError` | `NetworkError` | +| `ConnectError, ConnectTimeout` | `ConnectError` | +| `raise_for_status()` → `HTTPStatusError` | `HttpStatusError` | + +| curl_cffi | → | +|---|---| +| `errors.RequestsError` with libcurl code 5, 6 or 7 (couldn't resolve proxy/host, couldn't connect) | `ConnectError` | +| any other `errors.RequestsError` (base; covers Timeout, ProxyError, etc.) 
| `NetworkError` | +| `raise_for_status()` | `HttpStatusError` | + +## Implementation Steps + +--- + +### Task 1: Create `twscrape/http.py` + +**Files:** +- Create: `twscrape/http.py` +- Create: `tests/test_http.py` + +- [x] define exception hierarchy: + - `HttpError(Exception)` + - `HttpStatusError(HttpError)` with field `response: Response` (set in `__init__`) + - `NetworkError(HttpError)` + - `ConnectError(HttpError)` +- [x] define `Response` wrapper class (no `__slots__`): + - properties: `status_code`, `text`, `headers`, `content`, `url`, `request` + - method `json() -> Any` — delegates + - method `raise_for_status()` — catches any exception from `self._rep.raise_for_status()`, raises `HttpStatusError(response=self)` +- [x] define `HttpClient` base class: + - `async request(method, url, *, params, headers, json, data) -> Response` + - `async get(url, **kwargs) -> Response` + - `async post(url, **kwargs) -> Response` + - `async aclose() -> None` + - `async __aenter__ / __aexit__` + - `@property cookies` — mutable dict-like + - `@property headers` — mutable dict-like +- [x] define `HttpxClient(HttpClient)`: + - import aliased: `from httpx import ConnectError as _HttpxConnectError` etc. 
to avoid name clash with project's `ConnectError` + - wraps `httpx.AsyncClient(proxy=..., follow_redirects=True, transport=AsyncHTTPTransport(retries=3))` + - `_wrap(coro)`: maps `ReadTimeout, WriteTimeout, PoolTimeout, ProxyError → NetworkError`; `ConnectError, ConnectTimeout → ConnectError` + - `request()` returns `Response(raw_rep)` +- [x] define `CurlClient(HttpClient)`: + - wraps `curl_cffi.requests.AsyncSession(impersonate="chrome", proxy=..., allow_redirects=True)` + - `_wrap(coro)`: maps `errors.RequestsError` with libcurl code 5/6/7 (couldn't resolve proxy/host, couldn't connect) `→ ConnectError`; any other `errors.RequestsError → NetworkError` + - `request()` returns `Response(raw_rep)` + - `aclose()` calls `await self._session.close()` +- [x] define `_detect_backend() -> str`: + - if `TWS_HTTP_BACKEND` set: check that the named backend is installed; raise `ImportError` with install hint if missing; raise `ValueError` if the value is neither `curl` nor `httpx` + - else: try `import curl_cffi` → `"curl"`; try `import httpx` → `"httpx"`; else raise `ImportError` +- [x] define `make_client(backend=None, *, proxy=None, headers=None, cookies=None) -> HttpClient` +- [x] write `tests/test_http.py`: + - `_detect_backend()` respects `TWS_HTTP_BACKEND` + - `TWS_HTTP_BACKEND` set but backend not installed → `ImportError` + - `Response.raise_for_status()` raises `HttpStatusError`; `err.response.status_code` and `err.response.text` are accessible + - `Response` delegates `.status_code`, `.json()`, `.text`, `.headers` + - `setattr(rep, "__username", "x"); assert getattr(rep, "__username") == "x"` (no `__slots__`) + - `HttpxClient` is an async context manager + - `HttpxClient.cookies` and `.headers` support `__setitem__` and `__contains__` +- [x] run `uv run pytest tests/test_http.py` — must pass + +--- + +### Task 2: Update `account.py` + +**Files:** +- Modify: `twscrape/account.py` + +- [x] remove `from httpx import AsyncClient, AsyncHTTPTransport` +- [x] add `from .http import HttpClient, make_client as _make_http_client` +- [x] rewrite `make_client()` return type `HttpClient`; assemble all 
headers upfront: + ```python + headers = {**self.headers} + headers["user-agent"] = self.user_agent + headers["content-type"] = "application/json" + headers["authorization"] = TOKEN + headers["x-twitter-active-user"] = "yes" + headers["x-twitter-client-language"] = "en" + if "ct0" in self.cookies: + headers["x-csrf-token"] = self.cookies["ct0"] + return _make_http_client(proxy=proxy, headers=headers, cookies=self.cookies) + ``` +- [x] run `uv run pytest` — must pass + +--- + +### Task 3: Update `queue_client.py` + +**Files:** +- Modify: `twscrape/queue_client.py` + +- [x] remove `import httpx` and `from httpx import AsyncClient, Response` +- [x] add `from .http import HttpClient, NetworkError, ConnectError, HttpStatusError, Response` +- [x] `Ctx.__init__`: `clt: AsyncClient` → `clt: HttpClient` +- [x] `Ctx.req()` / `QueueClient.get()` / `QueueClient.req()` return types: `Response | None` +- [x] in `_check_rep`: `except httpx.HTTPStatusError` → `except HttpStatusError` +- [x] in `QueueClient.req()`: + ```python + except NetworkError: continue + except ConnectError as e: + connection_retry += 1 + if connection_retry >= 3: raise e + ``` +- [x] run `uv run pytest` — must pass + +--- + +### Task 4: Update `accounts_pool.py` + +**Files:** +- Modify: `twscrape/accounts_pool.py` + +- [x] remove `from httpx import HTTPStatusError` +- [x] add `from .http import HttpStatusError` +- [x] `except HTTPStatusError as e:` → `except HttpStatusError as e:`; handler body unchanged — `e.response.status_code` and `e.response.text` work via `HttpStatusError.response` +- [x] run `uv run pytest` — must pass + +--- + +### Task 5: Update `login.py` + +**Files:** +- Modify: `twscrape/login.py` + +- [x] remove `from httpx import AsyncClient, Response` +- [x] add `from .http import HttpClient, Response` +- [x] `TaskCtx.client: AsyncClient` → `TaskCtx.client: HttpClient` +- [x] all `Response` type annotations keep their name but now refer to `.http.Response` instead of `httpx.Response` +- [x] verify all `client.headers["..."] = ...` and `client.cookies.get(...)` calls still work (both backends expose 
mutable dict-like jars) +- [x] run `uv run pytest` — must pass + +--- + +### Task 6: Update `xclid.py` + +**Files:** +- Modify: `twscrape/xclid.py` + +- [x] remove `import httpx` +- [x] add `from .http import HttpClient, make_client as _make_http_client` +- [x] `_make_client() -> HttpClient`: `return _make_http_client(headers={"user-agent": UserAgent().chrome})` +- [x] update all function signatures: `clt: httpx.AsyncClient` → `clt: HttpClient` +- [x] run `uv run pytest` — must pass + +--- + +### Task 7: Update type annotations in `models.py`, `api.py`, `cli.py` + +**Files:** +- Modify: `twscrape/models.py` +- Modify: `twscrape/api.py` +- Modify: `twscrape/cli.py` + +- [x] `models.py`: remove `import httpx`; add `from .http import Response`; replace all `httpx.Response` with `Response` in parse_* signatures; update comment at line 811 removing httpx mention +- [x] `api.py`: remove `from httpx import Response`; add `from .http import Response` +- [x] `cli.py`: remove `import httpx`; add `from .http import Response`; update `to_str()` signature +- [x] fix `[tool.pyright]` in `pyproject.toml`: change `include` from non-existent dirs to `["twscrape"]` +- [ ] run `uv run pyright twscrape/` — no errors +- [x] run `uv run pytest` — must pass + +--- + +### Task 8: Update `pyproject.toml` + +**Files:** +- Modify: `pyproject.toml` + +- [x] remove `httpx>=0.26.0` from `dependencies` +- [x] add optional extras: + ```toml + [project.optional-dependencies] + httpx = ["httpx>=0.26.0"] + curl = ["curl-cffi>=0.7.0"] + ``` +- [x] keep `pytest-httpx` in dev dependencies (still intercepts at httpx level inside `HttpxClient`) +- [x] run `uv run pytest` — must pass (httpx still installed in dev env via pytest-httpx) + +--- + +### Task 9: Verify acceptance criteria + +- [x] `uv run pytest` — all tests pass (73/73) +- [ ] `uv run pyright twscrape/` — no errors +- [x] `grep -r "import httpx" twscrape/` — returns empty (no direct httpx usage outside http.py) +- [ ] verify 
`TWS_HTTP_BACKEND=httpx` override works +- [ ] verify clean import: `python -c "from twscrape.http import Response, HttpClient, NetworkError, ConnectError, HttpStatusError"` + +--- + +### Task 10: [Final] Update documentation + +- [ ] update `readme.md`: add section on optional dependencies and backend selection +- [ ] add entry to `changelog.md`: httpx is now optional; install `twscrape[httpx]` or `twscrape[curl]`; `curl_cffi` backend preferred when installed +- [ ] move this plan to `docs/plans/completed/` + +## Post-Completion + +**Manual verification:** +- Test login flow with a real account using `curl_cffi` backend +- Verify TLS fingerprint via `tls.browserleaks.com` — should show Chrome profile +- Test `TWS_HTTP_BACKEND=httpx` override still works after curl_cffi is installed + +**Isolation test** (to verify optional-deps work end to end): +```bash +uv venv /tmp/twscrape-curl-only +/tmp/twscrape-curl-only/bin/pip install -e ".[curl]" +/tmp/twscrape-curl-only/bin/python -c "import twscrape; print('ok')" +``` + +**Breaking change notice** (in release notes): +- `httpx` is no longer a required dependency — users must install `twscrape[httpx]` or `twscrape[curl]` +- Existing installs unaffected: httpx was installed before, auto-detect finds it diff --git a/pyproject.toml b/pyproject.toml index 80db1ce2..05330c11 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,14 +19,18 @@ classifiers = [ dependencies = [ "aiosqlite>=0.17.0", "fake-useragent>=1.4.0", - "httpx>=0.26.0", "loguru>=0.7.0", "pyotp>=2.9.0", "beautifulsoup4>=4.13.0", ] +[project.optional-dependencies] +httpx = ["httpx>=0.26.0"] +curl = ["curl-cffi>=0.7.0"] + [dependency-groups] dev = [ + "curl-cffi>=0.7.0", "pyright>=1.1.369", "pytest-asyncio>=0.23.3", "pytest-cov>=4.1.0", @@ -66,5 +70,5 @@ select = ["E", "F", "I", "UP", "C4", "SIM"] ignore = ["E501", "UP035", "SIM105"] [tool.pyright] -include = ["apps", "clients", "strategy", "lib", "scripts"] +include = ["twscrape"] typeCheckingMode = 
"standard" diff --git a/readme.md b/readme.md index be7141b4..9087387d 100644 --- a/readme.md +++ b/readme.md @@ -18,14 +18,15 @@ Twitter GraphQL API implementation with [SNScrape](https://github.com/JustAnothe ## Install +twscrape requires one HTTP backend to be installed (peer dependency): + ```bash -pip install twscrape -``` -Or development version: -```bash -pip install git+https://github.com/vladkens/twscrape.git +pip install twscrape[httpx] # httpx (legacy) +pip install twscrape[curl] # curl-cffi (recommended, better TLS fingerprinting) ``` +`curl-cffi` uses libcurl with browser-level TLS fingerprint spoofing and is recommended to avoid Cloudflare bot detection. When both are installed, `curl-cffi` is preferred. Override with `TWS_HTTP_BACKEND=httpx` or `TWS_HTTP_BACKEND=curl`. + ## Features - Support both Search & GraphQL Twitter API - Async/Await functions (can run multiple scrapers in parallel at the same time) diff --git a/scripts/update_gql_ops.py b/scripts/update_gql_ops.py index 94cca23e..e6cae5b4 100644 --- a/scripts/update_gql_ops.py +++ b/scripts/update_gql_ops.py @@ -11,9 +11,9 @@ import os import re import sys +from typing import Any -import httpx - +from twscrape.http import make_client from twscrape.xclid import get_scripts_list, get_tw_page_text, script_url API_FILE = "twscrape/api.py" @@ -28,7 +28,7 @@ def _is_relevant_script(url: str) -> bool: async def get_scripts() -> list[tuple[str, str]]: os.makedirs(CACHE_DIR, exist_ok=True) - async with httpx.AsyncClient(follow_redirects=True) as clt: + async with make_client() as clt: text = await get_tw_page_text("https://x.com/elonmusk", clt) urls = list(get_scripts_list(text)) @@ -50,7 +50,7 @@ async def fetch_scripts(scripts: list[tuple[str, str]], force: bool) -> None: print(f"Downloading {len(todo)} scripts.") sem = asyncio.Semaphore(10) - async def fetch(clt: httpx.AsyncClient, i: int, url: str, path: str) -> None: + async def fetch(clt: Any, i: int, url: str, path: str) -> None: async with sem: 
print(f" ({i:3d}/{len(todo):3d}) {url}") rep = await clt.get(url) @@ -61,7 +61,7 @@ async def fetch(clt: httpx.AsyncClient, i: int, url: str, path: str) -> None: with open(path, "w", encoding="utf-8") as fp: fp.write(rep.text) - async with httpx.AsyncClient(follow_redirects=True) as clt: + async with make_client() as clt: await asyncio.gather(*[fetch(clt, i, url, path) for i, (url, path) in enumerate(todo, 1)]) diff --git a/tests/test_http.py b/tests/test_http.py new file mode 100644 index 00000000..c097541d --- /dev/null +++ b/tests/test_http.py @@ -0,0 +1,224 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from twscrape.http import ( + ConnectError, + HttpClient, + HttpError, + HttpStatusError, + NetworkError, + Response, + _detect_backend, + make_client, +) + + +def _mock_response(status_code=200, text="ok", json_data=None, headers=None): + rep = MagicMock() + rep.status_code = status_code + rep.text = text + rep.content = text.encode() + rep.headers = headers or {} + rep.url = "https://example.com" + rep.request = MagicMock() + rep.request.method = "GET" + rep.request.url = "https://example.com" + rep.json.return_value = json_data or {} + + if status_code >= 400: + rep.raise_for_status.side_effect = Exception(f"HTTP {status_code}") + else: + rep.raise_for_status.return_value = None + + return rep + + +# --- Response wrapper --- + + +def test_response_delegates_attributes(): + raw = _mock_response(200, "hello", {"key": "val"}) + rep = Response(raw) + assert rep.status_code == 200 + assert rep.text == "hello" + assert rep.json() == {"key": "val"} + assert rep.request.method == "GET" + + +def test_response_raise_for_status_ok(): + rep = Response(_mock_response(200)) + rep.raise_for_status() # should not raise + + +def test_response_raise_for_status_error(): + rep = Response(_mock_response(403)) + with pytest.raises(HttpStatusError) as exc_info: + rep.raise_for_status() + err = exc_info.value + assert err.response is rep + assert 
err.response.status_code == 403 + assert err.response.text == "ok" + + +def test_response_allows_setattr(): + rep = Response(_mock_response(200)) + setattr(rep, "__username", "alice") + assert getattr(rep, "__username") == "alice" + + +# --- Exception hierarchy --- + + +def test_exception_hierarchy(): + assert issubclass(HttpStatusError, HttpError) + assert issubclass(NetworkError, HttpError) + assert issubclass(ConnectError, HttpError) + + +def test_http_status_error_carries_response(): + raw = _mock_response(500, "server error") + resp = Response(raw) + err = HttpStatusError("fail", response=resp) + assert err.response is resp + assert err.response.status_code == 500 + assert err.response.text == "server error" + + +# --- _detect_backend --- + + +def test_detect_backend_env_curl_not_installed(monkeypatch): + monkeypatch.setenv("TWS_HTTP_BACKEND", "curl") + with ( + patch.dict("sys.modules", {"curl_cffi": None}), + pytest.raises(ImportError, match="curl-cffi is not installed"), + ): + _detect_backend() + + +def test_detect_backend_env_httpx(monkeypatch): + monkeypatch.setenv("TWS_HTTP_BACKEND", "httpx") + result = _detect_backend() + assert result == "httpx" + + +def test_detect_backend_env_missing_backend(monkeypatch): + monkeypatch.setenv("TWS_HTTP_BACKEND", "httpx") + with ( + patch.dict("sys.modules", {"httpx": None}), + pytest.raises(ImportError, match="not installed"), + ): + _detect_backend() + + +def test_detect_backend_invalid_value(monkeypatch): + monkeypatch.setenv("TWS_HTTP_BACKEND", "requests") + with pytest.raises(ValueError, match="Invalid"): + _detect_backend() + + +def test_detect_backend_auto_httpx(monkeypatch): + monkeypatch.delenv("TWS_HTTP_BACKEND", raising=False) + with patch.dict("sys.modules", {"curl_cffi": None}): + result = _detect_backend() + assert result == "httpx" + + +def test_detect_backend_no_backends(monkeypatch): + monkeypatch.delenv("TWS_HTTP_BACKEND", raising=False) + with ( + patch.dict("sys.modules", {"curl_cffi": None, 
"httpx": None}), + pytest.raises(ImportError, match="No HTTP backend"), + ): + _detect_backend() + + +# --- HttpClient base --- + + +def test_http_client_is_async_context_manager(): + class MinimalClient(HttpClient): + closed = False + + async def request(self, method, url, **kwargs): + return Response(_mock_response()) + + async def aclose(self): + self.closed = True + + @property + def cookies(self): + return {} + + @property + def headers(self): + return {} + + import asyncio + + async def run(): + client = MinimalClient() + async with client as c: + assert c is client + assert client.closed + + asyncio.get_event_loop().run_until_complete(run()) + + +# --- HttpxClient --- + + +def test_make_client_httpx_returns_httpx_client(monkeypatch): + monkeypatch.delenv("TWS_HTTP_BACKEND", raising=False) + from twscrape.http import HttpxClient + + client = make_client("httpx") + assert isinstance(client, HttpxClient) + + +async def test_httpx_client_cookies_and_headers_are_mutable(): + from twscrape.http import HttpxClient + + client = HttpxClient(headers={"x-foo": "bar"}, cookies={"ct0": "token"}) + # headers support __setitem__ + client.headers["x-new"] = "value" + # cookies support __contains__ + assert "ct0" in client.cookies + await client.aclose() + + +async def test_httpx_client_maps_network_errors(httpx_mock): + import httpx + + from twscrape.http import HttpxClient + + httpx_mock.add_exception(httpx.ReadTimeout("timeout")) + client = HttpxClient() + with pytest.raises(NetworkError): + await client.get("https://example.com") + await client.aclose() + + +async def test_httpx_client_maps_connect_errors(httpx_mock): + import httpx + + from twscrape.http import HttpxClient + + httpx_mock.add_exception(httpx.ConnectError("refused")) + client = HttpxClient() + with pytest.raises(ConnectError): + await client.get("https://example.com") + await client.aclose() + + +async def test_httpx_client_returns_response_wrapper(httpx_mock): + from twscrape.http import HttpxClient + + 
httpx_mock.add_response(url="https://example.com", json={"ok": True}, status_code=200) + client = HttpxClient() + rep = await client.get("https://example.com") + assert isinstance(rep, Response) + assert rep.status_code == 200 + assert rep.json() == {"ok": True} + await client.aclose() diff --git a/twscrape/account.py b/twscrape/account.py index 895819e0..28da25de 100644 --- a/twscrape/account.py +++ b/twscrape/account.py @@ -4,8 +4,8 @@ from dataclasses import asdict, dataclass, field from datetime import datetime -from httpx import AsyncClient, AsyncHTTPTransport - +from .http import HttpClient +from .http import make_client as _make_http_client from .models import JSONTrait from .utils import parse_proxy, utc @@ -50,26 +50,18 @@ def to_rs(self): rs["last_used"] = rs["last_used"].isoformat() if rs["last_used"] else None return rs - def make_client(self, proxy: str | None = None) -> AsyncClient: + def make_client(self, proxy: str | None = None) -> HttpClient: proxies = [proxy, os.getenv("TWS_PROXY"), self.proxy] proxies = [x for x in proxies if x is not None] proxy = parse_proxy(proxies[0]) if proxies else None - transport = AsyncHTTPTransport(retries=3) - client = AsyncClient(proxy=proxy, follow_redirects=True, transport=transport) - - # saved from previous usage - client.cookies.update(self.cookies) - client.headers.update(self.headers) - - # default settings - client.headers["user-agent"] = self.user_agent - client.headers["content-type"] = "application/json" - client.headers["authorization"] = TOKEN - client.headers["x-twitter-active-user"] = "yes" - client.headers["x-twitter-client-language"] = "en" - - if "ct0" in client.cookies: - client.headers["x-csrf-token"] = client.cookies["ct0"] + headers = {**self.headers} + headers["user-agent"] = self.user_agent + headers["content-type"] = "application/json" + headers["authorization"] = TOKEN + headers["x-twitter-active-user"] = "yes" + headers["x-twitter-client-language"] = "en" + if "ct0" in self.cookies: + 
headers["x-csrf-token"] = self.cookies["ct0"] - return client + return _make_http_client(proxy=proxy, headers=headers, cookies=self.cookies) diff --git a/twscrape/accounts_pool.py b/twscrape/accounts_pool.py index b07ade87..5a19832e 100644 --- a/twscrape/accounts_pool.py +++ b/twscrape/accounts_pool.py @@ -5,10 +5,10 @@ from typing import TypedDict from fake_useragent import UserAgent -from httpx import HTTPStatusError from .account import Account from .db import execute, fetchall, fetchone +from .http import HttpStatusError from .logger import logger from .login import LoginConfig, login from .utils import get_env_bool, parse_cookies, utc @@ -167,7 +167,7 @@ async def login(self, account: Account): await login(account, cfg=self._login_config) logger.info(f"Logged in to {account.username} successfully") return True - except HTTPStatusError as e: + except HttpStatusError as e: rep = e.response logger.error(f"Failed to login '{account.username}': {rep.status_code} - {rep.text}") return False diff --git a/twscrape/api.py b/twscrape/api.py index 9f1397e7..34120895 100644 --- a/twscrape/api.py +++ b/twscrape/api.py @@ -1,9 +1,8 @@ from contextlib import aclosing from typing import Literal -from httpx import Response - from .accounts_pool import AccountsPool +from .http import Response from .logger import logger, set_log_level from .models import ( AccountAbout, diff --git a/twscrape/cli.py b/twscrape/cli.py index beec772b..54bab79b 100644 --- a/twscrape/cli.py +++ b/twscrape/cli.py @@ -8,10 +8,9 @@ import sqlite3 from importlib.metadata import version -import httpx - from .api import API, AccountsPool from .db import get_sqlite_version +from .http import Response from .logger import logger, set_log_level from .login import LoginConfig from .models import Tweet, User @@ -36,7 +35,7 @@ def get_fn_arg(args): exit(1) -def to_str(doc: httpx.Response | Tweet | User | None) -> str: +def to_str(doc: Response | Tweet | User | None) -> str: if doc is None: return "Not Found. 
See --raw for more details." diff --git a/twscrape/http.py b/twscrape/http.py new file mode 100644 index 00000000..19449bc6 --- /dev/null +++ b/twscrape/http.py @@ -0,0 +1,215 @@ +import importlib.util +import os +from typing import Any, Literal + +HttpMethod = Literal["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "TRACE", "PATCH", "QUERY"] + + +class Response: + """Thin wrapper around httpx.Response or curl_cffi.Response.""" + + def __init__(self, rep: Any): + self._rep = rep + + @property + def status_code(self) -> int: + return self._rep.status_code + + @property + def text(self) -> str: + return self._rep.text + + @property + def content(self) -> bytes: + return self._rep.content + + @property + def headers(self) -> Any: + return self._rep.headers + + @property + def url(self) -> Any: + return self._rep.url + + @property + def request(self) -> Any: + return self._rep.request + + def json(self) -> Any: + return self._rep.json() + + def raise_for_status(self) -> None: + try: + self._rep.raise_for_status() + except Exception as e: + raise HttpStatusError(str(e), response=self) from e + + +class HttpError(Exception): ... + + +class NetworkError(HttpError): ... + + +class ConnectError(HttpError): ... + + +class HttpStatusError(HttpError): + def __init__(self, message: str, *, response: Response): + super().__init__(message) + self.response = response + + +class HttpClient: + async def request(self, method: HttpMethod, url: str, **kwargs) -> Response: ... + + async def get(self, url: str, **kwargs) -> Response: + return await self.request("GET", url, **kwargs) + + async def post(self, url: str, **kwargs) -> Response: + return await self.request("POST", url, **kwargs) + + async def aclose(self) -> None: ... + + async def __aenter__(self) -> "HttpClient": + return self + + async def __aexit__(self, *args: Any) -> None: + await self.aclose() + + @property + def cookies(self) -> Any: ... + + @property + def headers(self) -> Any: ... 
+ + +class HttpxClient(HttpClient): + def __init__( + self, *, proxy: str | None = None, headers: dict | None = None, cookies: dict | None = None + ): + import httpx + from httpx import AsyncHTTPTransport + + self._httpx = httpx + transport = AsyncHTTPTransport(retries=3) + self._client = httpx.AsyncClient( + proxy=proxy, + follow_redirects=True, + transport=transport, + headers=headers or {}, + cookies=cookies or {}, + ) + + @property + def cookies(self) -> Any: + return self._client.cookies + + @property + def headers(self) -> Any: + return self._client.headers + + async def request(self, method: HttpMethod, url: str, **kwargs) -> Response: + return await self._wrap(self._client.request(method, url, **kwargs)) + + async def aclose(self) -> None: + await self._client.aclose() + + async def _wrap(self, coro: Any) -> Response: + hx = self._httpx + try: + return Response(await coro) + except (hx.ConnectError, hx.ConnectTimeout) as e: + raise ConnectError(str(e)) from e + except (hx.ReadTimeout, hx.WriteTimeout, hx.PoolTimeout, hx.ProxyError) as e: + raise NetworkError(str(e)) from e + + +class CurlClient(HttpClient): + def __init__( + self, *, proxy: str | None = None, headers: dict | None = None, cookies: dict | None = None + ): + from curl_cffi.requests import AsyncSession + + self._session = AsyncSession( + impersonate="chrome", proxy=proxy, allow_redirects=True, headers=headers or {} + ) + if cookies: + self._session.cookies.update(cookies) + + @property + def cookies(self) -> Any: + return self._session.cookies + + @property + def headers(self) -> Any: + return self._session.headers + + async def request(self, method: HttpMethod, url: str, **kwargs) -> Response: + return await self._wrap(self._session.request(method, url, **kwargs)) + + async def aclose(self) -> None: + await self._session.close() + + async def _wrap(self, coro: Any) -> Response: + try: + return Response(await coro) + except Exception as e: + from curl_cffi.requests import errors as _curl_errors 
+ + if isinstance(e, _curl_errors.RequestsError): + # libcurl error codes: 5=COULDNT_RESOLVE_PROXY, 6=COULDNT_RESOLVE_HOST, 7=COULDNT_CONNECT + if getattr(e, "code", -1) in {5, 6, 7}: + raise ConnectError(str(e)) from e + raise NetworkError(str(e)) from e + raise + + +def _detect_backend() -> str: + forced = os.getenv("TWS_HTTP_BACKEND", "").lower().strip() + + if forced == "curl": + if importlib.util.find_spec("curl_cffi") is None: + raise ImportError( + "TWS_HTTP_BACKEND=curl but curl-cffi is not installed. " + "Run: pip install twscrape[curl]" + ) + return "curl" + + if forced == "httpx": + if importlib.util.find_spec("httpx") is None: + raise ImportError( + "TWS_HTTP_BACKEND=httpx but httpx is not installed. " + "Run: pip install twscrape[httpx]" + ) + return "httpx" + + if forced: + raise ValueError(f"Invalid TWS_HTTP_BACKEND={forced!r}. Expected 'curl' or 'httpx'.") + + if importlib.util.find_spec("curl_cffi") is not None: + return "curl" + if importlib.util.find_spec("httpx") is not None: + return "httpx" + + raise ImportError( + "No HTTP backend installed. Run: pip install twscrape[httpx] or pip install twscrape[curl]" + ) + + +def make_client( + backend: str | None = None, + *, + proxy: str | None = None, + headers: dict | None = None, + cookies: dict | None = None, +) -> HttpClient: + if backend is None: + backend = _detect_backend() + + if backend == "curl": + return CurlClient(proxy=proxy, headers=headers, cookies=cookies) + if backend == "httpx": + return HttpxClient(proxy=proxy, headers=headers, cookies=cookies) + + raise ValueError(f"Unknown backend: {backend!r}. 
Expected 'curl' or 'httpx'.") diff --git a/twscrape/login.py b/twscrape/login.py index 870dd293..4735082a 100644 --- a/twscrape/login.py +++ b/twscrape/login.py @@ -4,9 +4,9 @@ from typing import Any import pyotp -from httpx import AsyncClient, Response from .account import Account +from .http import HttpClient, Response from .imap import imap_get_email_code, imap_login from .logger import logger from .utils import utc @@ -22,20 +22,20 @@ class LoginConfig: @dataclass class TaskCtx: - client: AsyncClient + client: HttpClient acc: Account cfg: LoginConfig prev: Any imap: None | imaplib.IMAP4_SSL -async def get_guest_token(client: AsyncClient): +async def get_guest_token(client: HttpClient): rep = await client.post("https://api.x.com/1.1/guest/activate.json") rep.raise_for_status() return rep.json()["guest_token"] -async def login_initiate(client: AsyncClient) -> Response: +async def login_initiate(client: HttpClient) -> Response: payload = { "input_flow_data": { "flow_context": {"debug_overrides": {}, "start_location": {"location": "unknown"}} diff --git a/twscrape/models.py b/twscrape/models.py index 23318e30..142ab8ba 100644 --- a/twscrape/models.py +++ b/twscrape/models.py @@ -10,8 +10,7 @@ from datetime import datetime, timezone from typing import Generator, Optional, Union -import httpx - +from .http import Response from .logger import logger from .utils import find_item, get_or, int_or, to_old_rep, utc @@ -798,7 +797,7 @@ def _write_dump(kind: str, e: Exception, x: dict, obj: dict): logger.error(f"Failed to parse response of {kind}, writing dump to {dumpfile}") -def _parse_items(rep: httpx.Response, kind: str, limit: int = -1): +def _parse_items(rep: Response, kind: str, limit: int = -1): if kind == "user": Cls, key = User, "users" elif kind == "tweet": @@ -808,7 +807,7 @@ def _parse_items(rep: httpx.Response, kind: str, limit: int = -1): else: raise ValueError(f"Invalid kind: {kind}") - # check for dict, because httpx.Response can be mocked in tests with 
different type + # check for dict, because Response can be mocked in tests with different type res = rep if isinstance(rep, dict) else rep.json() obj = to_old_rep(res) @@ -833,7 +832,7 @@ def _parse_items(rep: httpx.Response, kind: str, limit: int = -1): # public helpers -def parse_tweet(rep: httpx.Response, twid: int) -> Tweet | None: +def parse_tweet(rep: Response, twid: int) -> Tweet | None: try: docs = list(parse_tweets(rep)) for x in docs: @@ -845,7 +844,7 @@ def parse_tweet(rep: httpx.Response, twid: int) -> Tweet | None: return None -def parse_user(rep: httpx.Response) -> User | None: +def parse_user(rep: Response) -> User | None: try: docs = list(parse_users(rep)) if len(docs) == 1: @@ -856,7 +855,7 @@ def parse_user(rep: httpx.Response) -> User | None: return None -def parse_trend(rep: httpx.Response) -> Trend | None: +def parse_trend(rep: Response) -> Trend | None: try: docs = list(parse_trends(rep)) if len(docs) == 1: @@ -867,7 +866,7 @@ def parse_trend(rep: httpx.Response) -> Trend | None: return None -def parse_about(rep: httpx.Response | dict) -> AccountAbout | None: +def parse_about(rep: Response | dict) -> AccountAbout | None: try: res = rep if isinstance(rep, dict) else rep.json() obj = get_or(res, "data.user_result_by_screen_name.result") @@ -879,7 +878,7 @@ def parse_about(rep: httpx.Response | dict) -> AccountAbout | None: return None -def parse_community(rep: httpx.Response | dict) -> Community | None: +def parse_community(rep: Response | dict) -> Community | None: try: res = rep if isinstance(rep, dict) else rep.json() community = get_or(res, "data.communityResults.result") @@ -891,13 +890,13 @@ def parse_community(rep: httpx.Response | dict) -> Community | None: return None -def parse_tweets(rep: httpx.Response, limit: int = -1) -> Generator[Tweet, None, None]: +def parse_tweets(rep: Response, limit: int = -1) -> Generator[Tweet, None, None]: return _parse_items(rep, "tweet", limit) # type: ignore -def parse_users(rep: httpx.Response, limit: 
int = -1) -> Generator[User, None, None]: +def parse_users(rep: Response, limit: int = -1) -> Generator[User, None, None]: return _parse_items(rep, "user", limit) # type: ignore -def parse_trends(rep: httpx.Response, limit: int = -1) -> Generator[Trend, None, None]: +def parse_trends(rep: Response, limit: int = -1) -> Generator[Trend, None, None]: return _parse_items(rep, kind="trends", limit=limit) # type: ignore diff --git a/twscrape/queue_client.py b/twscrape/queue_client.py index dc71fb76..65b99006 100644 --- a/twscrape/queue_client.py +++ b/twscrape/queue_client.py @@ -4,10 +4,8 @@ from typing import Any from urllib.parse import urlparse -import httpx -from httpx import AsyncClient, Response - from .accounts_pool import Account, AccountsPool +from .http import ConnectError, HttpClient, HttpMethod, HttpStatusError, NetworkError, Response from .logger import logger from .utils import utc from .xclid import XClIdGen @@ -49,7 +47,7 @@ async def get(cls, username: str, fresh=False) -> XClIdGen: class Ctx: - def __init__(self, acc: Account, clt: AsyncClient): + def __init__(self, acc: Account, clt: HttpClient): self.req_count = 0 self.acc = acc self.clt = clt @@ -57,7 +55,7 @@ def __init__(self, acc: Account, clt: AsyncClient): async def aclose(self): await self.clt.aclose() - async def req(self, method: str, url: str, params: ReqParams = None) -> Response: + async def req(self, method: HttpMethod, url: str, params: ReqParams = None) -> Response: # if code 404 on first try then generate new x-client-transaction-id and retry # https://github.com/vladkens/twscrape/issues/248 path = urlparse(url).path or "/" @@ -249,7 +247,7 @@ async def _check_rep(self, rep: Response) -> None: try: rep.raise_for_status() - except httpx.HTTPStatusError: + except HttpStatusError: logger.error(f"Unhandled API response code: {log_msg}") await self._close_ctx(utc.ts() + 60 * 15) # 15 minutes raise HandledError() @@ -257,7 +255,7 @@ async def _check_rep(self, rep: Response) -> None: async 
def get(self, url: str, params: ReqParams = None) -> Response | None: return await self.req("GET", url, params=params) - async def req(self, method: str, url: str, params: ReqParams = None) -> Response | None: + async def req(self, method: HttpMethod, url: str, params: ReqParams = None) -> Response | None: unknown_retry, connection_retry = 0, 0 while True: @@ -279,11 +277,11 @@ async def req(self, method: str, url: str, params: ReqParams = None) -> Response except HandledError: # retry with new account continue - except (httpx.ReadTimeout, httpx.ProxyError): + except NetworkError: # http transport failed, just retry with same account continue - except (httpx.ConnectError, httpx.ConnectTimeout) as e: - # if proxy missconfigured or ??? + except ConnectError as e: + # if proxy misconfigured or host unreachable connection_retry += 1 if connection_retry >= 3: raise e diff --git a/twscrape/xclid.py b/twscrape/xclid.py index 3e92713d..9c523755 100644 --- a/twscrape/xclid.py +++ b/twscrape/xclid.py @@ -7,16 +7,17 @@ from typing import Iterator import bs4 -import httpx from fake_useragent import UserAgent +from .http import HttpClient +from .http import make_client as _make_http_client -def _make_client() -> httpx.AsyncClient: - headers = {"user-agent": UserAgent().chrome} - return httpx.AsyncClient(headers=headers, follow_redirects=True) +def _make_client() -> HttpClient: + return _make_http_client(headers={"user-agent": UserAgent().chrome}) -async def get_tw_page_text(url: str, clt: httpx.AsyncClient): + +async def get_tw_page_text(url: str, clt: HttpClient): rep = await clt.get(url) rep.raise_for_status() @@ -216,7 +217,7 @@ def parse_vk_bytes(soup: bs4.BeautifulSoup) -> list[int]: return list(base64.b64decode(bytes(el, "utf-8"))) -async def parse_anim_idx(text: str, clt: httpx.AsyncClient) -> list[int]: +async def parse_anim_idx(text: str, clt: HttpClient) -> list[int]: scripts = list(get_scripts_list(text)) scripts = [x for x in scripts if "/ondemand.s." 
in x] if not scripts: @@ -244,7 +245,7 @@ def parse_anim_arr(soup: bs4.BeautifulSoup, vk_bytes: list[int]) -> list[list[fl return arr -async def load_keys(soup: bs4.BeautifulSoup, clt: httpx.AsyncClient) -> tuple[list[int], str]: +async def load_keys(soup: bs4.BeautifulSoup, clt: HttpClient) -> tuple[list[int], str]: anim_idx = await parse_anim_idx(str(soup), clt) vk_bytes = parse_vk_bytes(soup) anim_arr = parse_anim_arr(soup, vk_bytes) diff --git a/uv.lock b/uv.lock index 0d682f5b..5eb99140 100644 --- a/uv.lock +++ b/uv.lock @@ -60,6 +60,88 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/22/30/7cd8fdcdfbc5b869528b079bfb76dcdf6056b1a2097a662e5e8c04f42965/certifi-2026.4.22-py3-none-any.whl", hash = "sha256:3cb2210c8f88ba2318d29b0388d1023c8492ff72ecdde4ebdaddbb13a31b1c4a", size = 135707, upload-time = "2026-04-22T11:26:09.372Z" }, ] +[[package]] +name = "cffi" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycparser", marker = "implementation_name != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/93/d7/516d984057745a6cd96575eea814fe1edd6646ee6efd552fb7b0921dec83/cffi-2.0.0-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:0cf2d91ecc3fcc0625c2c530fe004f82c110405f101548512cce44322fa8ac44", size = 184283, upload-time = "2025-09-08T23:22:08.01Z" }, + { url = "https://files.pythonhosted.org/packages/9e/84/ad6a0b408daa859246f57c03efd28e5dd1b33c21737c2db84cae8c237aa5/cffi-2.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f73b96c41e3b2adedc34a7356e64c8eb96e03a3782b535e043a986276ce12a49", size = 180504, upload-time = "2025-09-08T23:22:10.637Z" }, + { url = 
"https://files.pythonhosted.org/packages/50/bd/b1a6362b80628111e6653c961f987faa55262b4002fcec42308cad1db680/cffi-2.0.0-cp310-cp310-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:53f77cbe57044e88bbd5ed26ac1d0514d2acf0591dd6bb02a3ae37f76811b80c", size = 208811, upload-time = "2025-09-08T23:22:12.267Z" }, + { url = "https://files.pythonhosted.org/packages/4f/27/6933a8b2562d7bd1fb595074cf99cc81fc3789f6a6c05cdabb46284a3188/cffi-2.0.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3e837e369566884707ddaf85fc1744b47575005c0a229de3327f8f9a20f4efeb", size = 216402, upload-time = "2025-09-08T23:22:13.455Z" }, + { url = "https://files.pythonhosted.org/packages/05/eb/b86f2a2645b62adcfff53b0dd97e8dfafb5c8aa864bd0d9a2c2049a0d551/cffi-2.0.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:5eda85d6d1879e692d546a078b44251cdd08dd1cfb98dfb77b670c97cee49ea0", size = 203217, upload-time = "2025-09-08T23:22:14.596Z" }, + { url = "https://files.pythonhosted.org/packages/9f/e0/6cbe77a53acf5acc7c08cc186c9928864bd7c005f9efd0d126884858a5fe/cffi-2.0.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:9332088d75dc3241c702d852d4671613136d90fa6881da7d770a483fd05248b4", size = 203079, upload-time = "2025-09-08T23:22:15.769Z" }, + { url = "https://files.pythonhosted.org/packages/98/29/9b366e70e243eb3d14a5cb488dfd3a0b6b2f1fb001a203f653b93ccfac88/cffi-2.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc7de24befaeae77ba923797c7c87834c73648a05a4bde34b3b7e5588973a453", size = 216475, upload-time = "2025-09-08T23:22:17.427Z" }, + { url = "https://files.pythonhosted.org/packages/21/7a/13b24e70d2f90a322f2900c5d8e1f14fa7e2a6b3332b7309ba7b2ba51a5a/cffi-2.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:cf364028c016c03078a23b503f02058f1814320a56ad535686f90565636a9495", size = 218829, upload-time = "2025-09-08T23:22:19.069Z" }, + { url = 
"https://files.pythonhosted.org/packages/60/99/c9dc110974c59cc981b1f5b66e1d8af8af764e00f0293266824d9c4254bc/cffi-2.0.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e11e82b744887154b182fd3e7e8512418446501191994dbf9c9fc1f32cc8efd5", size = 211211, upload-time = "2025-09-08T23:22:20.588Z" }, + { url = "https://files.pythonhosted.org/packages/49/72/ff2d12dbf21aca1b32a40ed792ee6b40f6dc3a9cf1644bd7ef6e95e0ac5e/cffi-2.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8ea985900c5c95ce9db1745f7933eeef5d314f0565b27625d9a10ec9881e1bfb", size = 218036, upload-time = "2025-09-08T23:22:22.143Z" }, + { url = "https://files.pythonhosted.org/packages/e2/cc/027d7fb82e58c48ea717149b03bcadcbdc293553edb283af792bd4bcbb3f/cffi-2.0.0-cp310-cp310-win32.whl", hash = "sha256:1f72fb8906754ac8a2cc3f9f5aaa298070652a0ffae577e0ea9bd480dc3c931a", size = 172184, upload-time = "2025-09-08T23:22:23.328Z" }, + { url = "https://files.pythonhosted.org/packages/33/fa/072dd15ae27fbb4e06b437eb6e944e75b068deb09e2a2826039e49ee2045/cffi-2.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:b18a3ed7d5b3bd8d9ef7a8cb226502c6bf8308df1525e1cc676c3680e7176739", size = 182790, upload-time = "2025-09-08T23:22:24.752Z" }, + { url = "https://files.pythonhosted.org/packages/12/4a/3dfd5f7850cbf0d06dc84ba9aa00db766b52ca38d8b86e3a38314d52498c/cffi-2.0.0-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:b4c854ef3adc177950a8dfc81a86f5115d2abd545751a304c5bcf2c2c7283cfe", size = 184344, upload-time = "2025-09-08T23:22:26.456Z" }, + { url = "https://files.pythonhosted.org/packages/4f/8b/f0e4c441227ba756aafbe78f117485b25bb26b1c059d01f137fa6d14896b/cffi-2.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2de9a304e27f7596cd03d16f1b7c72219bd944e99cc52b84d0145aefb07cbd3c", size = 180560, upload-time = "2025-09-08T23:22:28.197Z" }, + { url = 
"https://files.pythonhosted.org/packages/b1/b7/1200d354378ef52ec227395d95c2576330fd22a869f7a70e88e1447eb234/cffi-2.0.0-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:baf5215e0ab74c16e2dd324e8ec067ef59e41125d3eade2b863d294fd5035c92", size = 209613, upload-time = "2025-09-08T23:22:29.475Z" }, + { url = "https://files.pythonhosted.org/packages/b8/56/6033f5e86e8cc9bb629f0077ba71679508bdf54a9a5e112a3c0b91870332/cffi-2.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:730cacb21e1bdff3ce90babf007d0a0917cc3e6492f336c2f0134101e0944f93", size = 216476, upload-time = "2025-09-08T23:22:31.063Z" }, + { url = "https://files.pythonhosted.org/packages/dc/7f/55fecd70f7ece178db2f26128ec41430d8720f2d12ca97bf8f0a628207d5/cffi-2.0.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:6824f87845e3396029f3820c206e459ccc91760e8fa24422f8b0c3d1731cbec5", size = 203374, upload-time = "2025-09-08T23:22:32.507Z" }, + { url = "https://files.pythonhosted.org/packages/84/ef/a7b77c8bdc0f77adc3b46888f1ad54be8f3b7821697a7b89126e829e676a/cffi-2.0.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:9de40a7b0323d889cf8d23d1ef214f565ab154443c42737dfe52ff82cf857664", size = 202597, upload-time = "2025-09-08T23:22:34.132Z" }, + { url = "https://files.pythonhosted.org/packages/d7/91/500d892b2bf36529a75b77958edfcd5ad8e2ce4064ce2ecfeab2125d72d1/cffi-2.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8941aaadaf67246224cee8c3803777eed332a19d909b47e29c9842ef1e79ac26", size = 215574, upload-time = "2025-09-08T23:22:35.443Z" }, + { url = "https://files.pythonhosted.org/packages/44/64/58f6255b62b101093d5df22dcb752596066c7e89dd725e0afaed242a61be/cffi-2.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a05d0c237b3349096d3981b727493e22147f934b20f6f125a3eba8f994bec4a9", size = 218971, upload-time = "2025-09-08T23:22:36.805Z" }, + { url = 
"https://files.pythonhosted.org/packages/ab/49/fa72cebe2fd8a55fbe14956f9970fe8eb1ac59e5df042f603ef7c8ba0adc/cffi-2.0.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:94698a9c5f91f9d138526b48fe26a199609544591f859c870d477351dc7b2414", size = 211972, upload-time = "2025-09-08T23:22:38.436Z" }, + { url = "https://files.pythonhosted.org/packages/0b/28/dd0967a76aab36731b6ebfe64dec4e981aff7e0608f60c2d46b46982607d/cffi-2.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:5fed36fccc0612a53f1d4d9a816b50a36702c28a2aa880cb8a122b3466638743", size = 217078, upload-time = "2025-09-08T23:22:39.776Z" }, + { url = "https://files.pythonhosted.org/packages/2b/c0/015b25184413d7ab0a410775fdb4a50fca20f5589b5dab1dbbfa3baad8ce/cffi-2.0.0-cp311-cp311-win32.whl", hash = "sha256:c649e3a33450ec82378822b3dad03cc228b8f5963c0c12fc3b1e0ab940f768a5", size = 172076, upload-time = "2025-09-08T23:22:40.95Z" }, + { url = "https://files.pythonhosted.org/packages/ae/8f/dc5531155e7070361eb1b7e4c1a9d896d0cb21c49f807a6c03fd63fc877e/cffi-2.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:66f011380d0e49ed280c789fbd08ff0d40968ee7b665575489afa95c98196ab5", size = 182820, upload-time = "2025-09-08T23:22:42.463Z" }, + { url = "https://files.pythonhosted.org/packages/95/5c/1b493356429f9aecfd56bc171285a4c4ac8697f76e9bbbbb105e537853a1/cffi-2.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:c6638687455baf640e37344fe26d37c404db8b80d037c3d29f58fe8d1c3b194d", size = 177635, upload-time = "2025-09-08T23:22:43.623Z" }, + { url = "https://files.pythonhosted.org/packages/ea/47/4f61023ea636104d4f16ab488e268b93008c3d0bb76893b1b31db1f96802/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d", size = 185271, upload-time = "2025-09-08T23:22:44.795Z" }, + { url = "https://files.pythonhosted.org/packages/df/a2/781b623f57358e360d62cdd7a8c681f074a71d445418a776eef0aadb4ab4/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c", size = 181048, upload-time = "2025-09-08T23:22:45.938Z" }, + { url = "https://files.pythonhosted.org/packages/ff/df/a4f0fbd47331ceeba3d37c2e51e9dfc9722498becbeec2bd8bc856c9538a/cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", size = 212529, upload-time = "2025-09-08T23:22:47.349Z" }, + { url = "https://files.pythonhosted.org/packages/d5/72/12b5f8d3865bf0f87cf1404d8c374e7487dcf097a1c91c436e72e6badd83/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", size = 220097, upload-time = "2025-09-08T23:22:48.677Z" }, + { url = "https://files.pythonhosted.org/packages/c2/95/7a135d52a50dfa7c882ab0ac17e8dc11cec9d55d2c18dda414c051c5e69e/cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e", size = 207983, upload-time = "2025-09-08T23:22:50.06Z" }, + { url = "https://files.pythonhosted.org/packages/3a/c8/15cb9ada8895957ea171c62dc78ff3e99159ee7adb13c0123c001a2546c1/cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037", size = 206519, upload-time = "2025-09-08T23:22:51.364Z" }, + { url = "https://files.pythonhosted.org/packages/78/2d/7fa73dfa841b5ac06c7b8855cfc18622132e365f5b81d02230333ff26e9e/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", size = 219572, upload-time = "2025-09-08T23:22:52.902Z" }, + { url = "https://files.pythonhosted.org/packages/07/e0/267e57e387b4ca276b90f0434ff88b2c2241ad72b16d31836adddfd6031b/cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", size = 222963, upload-time = "2025-09-08T23:22:54.518Z" }, + { url = "https://files.pythonhosted.org/packages/b6/75/1f2747525e06f53efbd878f4d03bac5b859cbc11c633d0fb81432d98a795/cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", size = 221361, upload-time = "2025-09-08T23:22:55.867Z" }, + { url = "https://files.pythonhosted.org/packages/7b/2b/2b6435f76bfeb6bbf055596976da087377ede68df465419d192acf00c437/cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18", size = 172932, upload-time = "2025-09-08T23:22:57.188Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ed/13bd4418627013bec4ed6e54283b1959cf6db888048c7cf4b4c3b5b36002/cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5", size = 183557, upload-time = "2025-09-08T23:22:58.351Z" }, + { url = "https://files.pythonhosted.org/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762, upload-time = "2025-09-08T23:22:59.668Z" }, + { url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230, upload-time = "2025-09-08T23:23:00.879Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043, upload-time = "2025-09-08T23:23:02.231Z" }, + { url = 
"https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" }, + { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" }, + { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" }, + { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" }, + { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" }, + { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" }, + { url = 
"https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" }, + { url = "https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909, upload-time = "2025-09-08T23:23:14.32Z" }, + { url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402, upload-time = "2025-09-08T23:23:15.535Z" }, + { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780, upload-time = "2025-09-08T23:23:16.761Z" }, + { url = "https://files.pythonhosted.org/packages/92/c4/3ce07396253a83250ee98564f8d7e9789fab8e58858f35d07a9a2c78de9f/cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", size = 185320, upload-time = "2025-09-08T23:23:18.087Z" }, + { url = "https://files.pythonhosted.org/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487, upload-time = "2025-09-08T23:23:19.622Z" }, + { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049, upload-time = "2025-09-08T23:23:20.853Z" }, + { url = "https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793, upload-time = "2025-09-08T23:23:22.08Z" }, + { url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300, upload-time = "2025-09-08T23:23:23.314Z" }, + { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244, upload-time = "2025-09-08T23:23:24.541Z" }, + { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828, upload-time = "2025-09-08T23:23:26.143Z" }, + { url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926, upload-time = "2025-09-08T23:23:27.873Z" }, + { url = "https://files.pythonhosted.org/packages/3e/aa/df335faa45b395396fcbc03de2dfcab242cd61a9900e914fe682a59170b1/cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f", size = 175328, 
upload-time = "2025-09-08T23:23:44.61Z" }, + { url = "https://files.pythonhosted.org/packages/bb/92/882c2d30831744296ce713f0feb4c1cd30f346ef747b530b5318715cc367/cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25", size = 185650, upload-time = "2025-09-08T23:23:45.848Z" }, + { url = "https://files.pythonhosted.org/packages/9f/2c/98ece204b9d35a7366b5b2c6539c350313ca13932143e79dc133ba757104/cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad", size = 180687, upload-time = "2025-09-08T23:23:47.105Z" }, + { url = "https://files.pythonhosted.org/packages/3e/61/c768e4d548bfa607abcda77423448df8c471f25dbe64fb2ef6d555eae006/cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", size = 188773, upload-time = "2025-09-08T23:23:29.347Z" }, + { url = "https://files.pythonhosted.org/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013, upload-time = "2025-09-08T23:23:30.63Z" }, + { url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593, upload-time = "2025-09-08T23:23:31.91Z" }, + { url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354, upload-time = "2025-09-08T23:23:33.214Z" }, + { url = 
"https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480, upload-time = "2025-09-08T23:23:34.495Z" }, + { url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584, upload-time = "2025-09-08T23:23:36.096Z" }, + { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443, upload-time = "2025-09-08T23:23:37.328Z" }, + { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload-time = "2025-09-08T23:23:38.945Z" }, + { url = "https://files.pythonhosted.org/packages/a0/1d/ec1a60bd1a10daa292d3cd6bb0b359a81607154fb8165f3ec95fe003b85c/cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e", size = 180487, upload-time = "2025-09-08T23:23:40.423Z" }, + { url = "https://files.pythonhosted.org/packages/bf/41/4c1168c74fac325c0c8156f04b6749c8b6a8f405bbf91413ba088359f60d/cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6", size = 191726, upload-time = "2025-09-08T23:23:41.742Z" }, + { url = 
"https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, upload-time = "2025-09-08T23:23:43.004Z" }, +] + [[package]] name = "colorama" version = "0.4.6" @@ -187,6 +269,39 @@ toml = [ { name = "tomli", marker = "python_full_version <= '3.11'" }, ] +[[package]] +name = "curl-cffi" +version = "0.15.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "cffi" }, + { name = "rich" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/48/5b/89fcfebd3e5e85134147ac99e9f2b2271165fd4d71984fc65da5f17819b7/curl_cffi-0.15.0.tar.gz", hash = "sha256:ea0c67652bf6893d34ee0f82c944f37e488f6147e9421bef1771cc6545b02ded", size = 196437, upload-time = "2026-04-03T11:12:31.525Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5e/42/54ddd442c795f30ce5dd4e49f87ce77505958d3777cd96a91567a3975d2a/curl_cffi-0.15.0-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:bda66404010e9ed743b1b83c20c86f24fe21a9a6873e17479d6e67e29d8ded28", size = 2795267, upload-time = "2026-04-03T11:11:46.48Z" }, + { url = "https://files.pythonhosted.org/packages/83/2d/3915e238579b3c5a92cead5c79130c3b8d20caaba7616cc4d894650e1d6b/curl_cffi-0.15.0-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:a25620d9bf989c9c029a7d1642999c4c265abb0bad811deb2f77b0b5b2b12e5b", size = 2573544, upload-time = "2026-04-03T11:11:47.951Z" }, + { url = "https://files.pythonhosted.org/packages/2a/b3/9d2f1057749a1b07ba1989db3c1503ce8bed998310bae9aea2c43aa64f20/curl_cffi-0.15.0-cp310-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:582e570aa2586b96ed47cf4a17586b9a3c462cbe43f780487c3dc245c6ef1527", size = 10515369, upload-time = "2026-04-03T11:11:50.126Z" }, + { url = 
"https://files.pythonhosted.org/packages/b5/1d/6d10dded5ce3fd8157e558ebd97d09e551b77a62cdc1c31e93d0a633cee5/curl_cffi-0.15.0-cp310-abi3-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:838e48212447d9c81364b04707a5c861daf08f8320f9ecb3406a8919d1d5c3b3", size = 10160045, upload-time = "2026-04-03T11:11:52.664Z" }, + { url = "https://files.pythonhosted.org/packages/5c/12/c70b835487ace3b9ba1502631912e3440082b8ae3a162f60b59cb0b6444d/curl_cffi-0.15.0-cp310-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2b6c847d86283b07ae69bb72c82eb8a59242277142aa35b89850f89e792a02fc", size = 11090433, upload-time = "2026-04-03T11:11:55.049Z" }, + { url = "https://files.pythonhosted.org/packages/ea/0d/78edcc4f71934225db99df68197a107386d59080742fc7bf6bb4d007924f/curl_cffi-0.15.0-cp310-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:9e5e69eee735f659287e2c84444319d68a1fa68dd37abf228943a4074864283a", size = 10479178, upload-time = "2026-04-03T11:11:57.685Z" }, + { url = "https://files.pythonhosted.org/packages/5b/84/1e101c1acb1ea2f0b4992f5c3024f596d8e21db0d53540b9d583f673c4e7/curl_cffi-0.15.0-cp310-abi3-manylinux_2_34_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:aa1323950224db24f4c510d010b3affa02196ca853fb424191fa917a513d3f4b", size = 10317051, upload-time = "2026-04-03T11:12:00.295Z" }, + { url = "https://files.pythonhosted.org/packages/28/42/8ef236b22a6c23d096c85a1dc507efe37bfdfc7a2f8a4b34efb590197369/curl_cffi-0.15.0-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:41f80170ba844009273b2660da1964ec31e99e5719d16b3422ada87177e32e13", size = 11299660, upload-time = "2026-04-03T11:12:02.791Z" }, + { url = "https://files.pythonhosted.org/packages/1d/01/56aeb055d962da87a1be0d74c6c644e251c7e88129b5471dc44ac724e678/curl_cffi-0.15.0-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:1977e1e12cfb5c11352cbb74acef1bed24eb7d226dab61ca57c168c21acd4d61", size = 11945049, upload-time = "2026-04-03T11:12:05.912Z" }, + { url = 
"https://files.pythonhosted.org/packages/d8/8c/2abf99a38d6340d66cf0557e0c750ef3f8883dfc5d450087e01c85861343/curl_cffi-0.15.0-cp310-abi3-win_amd64.whl", hash = "sha256:5a0c1896a0d5a5ac1eb89cd24b008d2b718dd1df6fd2f75451b59ca66e49e572", size = 1661649, upload-time = "2026-04-03T11:12:07.948Z" }, + { url = "https://files.pythonhosted.org/packages/3d/39/dfd54f2240d3a9b96d77bacc62b97813b35e2aa8ecf5cd5013c683f1ba96/curl_cffi-0.15.0-cp310-abi3-win_arm64.whl", hash = "sha256:a6d57f8389273a3a1f94370473c74897467bcc36af0a17336989780c507fa43d", size = 1410741, upload-time = "2026-04-03T11:12:10.073Z" }, + { url = "https://files.pythonhosted.org/packages/19/6a/c24df8a4fc22fa84070dcd94abeba43c15e08cc09e35869565c0bad196fd/curl_cffi-0.15.0-cp313-abi3-android_24_arm64_v8a.whl", hash = "sha256:4682dc38d4336e0eb0b185374db90a760efde63cbea994b4e63f3521d44c4c92", size = 7190427, upload-time = "2026-04-03T11:12:12.142Z" }, + { url = "https://files.pythonhosted.org/packages/11/56/132225cb3491d07cc6adcce5fe395e059bde87c68cff1ef87a31c88c7819/curl_cffi-0.15.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:967ad7355bd8e9586f8c2d02eaa99953747549e7ea4a9b25cd53353e6b67fe6d", size = 2795723, upload-time = "2026-04-03T11:12:13.668Z" }, + { url = "https://files.pythonhosted.org/packages/07/8f/f4f83cd303bef7e8f1749512e5dd157e7e5d08b0a36c8211f9640a2757bf/curl_cffi-0.15.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7e63539d0d839d0a8c5eacf86229bc68c57803547f35e0db7ee0986328b478c3", size = 2573739, upload-time = "2026-04-03T11:12:15.08Z" }, + { url = "https://files.pythonhosted.org/packages/e8/5c/643d65c7fc9acd742876aa55c2d7823c438cb7665810acd2e66c9976c4d9/curl_cffi-0.15.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:08c799b89740b9bc49c09fbc3d5907f13ac1f845ca52620507ef9466d4639dd5", size = 10521046, upload-time = "2026-04-03T11:12:17.034Z" }, + { url = 
"https://files.pythonhosted.org/packages/7f/0b/9b8037113c93f4c5323096163471fa7c35c7676c3f608eeaf1287cd99d58/curl_cffi-0.15.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7b7a92767a888ee90147e18964b396d8435ff42737030d6fb00824ffd6094805", size = 11096115, upload-time = "2026-04-03T11:12:19.694Z" }, + { url = "https://files.pythonhosted.org/packages/5f/96/fff2fcbd924ef4042e0d67379f751a8a4e3186a91e75e35a4cf218b306ee/curl_cffi-0.15.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:829cc357061ecb99cc2d406301f609a039e05665322f5c025ec67c38b0dc49ce", size = 11305346, upload-time = "2026-04-03T11:12:22.151Z" }, + { url = "https://files.pythonhosted.org/packages/53/1b/304b253a45ab28691c8c5e8cca1e6cbb9cf8e46dfceae4648dd536f75e73/curl_cffi-0.15.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:408d6f14e346841cd889c2e0962832bb235ba3b6749ebf609f347f747da5e60f", size = 11949834, upload-time = "2026-04-03T11:12:24.986Z" }, + { url = "https://files.pythonhosted.org/packages/5a/ff/4723d92f08259c707a974aba27a08d0a822b9555e35ca581bf18d055a364/curl_cffi-0.15.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b624c7ce087bfda967a013ed0a64702a525444e5b6e97d23534d567ccc6525aa", size = 1702771, upload-time = "2026-04-03T11:12:28.201Z" }, + { url = "https://files.pythonhosted.org/packages/59/8c/36bbe06d66fa2b765e4a07199f643a59a9cd1a754207a96335402a9520f4/curl_cffi-0.15.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0b6c0543b993996670e9e4b78e305a2d60809d5681903ffb5568e21a387434d3", size = 1466312, upload-time = "2026-04-03T11:12:30.054Z" }, +] + [[package]] name = "exceptiongroup" version = "1.3.1" @@ -276,6 +391,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" }, ] +[[package]] +name = "markdown-it-py" 
+version = "4.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/ff/7841249c247aa650a76b9ee4bbaeae59370dc8bfd2f6c01f3630c35eb134/markdown_it_py-4.2.0.tar.gz", hash = "sha256:04a21681d6fbb623de53f6f364d352309d4094dd4194040a10fd51833e418d49", size = 82454, upload-time = "2026-05-07T12:08:28.36Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/81/4da04ced5a082363ecfa159c010d200ecbd959ae410c10c0264a38cac0f5/markdown_it_py-4.2.0-py3-none-any.whl", hash = "sha256:9f7ebbcd14fe59494226453aed97c1070d83f8d24b6fc3a3bcf9a38092641c4a", size = 91687, upload-time = "2026-05-07T12:08:27.182Z" }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + [[package]] name = "nodeenv" version = "1.10.0" @@ -303,6 +439,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "pycparser" +version = "3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload-time = "2026-01-21T14:26:51.89Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" }, +] + [[package]] name = "pygments" version = "2.20.0" @@ -393,6 +538,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/55/1fa65f8e4fceb19dd6daa867c162ad845d547f6058cd92b4b02384a44777/pytest_httpx-0.36.2-py3-none-any.whl", hash = "sha256:d42ebd5679442dc7bfb0c48e0767b6562e9bc4534d805127b0084171886a5e22", size = 20315, upload-time = "2026-04-09T13:57:18.587Z" }, ] +[[package]] +name = "rich" +version = "15.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/8f/0722ca900cc807c13a6a0c696dacf35430f72e0ec571c4275d2371fca3e9/rich-15.0.0.tar.gz", hash = "sha256:edd07a4824c6b40189fb7ac9bc4c52536e9780fbbfbddf6f1e2502c31b068c36", size = 230680, upload-time = "2026-04-12T08:24:00.75Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/82/3b/64d4899d73f91ba49a8c18a8ff3f0ea8f1c1d75481760df8c68ef5235bf5/rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb", size = 310654, upload-time = "2026-04-12T08:24:02.83Z" }, +] + [[package]] name = "ruff" version = "0.15.13" @@ -489,13 +647,21 @@ dependencies = [ { name = "aiosqlite" }, { name = "beautifulsoup4" }, { name = "fake-useragent" }, - { name = "httpx" }, { name = "loguru" }, { name = "pyotp" }, ] +[package.optional-dependencies] +curl = [ + 
{ name = "curl-cffi" }, +] +httpx = [ + { name = "httpx" }, +] + [package.dev-dependencies] dev = [ + { name = "curl-cffi" }, { name = "pyright" }, { name = "pytest" }, { name = "pytest-asyncio" }, @@ -508,14 +674,17 @@ dev = [ requires-dist = [ { name = "aiosqlite", specifier = ">=0.17.0" }, { name = "beautifulsoup4", specifier = ">=4.13.0" }, + { name = "curl-cffi", marker = "extra == 'curl'", specifier = ">=0.7.0" }, { name = "fake-useragent", specifier = ">=1.4.0" }, - { name = "httpx", specifier = ">=0.26.0" }, + { name = "httpx", marker = "extra == 'httpx'", specifier = ">=0.26.0" }, { name = "loguru", specifier = ">=0.7.0" }, { name = "pyotp", specifier = ">=2.9.0" }, ] +provides-extras = ["httpx", "curl"] [package.metadata.requires-dev] dev = [ + { name = "curl-cffi", specifier = ">=0.7.0" }, { name = "pyright", specifier = ">=1.1.369" }, { name = "pytest", specifier = ">=7.4.4" }, { name = "pytest-asyncio", specifier = ">=0.23.3" }, From 01cde040409ef516c45ffa3460df0791f7a27ce2 Mon Sep 17 00:00:00 2001 From: vladkens Date: Fri, 22 May 2026 02:16:54 +0300 Subject: [PATCH 2/9] feat: http client tests --- pyproject.toml | 2 +- tests/conftest.py | 9 +- tests/mock_http.py | 65 ++++++++++ tests/test_http.py | 37 ++++-- tests/test_queue_client.py | 238 ++++++++++++++++++++++++++++++------- uv.lock | 17 +-- 6 files changed, 296 insertions(+), 72 deletions(-) create mode 100644 tests/mock_http.py diff --git a/pyproject.toml b/pyproject.toml index 05330c11..09a14581 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,10 +31,10 @@ curl = ["curl-cffi>=0.7.0"] [dependency-groups] dev = [ "curl-cffi>=0.7.0", + "httpx>=0.26.0", "pyright>=1.1.369", "pytest-asyncio>=0.23.3", "pytest-cov>=4.1.0", - "pytest-httpx>=0.28.0", "pytest>=7.4.4", "ruff>=0.1.11", ] diff --git a/tests/conftest.py b/tests/conftest.py index 0487bcc9..eb5ebdf1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,10 +1,13 @@ import pytest +from twscrape.account import Account from 
twscrape.accounts_pool import AccountsPool from twscrape.api import API from twscrape.logger import set_log_level from twscrape.queue_client import QueueClient, XClIdGenStore +from .mock_http import MockClient + set_log_level("ERROR") @@ -28,7 +31,9 @@ def pool_mock(tmp_path): @pytest.fixture -async def client_fixture(pool_mock: AccountsPool): +async def client_fixture(pool_mock: AccountsPool, monkeypatch): + mock_clt = MockClient() + monkeypatch.setattr(Account, "make_client", lambda self, proxy=None: mock_clt) pool_mock._order_by = "username" for x in range(1, 3): @@ -36,7 +41,7 @@ async def client_fixture(pool_mock: AccountsPool): await pool_mock.set_active(f"user{x}", True) client = QueueClient(pool_mock, "SearchTimeline") - yield pool_mock, client + yield pool_mock, client, mock_clt @pytest.fixture diff --git a/tests/mock_http.py b/tests/mock_http.py new file mode 100644 index 00000000..f0de744b --- /dev/null +++ b/tests/mock_http.py @@ -0,0 +1,65 @@ +from unittest.mock import MagicMock + +from twscrape.http import HttpClient, HttpMethod, Response + + +def _raw(*, status_code: int = 200, json_data=None, text: str = "", headers: dict | None = None): + raw = MagicMock() + raw.status_code = status_code + raw.text = text + raw.content = text.encode() + raw.headers = headers or {} + raw.url = "https://mock.local" + raw.request = MagicMock() + raw.request.method = "GET" + raw.request.url = "https://mock.local" + raw.json.return_value = json_data if json_data is not None else {} + if status_code >= 400: + raw.raise_for_status.side_effect = Exception(f"HTTP {status_code}") + else: + raw.raise_for_status.return_value = None + return raw + + +class MockClient(HttpClient): + def __init__(self): + self._queue: list = [] + self._cookies: dict = {} + self._headers: dict = {} + + def add_response( + self, + *, + status_code: int = 200, + json: dict | list | None = None, + text: str = "", + headers: dict | None = None, + ) -> "MockClient": + self._queue.append(("response", 
status_code, json, text, headers)) + return self + + def add_exception(self, exc: Exception) -> "MockClient": + self._queue.append(("exc", exc)) + return self + + @property + def cookies(self): + return self._cookies + + @property + def headers(self): + return self._headers + + async def request(self, method: HttpMethod, url: str, **kwargs) -> Response: + if not self._queue: + raise RuntimeError("MockClient: no more queued responses") + item = self._queue.pop(0) + if item[0] == "exc": + raise item[1] + _, status_code, json_data, text, headers = item + return Response( + _raw(status_code=status_code, json_data=json_data, text=text, headers=headers) + ) + + async def aclose(self) -> None: + pass diff --git a/tests/test_http.py b/tests/test_http.py index c097541d..7fb4e015 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -188,36 +188,55 @@ async def test_httpx_client_cookies_and_headers_are_mutable(): await client.aclose() -async def test_httpx_client_maps_network_errors(httpx_mock): +async def test_httpx_client_maps_network_errors(): + from unittest.mock import AsyncMock, patch + import httpx from twscrape.http import HttpxClient - httpx_mock.add_exception(httpx.ReadTimeout("timeout")) client = HttpxClient() - with pytest.raises(NetworkError): + with ( + patch.object( + client._client, "request", AsyncMock(side_effect=httpx.ReadTimeout("timeout")) + ), + pytest.raises(NetworkError), + ): await client.get("https://example.com") await client.aclose() -async def test_httpx_client_maps_connect_errors(httpx_mock): +async def test_httpx_client_maps_connect_errors(): + from unittest.mock import AsyncMock, patch + import httpx from twscrape.http import HttpxClient - httpx_mock.add_exception(httpx.ConnectError("refused")) client = HttpxClient() - with pytest.raises(ConnectError): + with ( + patch.object( + client._client, "request", AsyncMock(side_effect=httpx.ConnectError("refused")) + ), + pytest.raises(ConnectError), + ): await client.get("https://example.com") 
await client.aclose() -async def test_httpx_client_returns_response_wrapper(httpx_mock): +async def test_httpx_client_returns_response_wrapper(): + from unittest.mock import AsyncMock, patch + + import httpx + from twscrape.http import HttpxClient - httpx_mock.add_response(url="https://example.com", json={"ok": True}, status_code=200) + raw = httpx.Response( + 200, json={"ok": True}, request=httpx.Request("GET", "https://example.com") + ) client = HttpxClient() - rep = await client.get("https://example.com") + with patch.object(client._client, "request", AsyncMock(return_value=raw)): + rep = await client.get("https://example.com") assert isinstance(rep, Response) assert rep.status_code == 200 assert rep.json() == {"ok": True} diff --git a/tests/test_queue_client.py b/tests/test_queue_client.py index aefc8a74..c1395ebf 100644 --- a/tests/test_queue_client.py +++ b/tests/test_queue_client.py @@ -1,14 +1,15 @@ from contextlib import aclosing -import httpx -from pytest_httpx import HTTPXMock +import pytest from twscrape.accounts_pool import AccountsPool +from twscrape.http import ConnectError, NetworkError from twscrape.queue_client import QueueClient -DB_FILE = "/tmp/twscrape_test_queue_client.db" +from .mock_http import MockClient + URL = "https://example.com/api" -CF = tuple[AccountsPool, QueueClient] +CF = tuple[AccountsPool, QueueClient, MockClient] async def get_locked(pool: AccountsPool) -> set[str]: @@ -16,68 +17,65 @@ async def get_locked(pool: AccountsPool) -> set[str]: return {x.username for x in rep if x.locks.get("SearchTimeline", None) is not None} -async def test_lock_account_when_used(httpx_mock: HTTPXMock, client_fixture): - pool, client = client_fixture +async def get_inactive(pool: AccountsPool) -> set[str]: + rep = await pool.get_all() + return {x.username for x in rep if not x.active} + + +async def test_lock_account_when_used(client_fixture: CF): + pool, client, mock = client_fixture locked = await get_locked(pool) assert len(locked) == 0 - # 
should lock account on getting it await client.__aenter__() locked = await get_locked(pool) assert len(locked) == 1 assert "user1" in locked - # keep locked on request - httpx_mock.add_response(url=URL, json={"foo": "bar"}, status_code=200) - assert (await client.get(URL)).json() == {"foo": "bar"} + mock.add_response(json={"foo": "bar"}) + rep = await client.get(URL) + assert rep is not None + assert rep.json() == {"foo": "bar"} locked = await get_locked(pool) assert len(locked) == 1 assert "user1" in locked - # unlock on exit await client.__aexit__(None, None, None) locked = await get_locked(pool) assert len(locked) == 0 -async def test_do_not_switch_account_on_200(httpx_mock: HTTPXMock, client_fixture: CF): - pool, client = client_fixture +async def test_do_not_switch_account_on_200(client_fixture: CF): + pool, client, mock = client_fixture - # get account and lock it await client.__aenter__() locked1 = await get_locked(pool) assert len(locked1) == 1 - # make several requests with status=200 - for x in range(1): - httpx_mock.add_response(url=URL, json={"foo": x}, status_code=200) + for x in range(3): + mock.add_response(json={"foo": x}) rep = await client.get(URL) assert rep is not None assert rep.json() == {"foo": x} - # account should not be switched locked2 = await get_locked(pool) assert locked1 == locked2 - # unlock on exit await client.__aexit__(None, None, None) - locked3 = await get_locked(pool) - assert len(locked3) == 0 + assert len(await get_locked(pool)) == 0 -async def test_switch_acc_on_http_error(httpx_mock: HTTPXMock, client_fixture: CF): - pool, client = client_fixture +async def test_switch_acc_on_http_error(client_fixture: CF): + pool, client, mock = client_fixture - # locked account on enter await client.__aenter__() locked1 = await get_locked(pool) assert len(locked1) == 1 - # fail one request, account should be switched - httpx_mock.add_response(url=URL, json={"foo": "1"}, status_code=403) - httpx_mock.add_response(url=URL, json={"foo": 
"2"}, status_code=200) + mock.add_response(status_code=403, json={}) + mock.add_response(json={"foo": "2"}) rep = await client.get(URL) assert rep is not None @@ -86,39 +84,34 @@ async def test_switch_acc_on_http_error(httpx_mock: HTTPXMock, client_fixture: C locked2 = await get_locked(pool) assert len(locked2) == 2 - # unlock on exit (failed account still should locked) await client.__aexit__(None, None, None) locked3 = await get_locked(pool) assert len(locked3) == 1 - assert locked1 == locked3 # failed account locked + assert locked1 == locked3 -async def test_retry_with_same_acc_on_network_error(httpx_mock: HTTPXMock, client_fixture: CF): - pool, client = client_fixture +async def test_retry_with_same_acc_on_network_error(client_fixture: CF): + pool, client, mock = client_fixture - # locked account on enter await client.__aenter__() locked1 = await get_locked(pool) assert len(locked1) == 1 - # timeout on first request, account should not be switched - httpx_mock.add_exception(httpx.ReadTimeout("Unable to read within timeout")) - httpx_mock.add_response(url=URL, json={"foo": "2"}, status_code=200) + mock.add_exception(NetworkError("timeout")) + mock.add_response(json={"foo": "2"}) rep = await client.get(URL) assert rep is not None assert rep.json() == {"foo": "2"} - locked2 = await get_locked(pool) - assert locked2 == locked1 + assert await get_locked(pool) == locked1 - # check username added to request obj (for logging) username = getattr(rep, "__username", None) assert username is not None -async def test_ctx_closed_on_break(httpx_mock: HTTPXMock, client_fixture: CF): - pool, client = client_fixture +async def test_ctx_closed_on_break(client_fixture: CF): + pool, client, mock = client_fixture async def get_data_stream(): async with client as c: @@ -129,10 +122,10 @@ async def get_data_stream(): before_ctx = c.ctx if check_retry: - httpx_mock.add_response(url=URL, json={"counter": counter}, status_code=403) - httpx_mock.add_response(url=URL, json={"counter": 
counter}, status_code=200) + mock.add_response(status_code=403, json={"counter": counter}) + mock.add_response(json={"counter": counter}) else: - httpx_mock.add_response(url=URL, json={"counter": counter}, status_code=200) + mock.add_response(json={"counter": counter}) rep = await c.get(URL) @@ -148,11 +141,166 @@ async def get_data_stream(): if counter == 9: return - # need to use async with to break to work async with aclosing(get_data_stream()) as gen: async for x in gen: if x == 3: break - # ctx should be None after break assert client.ctx is None + + +# --- ConnectError --- + + +async def test_connect_error_raises_after_3_retries(client_fixture: CF): + pool, client, mock = client_fixture + await client.__aenter__() + + mock.add_exception(ConnectError("refused")) + mock.add_exception(ConnectError("refused")) + mock.add_exception(ConnectError("refused")) + + with pytest.raises(ConnectError): + await client.get(URL) + + await client.__aexit__(None, None, None) + + +async def test_connect_error_recovers_before_3_retries(client_fixture: CF): + pool, client, mock = client_fixture + await client.__aenter__() + + mock.add_exception(ConnectError("refused")) + mock.add_exception(ConnectError("refused")) + mock.add_response(json={"ok": True}) + + rep = await client.get(URL) + assert rep is not None + assert rep.json() == {"ok": True} + + await client.__aexit__(None, None, None) + + +# --- Rate limit --- + + +async def test_rate_limit_locks_account_and_switches(client_fixture: CF): + pool, client, mock = client_fixture + await client.__aenter__() + assert "user1" in await get_locked(pool) + + future_ts = 9999999999 + mock.add_response( + headers={"x-rate-limit-remaining": "0", "x-rate-limit-reset": str(future_ts)}, + ) + mock.add_response(json={"ok": True}) + + rep = await client.get(URL) + assert rep is not None + assert rep.json() == {"ok": True} + + user1 = next(x for x in await pool.get_all() if x.username == "user1") + assert user1.locks.get("SearchTimeline") is not 
None + + await client.__aexit__(None, None, None) + + +# --- Ban / inactive --- + + +async def test_ban_88_marks_account_inactive(client_fixture: CF): + pool, client, mock = client_fixture + await client.__aenter__() + + mock.add_response( + json={"errors": [{"code": 88, "message": "Rate limit exceeded"}]}, + headers={"x-rate-limit-remaining": "1"}, + ) + mock.add_response(json={"ok": True}) + + rep = await client.get(URL) + assert rep is not None + assert "user1" in await get_inactive(pool) + + await client.__aexit__(None, None, None) + + +async def test_ban_326_marks_account_inactive(client_fixture: CF): + pool, client, mock = client_fixture + await client.__aenter__() + + mock.add_response( + json={"errors": [{"code": 326, "message": "Authorization: Denied by access control"}]}, + ) + mock.add_response(json={"ok": True}) + + rep = await client.get(URL) + assert rep is not None + assert "user1" in await get_inactive(pool) + + await client.__aexit__(None, None, None) + + +async def test_ban_32_marks_account_inactive(client_fixture: CF): + pool, client, mock = client_fixture + await client.__aenter__() + + mock.add_response( + json={"errors": [{"code": 32, "message": "Could not authenticate you"}]}, + ) + mock.add_response(json={"ok": True}) + + rep = await client.get(URL) + assert rep is not None + assert "user1" in await get_inactive(pool) + + await client.__aexit__(None, None, None) + + +async def test_403_no_errors_marks_account_inactive(client_fixture: CF): + pool, client, mock = client_fixture + await client.__aenter__() + + mock.add_response(status_code=403, json={}) + mock.add_response(json={"ok": True}) + + rep = await client.get(URL) + assert rep is not None + assert "user1" in await get_inactive(pool) + + await client.__aexit__(None, None, None) + + +# --- Cloudflare / HTML block --- + + +async def test_cloudflare_block_returns_none(client_fixture: CF): + pool, client, mock = client_fixture + await client.__aenter__() + + mock.add_response( + 
status_code=403, + text="blocked", + headers={"content-type": "text/html", "cf-ray": "abc123"}, + ) + + rep = await client.get(URL) + assert rep is None + + await client.__aexit__(None, None, None) + + +async def test_html_block_without_cf_returns_none(client_fixture: CF): + pool, client, mock = client_fixture + await client.__aenter__() + + mock.add_response( + status_code=503, + text="error", + headers={"content-type": "text/html"}, + ) + + rep = await client.get(URL) + assert rep is None + + await client.__aexit__(None, None, None) diff --git a/uv.lock b/uv.lock index 5eb99140..9761fbb7 100644 --- a/uv.lock +++ b/uv.lock @@ -525,19 +525,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9d/7a/d968e294073affff457b041c2be9868a40c1c71f4a35fcc1e45e5493067b/pytest_cov-7.1.0-py3-none-any.whl", hash = "sha256:a0461110b7865f9a271aa1b51e516c9a95de9d696734a2f71e3e78f46e1d4678", size = 22876, upload-time = "2026-03-21T20:11:14.438Z" }, ] -[[package]] -name = "pytest-httpx" -version = "0.36.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "httpx" }, - { name = "pytest" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/4e/42/f53c58570e80d503ade9dd42ce57f2915d14bcbe25f6308138143950d1d6/pytest_httpx-0.36.2.tar.gz", hash = "sha256:05a56527484f7f4e8c856419ea379b8dc359c36801c4992fdb330f294c690356", size = 57683, upload-time = "2026-04-09T13:57:19.837Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/55/1fa65f8e4fceb19dd6daa867c162ad845d547f6058cd92b4b02384a44777/pytest_httpx-0.36.2-py3-none-any.whl", hash = "sha256:d42ebd5679442dc7bfb0c48e0767b6562e9bc4534d805127b0084171886a5e22", size = 20315, upload-time = "2026-04-09T13:57:18.587Z" }, -] - [[package]] name = "rich" version = "15.0.0" @@ -662,11 +649,11 @@ httpx = [ [package.dev-dependencies] dev = [ { name = "curl-cffi" }, + { name = "httpx" }, { name = "pyright" }, { name = "pytest" }, { name = "pytest-asyncio" }, { name = "pytest-cov" }, - { 
name = "pytest-httpx" }, { name = "ruff" }, ] @@ -685,11 +672,11 @@ provides-extras = ["httpx", "curl"] [package.metadata.requires-dev] dev = [ { name = "curl-cffi", specifier = ">=0.7.0" }, + { name = "httpx", specifier = ">=0.26.0" }, { name = "pyright", specifier = ">=1.1.369" }, { name = "pytest", specifier = ">=7.4.4" }, { name = "pytest-asyncio", specifier = ">=0.23.3" }, { name = "pytest-cov", specifier = ">=4.1.0" }, - { name = "pytest-httpx", specifier = ">=0.28.0" }, { name = "ruff", specifier = ">=0.1.11" }, ] From b02b73a2f8e07f8a6eafaa7df240216e1dbbd94e Mon Sep 17 00:00:00 2001 From: vladkens Date: Fri, 22 May 2026 02:52:21 +0300 Subject: [PATCH 3/9] chore: improve tests --- .../20260521-http-backend-migration.md | 292 ------------------ pyproject.toml | 2 +- readme.md | 16 +- tests/conftest.py | 2 +- tests/mock_http.py | 12 + tests/test_http.py | 201 ++++++++++++ tests/test_pool.py | 101 +++++- tests/test_queue_client.py | 150 +++++++++ tests/test_utils.py | 56 +++- twscrape/http.py | 5 +- uv.lock | 8 +- 11 files changed, 535 insertions(+), 310 deletions(-) delete mode 100644 docs/plans/completed/20260521-http-backend-migration.md diff --git a/docs/plans/completed/20260521-http-backend-migration.md b/docs/plans/completed/20260521-http-backend-migration.md deleted file mode 100644 index 6f741a87..00000000 --- a/docs/plans/completed/20260521-http-backend-migration.md +++ /dev/null @@ -1,292 +0,0 @@ -# HTTP Backend Abstraction - -## Overview - -Add a thin HTTP client abstraction layer (`twscrape/http.py`) so twscrape can use either -`httpx` or `curl_cffi` as the HTTP backend. `curl_cffi` enables browser-level TLS fingerprint -spoofing (JA3/SNI), which bypasses Cloudflare bot detection that httpx fails against. - -Both libraries become **optional** extras. Auto-detection picks `curl_cffi` if installed, -falls back to `httpx`. User can override with `TWS_HTTP_BACKEND=curl|httpx` env var. 
-If env var is set but the specified backend is not installed → `ImportError` immediately. - -Existing users who have `httpx` installed see zero behavior change. - -**Behavior change (intentional):** `WriteTimeout` and `PoolTimeout` previously fell through -to the "unknown error / 15 min timeout" path. After this change they map to `NetworkError` -and trigger silent retry with the same account — which is the correct behavior. - -Reference: [issue #269](https://github.com/vladkens/twscrape/issues/269), -example impl: `delta-farmer/lib/http.py`. - -## Context - -- **Language/framework**: Python 3.10+, async throughout (no sync variants) -- **Relevant files**: - - `twscrape/http.py` — new file (core of this task) - - `twscrape/account.py` — `make_client()` creates `httpx.AsyncClient` - - `twscrape/accounts_pool.py` — `from httpx import HTTPStatusError`; accesses `e.response.status_code` / `e.response.text` - - `twscrape/queue_client.py` — wraps client, catches 5 httpx-specific exceptions - - `twscrape/login.py` — uses `AsyncClient` + `Response`; mutates `client.headers` and reads `client.cookies` after construction - - `twscrape/xclid.py` — creates its own `httpx.AsyncClient` for public page fetching - - `twscrape/models.py`, `twscrape/api.py`, `twscrape/cli.py` — `httpx.Response` type annotations - - `pyproject.toml` — `httpx>=0.26.0` in required `dependencies` -- **Test mocking**: `tests/test_queue_client.py` uses `pytest-httpx`/`HTTPXMock` — still works - since `HttpxClient` wraps `httpx.AsyncClient` internally so `pytest-httpx` intercepts there - -## Development Approach - -- **Testing approach**: Regular (code first, then tests alongside) -- Complete each task fully before moving to the next -- Run `uv run pytest` after each task — all tests must pass before proceeding - -## Solution Overview - -Single new file `twscrape/http.py` contains everything: - -``` -HttpError -├── HttpStatusError(response: Response) ← raise_for_status() failures; carries .response -├── 
NetworkError ← timeout, read/write/pool/proxy (retry silently) -└── ConnectError ← connection failures (retry 3x then raise) - -Response ← thin wrapper; delegates to httpx.Response or curl_cffi.Response - overrides raise_for_status() → HttpStatusError(response=self) - no __slots__ (allows arbitrary setattr for __username) - -HttpClient ← plain base class; async context manager -├── HttpxClient ← wraps httpx.AsyncClient; maps httpx errors → common types -└── CurlClient ← wraps curl_cffi.AsyncSession(impersonate="chrome") - -_detect_backend() ← reads TWS_HTTP_BACKEND; strict fail if set but not installed -make_client() ← factory; calls _detect_backend() if backend=None -``` - -**HttpClient full API:** -```python -async def request(method, url, *, params=None, headers=None, json=None, data=None) -> Response -async def get(url, *, params=None, headers=None) -> Response -async def post(url, *, params=None, headers=None, json=None, data=None) -> Response -async def aclose() -> None -async def __aenter__(self) -> HttpClient -async def __aexit__(self, *args) -> None # calls aclose() -@property cookies # mutable dict-like: __contains__, __getitem__, __setitem__, .get(), .update(), .items() -@property headers # mutable dict-like: __setitem__, .update() -``` - -**Error mapping:** - -| httpx | → | -|---|---| -| `ReadTimeout, WriteTimeout, PoolTimeout, ProxyError` | `NetworkError` | -| `ConnectError, ConnectTimeout` | `ConnectError` | -| `raise_for_status()` → `HTTPStatusError` | `HttpStatusError` | - -| curl_cffi | → | -|---|---| -| `errors.ConnectionError, errors.ConnectTimeout` | `ConnectError` | -| `errors.RequestsError` (base; covers Timeout, ProxyError, etc.) 
| `NetworkError` | -| `raise_for_status()` | `HttpStatusError` | - -## Implementation Steps - ---- - -### Task 1: Create `twscrape/http.py` - -**Files:** -- Create: `twscrape/http.py` -- Create: `tests/test_http.py` - -- [x] define exception hierarchy: - - `HttpError(Exception)` - - `HttpStatusError(HttpError)` with field `response: Response` (set in `__init__`) - - `NetworkError(HttpError)` - - `ConnectError(HttpError)` -- [x] define `Response` wrapper class (no `__slots__`): - - properties: `status_code`, `text`, `headers`, `content`, `url`, `request` - - method `json() -> Any` — delegates - - method `raise_for_status()` — catches any exception from `self._rep.raise_for_status()`, raises `HttpStatusError(response=self)` -- [x] define `HttpClient` base class: - - `async request(method, url, *, params, headers, json, data) -> Response` - - `async get(url, **kwargs) -> Response` - - `async post(url, **kwargs) -> Response` - - `async aclose() -> None` - - `async __aenter__ / __aexit__` - - `@property cookies` — mutable dict-like - - `@property headers` — mutable dict-like -- [x] define `HttpxClient(HttpClient)`: - - import aliased: `from httpx import ConnectError as _HttpxConnectError` etc. 
to avoid name clash with project's `ConnectError` - - wraps `httpx.AsyncClient(proxy=..., follow_redirects=True, transport=AsyncHTTPTransport(retries=3))` - - `_wrap(coro)`: maps `ReadTimeout, WriteTimeout, PoolTimeout, ProxyError → NetworkError`; `ConnectError, ConnectTimeout → ConnectError` - - `request()` returns `Response(raw_rep)` -- [x] define `CurlClient(HttpClient)`: - - wraps `curl_cffi.requests.AsyncSession(impersonate="chrome", proxy=..., allow_redirects=True)` - - `_wrap(coro)`: maps `errors.ConnectionError, errors.ConnectTimeout → ConnectError`; `errors.RequestsError → NetworkError` - - `request()` returns `Response(raw_rep)` - - `aclose()` calls `await self._session.close()` -- [x] define `_detect_backend() -> str`: - - if `TWS_HTTP_BACKEND` set: attempt to import the named backend; raise `ImportError` with install hint if missing - - else: try `import curl_cffi` → `"curl"`; try `import httpx` → `"httpx"`; else raise `ImportError` -- [x] define `make_client(backend=None, *, proxy=None, headers=None, cookies=None) -> HttpClient` -- [x] write `tests/test_http.py`: - - `_detect_backend()` respects `TWS_HTTP_BACKEND` - - `TWS_HTTP_BACKEND` set but backend not installed → `ImportError` - - `Response.raise_for_status()` raises `HttpStatusError`; `err.response.status_code` and `err.response.text` are accessible - - `Response` delegates `.status_code`, `.json()`, `.text`, `.headers` - - `setattr(rep, "__username", "x"); assert getattr(rep, "__username") == "x"` (no `__slots__`) - - `HttpxClient` is an async context manager - - `HttpxClient.cookies` and `.headers` support `__setitem__` and `__contains__` -- [x] run `uv run pytest tests/test_http.py` — must pass - ---- - -### Task 2: Update `account.py` - -**Files:** -- Modify: `twscrape/account.py` - -- [x] remove `from httpx import AsyncClient, AsyncHTTPTransport` -- [x] add `from .http import HttpClient, make_client as _make_http_client` -- [x] rewrite `make_client()` return type `HttpClient`; assemble all 
headers upfront: - ```python - headers = {**self.headers} - headers["user-agent"] = self.user_agent - headers["content-type"] = "application/json" - headers["authorization"] = TOKEN - headers["x-twitter-active-user"] = "yes" - headers["x-twitter-client-language"] = "en" - if "ct0" in self.cookies: - headers["x-csrf-token"] = self.cookies["ct0"] - return _make_http_client(proxy=proxy, headers=headers, cookies=self.cookies) - ``` -- [x] run `uv run pytest` — must pass - ---- - -### Task 3: Update `queue_client.py` - -**Files:** -- Modify: `twscrape/queue_client.py` - -- [x] remove `import httpx` and `from httpx import AsyncClient, Response` -- [x] add `from .http import HttpClient, NetworkError, ConnectError, HttpStatusError, Response` -- [x] `Ctx.__init__`: `clt: AsyncClient` → `clt: HttpClient` -- [x] `Ctx.req()` / `QueueClient.get()` / `QueueClient.req()` return types: `Response | None` -- [x] in `_check_rep`: `except httpx.HTTPStatusError` → `except HttpStatusError` -- [x] in `QueueClient.req()`: - ```python - except NetworkError: continue - except ConnectError as e: - connection_retry += 1 - if connection_retry >= 3: raise e - ``` -- [x] run `uv run pytest` — must pass - ---- - -### Task 4: Update `accounts_pool.py` - -**Files:** -- Modify: `twscrape/accounts_pool.py` - -- [x] remove `from httpx import HTTPStatusError` -- [x] add `from .http import HttpStatusError` -- [x] `except HTTPStatusError as e:` — unchanged; `e.response.status_code` and `e.response.text` work via `HttpStatusError.response` -- [x] run `uv run pytest` — must pass - ---- - -### Task 5: Update `login.py` - -**Files:** -- Modify: `twscrape/login.py` - -- [x] remove `from httpx import AsyncClient, Response` -- [x] add `from .http import HttpClient, Response` -- [x] `TaskCtx.client: AsyncClient` → `TaskCtx.client: HttpClient` -- [x] all `Response` type annotations → `Response` -- [x] verify all `client.headers["..."] = ...` and `client.cookies.get(...)` calls still work (both backends expose 
mutable dict-like jars) -- [x] run `uv run pytest` — must pass - ---- - -### Task 6: Update `xclid.py` - -**Files:** -- Modify: `twscrape/xclid.py` - -- [x] remove `import httpx` -- [x] add `from .http import HttpClient, make_client as _make_http_client` -- [x] `_make_client() -> HttpClient`: `return _make_http_client(headers={"user-agent": UserAgent().chrome})` -- [x] update all function signatures: `clt: httpx.AsyncClient` → `clt: HttpClient` -- [x] run `uv run pytest` — must pass - ---- - -### Task 7: Update type annotations in `models.py`, `api.py`, `cli.py` - -**Files:** -- Modify: `twscrape/models.py` -- Modify: `twscrape/api.py` -- Modify: `twscrape/cli.py` - -- [x] `models.py`: remove `import httpx`; add `from .http import Response`; replace all `httpx.Response` with `Response` in parse_* signatures; update comment at line 811 removing httpx mention -- [x] `api.py`: remove `from httpx import Response`; add `from .http import Response` -- [x] `cli.py`: remove `import httpx`; add `from .http import Response`; update `to_str()` signature -- [x] fix `[tool.pyright]` in `pyproject.toml`: change `include` from non-existent dirs to `["twscrape"]` -- [ ] run `uv run pyright twscrape/` — no errors -- [x] run `uv run pytest` — must pass - ---- - -### Task 8: Update `pyproject.toml` - -**Files:** -- Modify: `pyproject.toml` - -- [x] remove `httpx>=0.26.0` from `dependencies` -- [x] add optional extras: - ```toml - [project.optional-dependencies] - httpx = ["httpx>=0.26.0"] - curl = ["curl-cffi>=0.7.0"] - ``` -- [x] keep `pytest-httpx` in dev dependencies (still intercepts at httpx level inside `HttpxClient`) -- [x] run `uv run pytest` — must pass (httpx still installed in dev env via pytest-httpx) - ---- - -### Task 9: Verify acceptance criteria - -- [x] `uv run pytest` — all tests pass (73/73) -- [ ] `uv run pyright twscrape/` — no errors -- [x] `grep -r "import httpx" twscrape/` — returns empty (no direct httpx usage outside http.py) -- [ ] verify 
`TWS_HTTP_BACKEND=httpx` override works -- [ ] verify clean import: `python -c "from twscrape.http import Response, HttpClient, NetworkError, ConnectError, HttpStatusError"` - ---- - -### Task 10: [Final] Update documentation - -- [ ] update `readme.md`: add section on optional dependencies and backend selection -- [ ] add entry to `changelog.md`: httpx is now optional; install `twscrape[httpx]` or `twscrape[curl]`; `curl_cffi` backend preferred when installed -- [ ] move this plan to `docs/plans/completed/` - -## Post-Completion - -**Manual verification:** -- Test login flow with a real account using `curl_cffi` backend -- Verify TLS fingerprint via `tls.browserleaks.com` — should show Chrome profile -- Test `TWS_HTTP_BACKEND=httpx` override still works after curl_cffi is installed - -**Isolation test** (to verify optional-deps work end to end): -```bash -uv venv /tmp/twscrape-curl-only -/tmp/twscrape-curl-only/bin/pip install -e ".[curl]" -/tmp/twscrape-curl-only/bin/python -c "import twscrape; print('ok')" -``` - -**Breaking change notice** (in release notes): -- `httpx` is no longer a required dependency — users must install `twscrape[httpx]` or `twscrape[curl]` -- Existing installs unaffected: httpx was installed before, auto-detect finds it diff --git a/pyproject.toml b/pyproject.toml index 09a14581..db4c3171 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,13 +19,13 @@ classifiers = [ dependencies = [ "aiosqlite>=0.17.0", "fake-useragent>=1.4.0", + "httpx>=0.26.0", "loguru>=0.7.0", "pyotp>=2.9.0", "beautifulsoup4>=4.13.0", ] [project.optional-dependencies] -httpx = ["httpx>=0.26.0"] curl = ["curl-cffi>=0.7.0"] [dependency-groups] diff --git a/readme.md b/readme.md index 9087387d..973450b7 100644 --- a/readme.md +++ b/readme.md @@ -18,14 +18,17 @@ Twitter GraphQL API implementation with [SNScrape](https://github.com/JustAnothe ## Install -twscrape requires one HTTP backend to be installed (peer dependency): +```bash +pip install twscrape +``` + 
+`httpx` is included by default. For better Cloudflare/bot-detection bypass, install `curl-cffi` as well — it uses libcurl with browser-level TLS fingerprint spoofing and is preferred automatically when present: ```bash -pip install twscrape[httpx] # httpx (legacy) -pip install twscrape[curl] # curl-cffi (recommended, better TLS fingerprinting) +pip install twscrape[curl] ``` -`curl-cffi` uses libcurl with browser-level TLS fingerprint spoofing and is recommended to avoid Cloudflare bot detection. When both are installed, `curl-cffi` is preferred. Override with `TWS_HTTP_BACKEND=httpx` or `TWS_HTTP_BACKEND=curl`. +Override the backend explicitly with `TWS_HTTP_BACKEND=httpx` or `TWS_HTTP_BACKEND=curl`. ## Features - Support both Search & GraphQL Twitter API @@ -123,9 +126,9 @@ async def main(): async for tweet in api.search("elon musk"): print(tweet.id, tweet.user.username, tweet.rawContent) # tweet is `Tweet` object - # NOTE 2: all methods have `raw` version (returns `httpx.Response` object): + # NOTE 2: all methods have `raw` version (returns `twscrape.Response` object): async for rep in api.search_raw("elon musk"): - print(rep.status_code, rep.json()) # rep is `httpx.Response` object + print(rep.status_code, rep.json()) # rep is `twscrape.Response` object # change log level, default info set_log_level("DEBUG") @@ -361,6 +364,7 @@ _Note:_ If proxy not working, exception will be raised from API class. - `TWS_PROXY` - global proxy for all accounts (e.g. 
`socks5://user:pass@127.0.0.1:1080`) - `TWS_WAIT_EMAIL_CODE` - timeout for email verification code during login (default: `30`, in seconds) - `TWS_RAISE_WHEN_NO_ACCOUNT` - raise `NoAccountError` exception when no available accounts, instead of waiting (default: `false`, values: `false`/`0`/`true`/`1`) +- `TWS_HTTP_BACKEND` - force HTTP backend: `httpx` or `curl` (default: `curl` if installed, otherwise `httpx`) ## Limitations diff --git a/tests/conftest.py b/tests/conftest.py index eb5ebdf1..6f58f7aa 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,7 +8,7 @@ from .mock_http import MockClient -set_log_level("ERROR") +set_log_level("CRITICAL") class ClIdGenMock: diff --git a/tests/mock_http.py b/tests/mock_http.py index f0de744b..5561c6e8 100644 --- a/tests/mock_http.py +++ b/tests/mock_http.py @@ -1,3 +1,4 @@ +import json as _json from unittest.mock import MagicMock from twscrape.http import HttpClient, HttpMethod, Response @@ -42,6 +43,12 @@ def add_exception(self, exc: Exception) -> "MockClient": self._queue.append(("exc", exc)) return self + def add_invalid_json_response( + self, *, status_code: int = 200, text: str = "not-json", headers: dict | None = None + ) -> "MockClient": + self._queue.append(("invalid_json", status_code, text, headers)) + return self + @property def cookies(self): return self._cookies @@ -56,6 +63,11 @@ async def request(self, method: HttpMethod, url: str, **kwargs) -> Response: item = self._queue.pop(0) if item[0] == "exc": raise item[1] + if item[0] == "invalid_json": + _, status_code, text, headers = item + raw = _raw(status_code=status_code, text=text, headers=headers) + raw.json.side_effect = _json.JSONDecodeError("no json", "", 0) + return Response(raw) _, status_code, json_data, text, headers = item return Response( _raw(status_code=status_code, json_data=json_data, text=text, headers=headers) diff --git a/tests/test_http.py b/tests/test_http.py index 7fb4e015..a0fd97c8 100644 --- a/tests/test_http.py +++ 
b/tests/test_http.py @@ -241,3 +241,204 @@ async def test_httpx_client_returns_response_wrapper(): assert rep.status_code == 200 assert rep.json() == {"ok": True} await client.aclose() + + +async def test_httpx_client_maps_connect_timeout(): + from unittest.mock import AsyncMock, patch + + import httpx + + from twscrape.http import HttpxClient + + client = HttpxClient() + with ( + patch.object( + client._client, "request", AsyncMock(side_effect=httpx.ConnectTimeout("timeout")) + ), + pytest.raises(ConnectError), + ): + await client.get("https://example.com") + await client.aclose() + + +async def test_httpx_client_maps_write_and_pool_timeout(): + from unittest.mock import AsyncMock, patch + + import httpx + + from twscrape.http import HttpxClient + + for exc_cls in (httpx.WriteTimeout, httpx.PoolTimeout, httpx.ProxyError): + client = HttpxClient() + with ( + patch.object(client._client, "request", AsyncMock(side_effect=exc_cls("err"))), + pytest.raises(NetworkError), + ): + await client.get("https://example.com") + await client.aclose() + + +# --- _detect_backend: missing paths --- + + +def test_detect_backend_auto_curl_preferred(monkeypatch): + monkeypatch.delenv("TWS_HTTP_BACKEND", raising=False) + # curl_cffi is installed in the dev env, so auto-detect should pick it + result = _detect_backend() + assert result == "curl" + + +def test_detect_backend_env_curl_installed(monkeypatch): + monkeypatch.setenv("TWS_HTTP_BACKEND", "curl") + result = _detect_backend() + assert result == "curl" + + +# --- make_client: missing paths --- + + +def test_make_client_curl_returns_curl_client(): + from twscrape.http import CurlClient + + client = make_client("curl") + assert isinstance(client, CurlClient) + + +def test_make_client_unknown_backend_raises(): + with pytest.raises(ValueError, match="Unknown backend"): + make_client("unknown_backend") + + +def test_make_client_none_uses_auto_detect(monkeypatch): + monkeypatch.delenv("TWS_HTTP_BACKEND", raising=False) + from 
twscrape.http import CurlClient + + client = make_client(None) + assert isinstance(client, CurlClient) + + +# --- CurlClient --- + + +async def test_curl_client_returns_response_wrapper(): + from unittest.mock import AsyncMock, MagicMock, patch + + from twscrape.http import CurlClient + + raw = MagicMock() + raw.status_code = 200 + raw.text = "hello" + raw.content = b"hello" + raw.headers = {} + raw.url = "https://example.com" + raw.request = MagicMock() + raw.json.return_value = {"ok": True} + raw.raise_for_status.return_value = None + + client = CurlClient() + with patch.object(client._session, "request", AsyncMock(return_value=raw)): + rep = await client.get("https://example.com") + + assert isinstance(rep, Response) + assert rep.status_code == 200 + assert rep.json() == {"ok": True} + await client.aclose() + + +async def test_curl_client_connect_error_codes(): + from unittest.mock import AsyncMock, patch + + from curl_cffi.const import CurlECode + from curl_cffi.requests.errors import RequestsError + + from twscrape.http import CurlClient + + for code in (CurlECode(5), CurlECode(6), CurlECode(7)): + client = CurlClient() + err = RequestsError("connect failed", code=code) + with ( + patch.object(client._session, "request", AsyncMock(side_effect=err)), + pytest.raises(ConnectError), + ): + await client.get("https://example.com") + await client.aclose() + + +async def test_curl_client_network_error(): + from unittest.mock import AsyncMock, patch + + from curl_cffi.const import CurlECode + from curl_cffi.requests.errors import RequestsError + + from twscrape.http import CurlClient + + client = CurlClient() + err = RequestsError("operation timed out", code=CurlECode(28)) + + with ( + patch.object(client._session, "request", AsyncMock(side_effect=err)), + pytest.raises(NetworkError), + ): + await client.get("https://example.com") + await client.aclose() + + +async def test_curl_client_cookies_and_headers(): + from twscrape.http import CurlClient + + client = 
CurlClient(headers={"x-foo": "bar"}, cookies={"ct0": "token"}) + assert "ct0" in client.cookies + assert client.headers is not None + await client.aclose() + + +async def test_curl_client_non_curl_error_propagates(): + from unittest.mock import AsyncMock, patch + + from twscrape.http import CurlClient + + client = CurlClient() + with ( + patch.object(client._session, "request", AsyncMock(side_effect=ValueError("unexpected"))), + pytest.raises(ValueError), + ): + await client.get("https://example.com") + await client.aclose() + + +# --- HttpClient.post --- + + +async def test_http_client_post_delegates_to_request(): + rep_mock = Response(_mock_response(201)) + + class PostableClient(HttpClient): + async def request(self, method, url, **kwargs): + assert method == "POST" + return rep_mock + + async def aclose(self): + pass + + @property + def cookies(self): + return {} + + @property + def headers(self): + return {} + + client = PostableClient() + result = await client.post("https://example.com", json={"x": 1}) + assert result is rep_mock + + +# --- Response: remaining properties --- + + +def test_response_content_and_headers(): + raw = _mock_response(200, "body", headers={"x-custom": "val"}) + rep = Response(raw) + assert rep.content == b"body" + assert rep.headers == {"x-custom": "val"} + assert rep.url == "https://example.com" diff --git a/tests/test_pool.py b/tests/test_pool.py index 69194731..15936376 100644 --- a/tests/test_pool.py +++ b/tests/test_pool.py @@ -1,4 +1,6 @@ -from twscrape.accounts_pool import AccountsPool +import pytest + +from twscrape.accounts_pool import AccountsPool, NoAccountError from twscrape.utils import utc @@ -161,3 +163,100 @@ async def test_get_stats(pool_mock: AccountsPool): assert stats["total"] == 1 assert stats["active"] == 1 assert stats[f"locked_{Q}"] == 1 + + +async def test_delete_accounts(pool_mock: AccountsPool): + await pool_mock.add_account("user1", "pass1", "email1", "ep1") + await pool_mock.add_account("user2", "pass2", 
"email2", "ep2") + + await pool_mock.delete_accounts("user1") + usernames = {x.username for x in await pool_mock.get_all()} + assert "user1" not in usernames + assert "user2" in usernames + + await pool_mock.delete_accounts(["user2"]) + assert len(await pool_mock.get_all()) == 0 + + +async def test_delete_inactive(pool_mock: AccountsPool): + await pool_mock.add_account("user1", "pass1", "email1", "ep1") + await pool_mock.add_account("user2", "pass2", "email2", "ep2") + await pool_mock.set_active("user2", True) + + await pool_mock.delete_inactive() + accs = await pool_mock.get_all() + assert len(accs) == 1 + assert accs[0].username == "user2" + + +async def test_reset_locks(pool_mock: AccountsPool): + Q = "TestQueue" + await pool_mock.add_account("user1", "pass1", "email1", "ep1") + await pool_mock.set_active("user1", True) + + await pool_mock.get_for_queue(Q) + user = await pool_mock.get("user1") + assert Q in user.locks + + await pool_mock.reset_locks() + user = await pool_mock.get("user1") + assert Q not in user.locks + + +async def test_mark_inactive(pool_mock: AccountsPool): + await pool_mock.add_account("user1", "pass1", "email1", "ep1") + await pool_mock.set_active("user1", True) + + await pool_mock.mark_inactive("user1", "banned by system") + acc = await pool_mock.get("user1") + assert acc.active is False + assert acc.error_msg == "banned by system" + + +async def test_next_available_at_none_when_empty(pool_mock: AccountsPool): + assert await pool_mock.next_available_at("TestQueue") is None + + +async def test_next_available_at_returns_future_time(pool_mock: AccountsPool): + Q = "TestQueue" + await pool_mock.add_account("user1", "pass1", "email1", "ep1") + await pool_mock.set_active("user1", True) + await pool_mock.lock_until("user1", Q, utc.ts() + 3600) + + result = await pool_mock.next_available_at(Q) + assert result is not None + assert result != "now" + + +async def test_get_for_queue_or_wait_raises_when_flag_set(pool_mock: AccountsPool): + pool = 
AccountsPool(pool_mock._db_file, raise_when_no_account=True) + with pytest.raises(NoAccountError): + await pool.get_for_queue_or_wait("TestQueue") + + +async def test_get_for_queue_or_wait_raises_via_env(pool_mock: AccountsPool, monkeypatch): + monkeypatch.setenv("TWS_RAISE_WHEN_NO_ACCOUNT", "1") + with pytest.raises(NoAccountError): + await pool_mock.get_for_queue_or_wait("TestQueue") + + +async def test_accounts_info_active_first(pool_mock: AccountsPool): + await pool_mock.add_account("user_b", "pass", "email", "ep") + await pool_mock.add_account("user_a", "pass", "email", "ep") + await pool_mock.set_active("user_a", True) + + items = await pool_mock.accounts_info() + assert items[0]["username"] == "user_a" + assert items[0]["active"] is True + assert items[1]["username"] == "user_b" + assert items[1]["active"] is False + + +async def test_load_from_file(pool_mock: AccountsPool, tmp_path): + filepath = tmp_path / "accounts.txt" + filepath.write_text("user1:pass1:email1:ep1\nuser2:pass2:email2:ep2\n") + + await pool_mock.load_from_file(str(filepath), "username:password:email:email_password") + usernames = {x.username for x in await pool_mock.get_all()} + assert "user1" in usernames + assert "user2" in usernames diff --git a/tests/test_queue_client.py b/tests/test_queue_client.py index c1395ebf..4097606a 100644 --- a/tests/test_queue_client.py +++ b/tests/test_queue_client.py @@ -5,6 +5,7 @@ from twscrape.accounts_pool import AccountsPool from twscrape.http import ConnectError, NetworkError from twscrape.queue_client import QueueClient +from twscrape.utils import utc from .mock_http import MockClient @@ -304,3 +305,152 @@ async def test_html_block_without_cf_returns_none(client_fixture: CF): assert rep is None await client.__aexit__(None, None, None) + + +# --- _check_rep branches --- + + +async def test_131_with_user_data_continues(client_fixture: CF): + pool, client, mock = client_fixture + await client.__aenter__() + + mock.add_response( + json={ + "errors": 
[{"code": 131, "message": "Dependency: Internal error"}], + "data": {"user": {"id": "123"}}, + } + ) + + rep = await client.get(URL) + assert rep is not None + + await client.__aexit__(None, None, None) + + +async def test_131_without_user_data_aborts(client_fixture: CF): + pool, client, mock = client_fixture + await client.__aenter__() + + mock.add_response(json={"errors": [{"code": 131, "message": "Dependency: Internal error"}]}) + + rep = await client.get(URL) + assert rep is None + + await client.__aexit__(None, None, None) + + +async def test_missing_status_error_ignored(client_fixture: CF): + pool, client, mock = client_fixture + await client.__aenter__() + + mock.add_response( + json={"errors": [{"code": -1, "message": "_Missing: No status found with that ID"}]} + ) + + rep = await client.get(URL) + assert rep is not None + + await client.__aexit__(None, None, None) + + +async def test_authorization_error_200_ignored(client_fixture: CF): + pool, client, mock = client_fixture + await client.__aenter__() + + mock.add_response( + json={"errors": [{"code": -1, "message": "Authorization: Denied by unknown rule"}]} + ) + + rep = await client.get(URL) + assert rep is not None + + await client.__aexit__(None, None, None) + + +async def test_unknown_error_msg_ignored(client_fixture: CF): + pool, client, mock = client_fixture + await client.__aenter__() + + mock.add_response(json={"errors": [{"code": 999, "message": "Some unfamiliar error"}]}) + + rep = await client.get(URL) + assert rep is not None + + await client.__aexit__(None, None, None) + + +async def test_unhandled_status_code_locks_and_retries(client_fixture: CF): + pool, client, mock = client_fixture + await client.__aenter__() + + mock.add_response(status_code=500, json={}) + mock.add_response(json={"ok": True}) + + rep = await client.get(URL) + assert rep is not None + assert rep.json() == {"ok": True} + + user1 = next(x for x in await pool.get_all() if x.username == "user1") + assert "SearchTimeline" in 
user1.locks + assert int(user1.locks["SearchTimeline"].timestamp()) > utc.ts() + 60 * 10 + + await client.__aexit__(None, None, None) + + +async def test_no_active_accounts_returns_none(pool_mock: AccountsPool): + client = QueueClient(pool_mock, "SearchTimeline") + rep = await client.get(URL) + assert rep is None + + +async def test_unknown_exception_retries_then_locks_account(client_fixture: CF): + pool, client, mock = client_fixture + await client.__aenter__() + + mock.add_exception(RuntimeError("boom")) + mock.add_exception(RuntimeError("boom")) + mock.add_exception(RuntimeError("boom")) + mock.add_response(json={"ok": True}) + + rep = await client.get(URL) + assert rep is not None + assert rep.json() == {"ok": True} + + user1 = next(x for x in await pool.get_all() if x.username == "user1") + assert "SearchTimeline" in user1.locks + + await client.__aexit__(None, None, None) + + +async def test_invalid_json_body_falls_back_to_raw_text(client_fixture: CF): + pool, client, mock = client_fixture + await client.__aenter__() + + mock.add_invalid_json_response(text="not-json") + rep = await client.get(URL) + assert rep is not None + + await client.__aexit__(None, None, None) + + +async def test_close_ctx_noop_when_ctx_is_none(pool_mock: AccountsPool): + client = QueueClient(pool_mock, "SearchTimeline") + # ctx is None — _close_ctx must be a no-op + await client._close_ctx() + + +async def test_404_retries_exhaust_and_abort(client_fixture: CF): + from unittest.mock import patch + + pool, client, mock = client_fixture + await client.__aenter__() + + mock.add_response(status_code=404, json={}) + mock.add_response(status_code=404, json={}) + mock.add_response(status_code=404, json={}) + + with patch("twscrape.queue_client.asyncio.sleep"): + rep = await client.get(URL) + assert rep is None + + await client.__aexit__(None, None, None) diff --git a/tests/test_utils.py b/tests/test_utils.py index 893a11e0..b50889fa 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ 
-1,6 +1,6 @@ import pytest -from twscrape.utils import parse_cookies, parse_proxy +from twscrape.utils import get_env_bool, parse_cookies, parse_proxy, to_old_obj def test_cookies_parse(): @@ -45,3 +45,57 @@ def test_proxy_parse(): # user:pass@host:port (no scheme) assert parse_proxy("user:pass@1.2.3.4:8080") == "http://user:pass@1.2.3.4:8080" + + +def test_get_env_bool(monkeypatch): + monkeypatch.delenv("TEST_BOOL_FLAG", raising=False) + assert get_env_bool("TEST_BOOL_FLAG") is False + assert get_env_bool("TEST_BOOL_FLAG", default_val=True) is True + + for truthy in ("1", "true", "yes", "True", "YES"): + monkeypatch.setenv("TEST_BOOL_FLAG", truthy) + assert get_env_bool("TEST_BOOL_FLAG") is True + + for falsy in ("0", "false", "no", ""): + monkeypatch.setenv("TEST_BOOL_FLAG", falsy) + assert get_env_bool("TEST_BOOL_FLAG") is False + + +def test_to_old_obj_user_new_schema(): + obj = { + "__typename": "User", + "rest_id": "12345", + "core": { + "screen_name": "testuser", + "name": "Test User", + "created_at": "Mon Jan 01 00:00:00 +0000 2020", + }, + "avatar": {"image_url": "https://example.com/avatar.jpg"}, + "location": {"location": "Earth"}, + "privacy": {"protected": False}, + "verification": {"verified": True}, + "profile_bio": {"description": "A test bio"}, + "is_blue_verified": True, + } + + flat = to_old_obj(obj) + assert flat["screen_name"] == "testuser" + assert flat["profile_image_url_https"] == "https://example.com/avatar.jpg" + assert flat["location"] == "Earth" + assert flat["protected"] is False + assert flat["verified"] is True + assert flat["description"] == "A test bio" + assert flat["is_blue_verified"] is True + assert flat["id"] == 12345 + + +def test_to_old_obj_tweet_new_schema(): + obj = { + "__typename": "Tweet", + "rest_id": "9876", + "source": "Twitter Web App", + } + + flat = to_old_obj(obj) + assert flat["source"] == "Twitter Web App" + assert flat["id"] == 9876 diff --git a/twscrape/http.py b/twscrape/http.py index 19449bc6..7457b0e6 
100644 --- a/twscrape/http.py +++ b/twscrape/http.py @@ -179,8 +179,7 @@ def _detect_backend() -> str: if forced == "httpx": if importlib.util.find_spec("httpx") is None: raise ImportError( - "TWS_HTTP_BACKEND=httpx but httpx is not installed. " - "Run: pip install twscrape[httpx]" + "TWS_HTTP_BACKEND=httpx but httpx is not installed. Run: pip install twscrape" ) return "httpx" @@ -193,7 +192,7 @@ def _detect_backend() -> str: return "httpx" raise ImportError( - "No HTTP backend installed. Run: pip install twscrape[httpx] or pip install twscrape[curl]" + "No HTTP backend installed. Run: pip install twscrape or pip install twscrape[curl]" ) diff --git a/uv.lock b/uv.lock index 9761fbb7..19382a8e 100644 --- a/uv.lock +++ b/uv.lock @@ -634,6 +634,7 @@ dependencies = [ { name = "aiosqlite" }, { name = "beautifulsoup4" }, { name = "fake-useragent" }, + { name = "httpx" }, { name = "loguru" }, { name = "pyotp" }, ] @@ -642,9 +643,6 @@ dependencies = [ curl = [ { name = "curl-cffi" }, ] -httpx = [ - { name = "httpx" }, -] [package.dev-dependencies] dev = [ @@ -663,11 +661,11 @@ requires-dist = [ { name = "beautifulsoup4", specifier = ">=4.13.0" }, { name = "curl-cffi", marker = "extra == 'curl'", specifier = ">=0.7.0" }, { name = "fake-useragent", specifier = ">=1.4.0" }, - { name = "httpx", marker = "extra == 'httpx'", specifier = ">=0.26.0" }, + { name = "httpx", specifier = ">=0.26.0" }, { name = "loguru", specifier = ">=0.7.0" }, { name = "pyotp", specifier = ">=2.9.0" }, ] -provides-extras = ["httpx", "curl"] +provides-extras = ["curl"] [package.metadata.requires-dev] dev = [ From 6421fd1c3c7cd395c9c53ae3631ed3715a7db7c5 Mon Sep 17 00:00:00 2001 From: vladkens Date: Fri, 22 May 2026 03:03:07 +0300 Subject: [PATCH 4/9] fix: pr comments --- tests/test_http.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tests/test_http.py b/tests/test_http.py index a0fd97c8..08f2be83 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ 
-137,7 +137,7 @@ def test_detect_backend_no_backends(monkeypatch): # --- HttpClient base --- -def test_http_client_is_async_context_manager(): +async def test_http_client_is_async_context_manager(): class MinimalClient(HttpClient): closed = False @@ -155,15 +155,10 @@ def cookies(self): def headers(self): return {} - import asyncio - - async def run(): - client = MinimalClient() - async with client as c: - assert c is client - assert client.closed - - asyncio.get_event_loop().run_until_complete(run()) + client = MinimalClient() + async with client as c: + assert c is client + assert client.closed # --- HttpxClient --- From d3dd0b391fd2ace10c3d36188a2f802106c4c7ce Mon Sep 17 00:00:00 2001 From: vladkens Date: Fri, 22 May 2026 03:06:50 +0300 Subject: [PATCH 5/9] fix: ci --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f1861f1a..5412b60d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,6 +2,8 @@ name: build on: push: + branches: [main] + tags: ["v*"] jobs: test: From afa3e80f164af3b999dacd62240ecd2d804f8b00 Mon Sep 17 00:00:00 2001 From: vladkens Date: Fri, 22 May 2026 04:07:30 +0300 Subject: [PATCH 6/9] docs: add browser impersonate & UA resolution plan --- .github/workflows/pr.yml | 21 -- docs/plans/20260522-browser-impersonate.md | 294 +++++++++++++++++++++ scripts/update_gql_ops.py | 4 +- tests/test_http.py | 67 +++-- twscrape/http.py | 21 +- 5 files changed, 352 insertions(+), 55 deletions(-) create mode 100644 docs/plans/20260522-browser-impersonate.md diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 2deb717a..4fbab286 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -2,14 +2,11 @@ name: pr on: pull_request: - pull_request_target: - types: [opened, reopened] permissions: read-all jobs: test: - if: github.event_name == 'pull_request' runs-on: ubuntu-latest strategy: matrix: @@ -23,21 +20,3 @@ jobs: - run: 
uv sync --frozen - run: make check - run: make test - - maintainer-edits: - if: github.event_name == 'pull_request_target' && github.event.pull_request.head.repo.full_name != github.repository - runs-on: ubuntu-latest - permissions: - pull-requests: write - steps: - - name: Warn if maintainer edits disabled - if: "!github.event.pull_request.maintainer_can_modify" - uses: actions/github-script@v7 - with: - script: | - await github.rest.issues.createComment({ - issue_number: context.issue.number, - owner: context.repo.owner, - repo: context.repo.repo, - body: 'Please enable **[Allow edits from maintainers](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/allowing-changes-to-a-pull-request-branch-created-from-a-fork#enabling-repository-maintainer-permissions-on-existing-pull-requests)** on this PR — this lets us push fixes directly to your branch without waiting for you to resolve conflicts manually.' - }) diff --git a/docs/plans/20260522-browser-impersonate.md b/docs/plans/20260522-browser-impersonate.md new file mode 100644 index 00000000..be7ad9f1 --- /dev/null +++ b/docs/plans/20260522-browser-impersonate.md @@ -0,0 +1,294 @@ +# Browser Impersonate & UA Resolution + +## Overview + +Replace `fake_useragent` dependency and hardcoded `impersonate="chrome"` (not a valid +`BrowserType` value) with a `_resolve_browser(hint)` function that maps meta-strings +(`"@chrome"`, `"@safari"`, `"@firefox"`, `"@edge"`) or real UA strings to a +`(ua_string, impersonate_profile)` pair. 
+ +**Problem it solves:** +- `fake_useragent` is deprecated and will be removed +- `impersonate="chrome"` is not a valid `BrowserType` value — could silently break +- Safari UA (from `fake_useragent`) + Chrome TLS fingerprint = bot-detection red flag +- No way to control the browser profile without patching source + +**Key benefits:** +- `_latest_profile(family)` queries `BrowserType` at runtime → new curl_cffi versions auto-picked +- Both backends use consistent browser identity +- `fake_useragent` removed from all source files and `pyproject.toml` +- Accounts get a stable `"@chrome"` / `"@safari"` etc. stored in DB at creation time + +## Context (from discovery) + +- **Files involved:** `twscrape/http.py`, `twscrape/account.py`, `twscrape/accounts_pool.py`, + `twscrape/xclid.py`, `pyproject.toml`, `tests/test_http.py` +- **`fake_useragent` used in:** + - `accounts_pool.py:7` — import; `line 96` — `UserAgent().safari` in `add_account` default; + `line 213` — `UserAgent().safari` inlined in raw SQL inside `relogin` + - `xclid.py:10` — import; `line 17` — `UserAgent().chrome` in `_make_client` +- **curl_cffi 0.11.4 `BrowserType` desktop profiles:** `chrome99`…`chrome136` (note `chrome133a` + variant), `firefox133`, `firefox135`, `safari15_3`, `safari15_5`, `safari17_0`, `safari18_0`, + `edge99`, `edge101`; excluded: `*_android`, `*_ios`, `tor145` + +## Development Approach + +- **Testing approach:** Regular (code → tests) +- Complete each task fully before moving to the next; all tests must pass + +## Solution Overview + +**Single resolution point:** `Account.make_client()` and `xclid._make_client()` call +`_resolve_browser(hint)` and pass already-resolved values to `make_client()`. +`CurlClient` and `HttpxClient` receive final values; no resolution inside constructors. + +``` +hint: "@chrome" | "@safari" | "@firefox" | "@edge" | real-UA-string + └→ family: "chrome" | "safari" | "firefox" | "edge" + ├─ _latest_profile(family) → impersonate_profile (e.g. 
"chrome136") + └─ _ua_for_profile(profile) → ua_string +``` + +`_pick_browser_hint()` (used by `add_account` default): random family by weight +`[chrome 60%, safari 20%, firefox 15%, edge 5%]`, returns `"@chrome"` etc. — called once at +account creation so each account has a **stable** browser identity in the DB. + +**HttpxClient:** receives `headers` with resolved `ua_string`; `impersonate` dropped in +`make_client()` — not passed to `HttpxClient` at all. + +**CurlClient:** receives `impersonate` profile directly; `user-agent` key stripped from headers +copy (not mutating caller dict) so curl_cffi sets its own matching UA automatically. + +## Technical Details + +```python +_BROWSER_FAMILIES = {"chrome", "safari", "firefox", "edge"} + +_BROWSER_WEIGHTS = [("chrome", 60), ("safari", 20), ("firefox", 15), ("edge", 5)] + +def _pick_browser_hint() -> str: + """Return a random "@" hint using browser market-share weights.""" + import random + families, weights = zip(*_BROWSER_WEIGHTS) + return "@" + random.choices(families, weights=weights)[0] + +def _latest_profile(family: str) -> str: + """Find the highest-versioned desktop BrowserType entry for family. + + Uses strict allow-list regex ^\\d+(_\\d+)?$ to reject: + - chrome133a (alpha/beta suffix) + - chrome131_android, safari17_2_ios (mobile) + - tor145 (different family) + """ + import re + from curl_cffi.requests import BrowserType + pattern = re.compile(rf"^{family}\d+(_\d+)?$") + candidates = [name for name in dir(BrowserType) if pattern.match(name)] + if not candidates: + raise ValueError(f"No BrowserType entry found for family {family!r}") + # sort by numeric components: safari18_0 > safari17_0 > safari15_5 + candidates.sort(key=lambda s: [int(x) for x in re.findall(r"\d+", s)]) + return candidates[-1] + +# UA templates — version number extracted from profile name and injected. +# Platform: Windows for Chrome/Firefox/Edge, macOS for Safari. 
+def _ua_for_profile(profile: str) -> str: + """Return a matching User-Agent string for the given impersonate profile. + + profile examples: "chrome136", "safari18_0", "firefox135", "edge101" + """ + import re + nums = re.findall(r"\d+", profile) + major = nums[0] # e.g. "136" from "chrome136", "18" from "safari18_0" + if profile.startswith("chrome"): + return (f"Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + f"AppleWebKit/537.36 (KHTML, like Gecko) " + f"Chrome/{major}.0.0.0 Safari/537.36") + if profile.startswith("edge"): + return (f"Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + f"AppleWebKit/537.36 (KHTML, like Gecko) " + f"Chrome/{major}.0.0.0 Safari/537.36 Edg/{major}.0.0.0") + if profile.startswith("firefox"): + return (f"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:{major}.0) " + f"Gecko/20100101 Firefox/{major}.0") + if profile.startswith("safari"): + minor = nums[1] if len(nums) > 1 else "0" + wk = "605.1.15" + return (f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " + f"AppleWebKit/{wk} (KHTML, like Gecko) " + f"Version/{major}.{minor} Safari/{wk}") + raise ValueError(f"Unknown profile {profile!r}") + +# Family detection from a real UA string (for existing DB rows with fake_useragent UA). +# Order matters: check Edge before Chrome (Edge UA contains "Chrome/"). +_UA_FAMILY_RULES = [ + ("Edg/", "edge"), + ("Firefox/", "firefox"), + ("Chrome/", "chrome"), + ("Safari/", "safari"), # Safari UA does NOT contain "Chrome/" +] + +def _detect_family(ua: str) -> str: + for marker, family in _UA_FAMILY_RULES: + if marker in ua: + return family + return "chrome" # fallback + +def _resolve_browser(hint: str | None) -> tuple[str, str]: + """Return (ua_string, impersonate_profile) for the given hint. + + hint=None or "@chrome"/"@safari"/"@firefox"/"@edge" → use that family. + hint=real UA string → detect family, use latest profile for it. + Unknown "@xxx" hint → fallback to "@chrome". 
+ """ + if hint is None: + hint = "@chrome" + if hint.startswith("@"): + family = hint[1:].lower() + if family not in _BROWSER_FAMILIES: + family = "chrome" + else: + family = _detect_family(hint) + profile = _latest_profile(family) + ua = _ua_for_profile(profile) + return ua, profile +``` + +`make_client()` updated signature: +```python +def make_client(backend=None, *, proxy=None, headers=None, cookies=None, + impersonate: str | None = None) -> HttpClient: + ... + if backend == "curl": + return CurlClient(proxy=proxy, headers=headers, cookies=cookies, + impersonate=impersonate) + if backend == "httpx": + # impersonate silently dropped; httpx has no TLS fingerprinting + return HttpxClient(proxy=proxy, headers=headers, cookies=cookies) +``` + +`CurlClient.__init__` strips `user-agent` from a **copy** of headers: +```python +safe_headers = {k: v for k, v in (headers or {}).items() + if k.lower() != "user-agent"} +self._session = AsyncSession( + impersonate=impersonate, proxy=proxy, allow_redirects=True, + headers=safe_headers, +) +``` + +## What Goes Where + +**Implementation Steps** — all code changes and their tests. + +**Post-Completion** — real login flow verification with accounts carrying old +`fake_useragent`-generated UA strings already in the DB. 
+ +## Implementation Steps + +### Task 1: Core helpers in `http.py` + +**Files:** +- Modify: `twscrape/http.py` +- Modify: `tests/test_http.py` + +- [ ] add `_BROWSER_FAMILIES`, `_BROWSER_WEIGHTS`, `_UA_FAMILY_RULES` constants +- [ ] implement `_pick_browser_hint()` using `random.choices` +- [ ] implement `_latest_profile(family)` with strict allow-list regex (see Technical Details) +- [ ] implement `_ua_for_profile(profile)` with template per family (see Technical Details) +- [ ] implement `_detect_family(ua)` using `_UA_FAMILY_RULES` ordered detection +- [ ] implement `_resolve_browser(hint)` composing the above +- [ ] add `impersonate: str | None = None` to `make_client()` signature; pass to `CurlClient`; + silently drop for `HttpxClient` +- [ ] tests: `_latest_profile("chrome")` returns a `chrome*` member of `BrowserType` +- [ ] tests: `_latest_profile("safari")` returns `safari18_0` (current latest desktop) +- [ ] tests: `_latest_profile` excludes `chrome133a`, `chrome131_android`, `safari17_2_ios` +- [ ] tests: `_ua_for_profile` produces correct UA for chrome, safari, firefox, edge profiles +- [ ] tests: `_detect_family` correctly identifies Edge/Firefox/Chrome/Safari + fallback +- [ ] tests: `_resolve_browser` for all hint variants (None, @chrome, @safari, real UA, unknown @hint) +- [ ] tests: `_pick_browser_hint` with monkeypatched `random.choices` returns `"@"` form +- [ ] run tests — must pass before task 2 + +### Task 2: Update `CurlClient` to use `impersonate` param + +**Files:** +- Modify: `twscrape/http.py` +- Modify: `tests/test_http.py` + +- [ ] add `impersonate: str | None = None` to `CurlClient.__init__` +- [ ] strip `user-agent` from a copy of `headers` (do not mutate caller's dict) +- [ ] pass `impersonate` to `AsyncSession(impersonate=impersonate, ...)` +- [ ] remove `_CURL_IMPERSONATE` constant usage (already replaced in Task 1) +- [ ] update existing `test_curl_client_*` tests to pass an explicit `impersonate` or verify + the existing 
auto-resolution path no longer lives in `CurlClient` +- [ ] test: `CurlClient(impersonate="chrome136")` passes `"chrome136"` to `AsyncSession` +- [ ] test: caller's headers dict passed to `CurlClient` with a `user-agent` key is not mutated +- [ ] run tests — must pass before task 3 + +### Task 3: Update `Account.make_client()` — resolution entry point + +**Files:** +- Modify: `twscrape/account.py` +- Modify: `tests/test_http.py` or `tests/test_queue_client.py` + +- [ ] import `_resolve_browser` from `.http` +- [ ] call `ua_string, impersonate = _resolve_browser(self.user_agent)` at top of `make_client` +- [ ] set `headers["user-agent"] = ua_string` (for httpx backend to use) +- [ ] pass `impersonate=impersonate` to `_make_http_client(...)` +- [ ] test: account with `user_agent="@safari"` → `make_client` creates a `CurlClient` with + `impersonate` matching `safari*` +- [ ] test: account with `user_agent="@chrome"` → `HttpxClient` headers contain a Chrome UA string +- [ ] test: account with old `fake_useragent`-style Safari UA string → resolves to `safari*` profile +- [ ] run tests — must pass before task 4 + +### Task 4: Update `AccountsPool` — remove `fake_useragent` + +**Files:** +- Modify: `twscrape/accounts_pool.py` + +- [ ] import `_pick_browser_hint` from `.http` +- [ ] in `add_account`: replace `user_agent or UserAgent().safari` with + `user_agent or _pick_browser_hint()` +- [ ] in `relogin` SQL (line 213): replace `"{UserAgent().safari}"` with `"@chrome"` + (reset accounts to explicit Chrome; no runtime call inside SQL string) +- [ ] remove `from fake_useragent import UserAgent` from `accounts_pool.py` +- [ ] test: `add_account(user_agent=None)` stores a `"@"` meta-string (not a real UA) +- [ ] test: `add_account(user_agent="@safari")` stores `"@safari"` verbatim +- [ ] run tests — must pass before task 5 + +### Task 5: Update `xclid.py` and remove `fake_useragent` dependency + +**Files:** +- Modify: `twscrape/xclid.py` +- Modify: `pyproject.toml` + +- [ ]
replace `headers={"user-agent": UserAgent().chrome}` in `xclid._make_client()` with + `headers={"user-agent": "@chrome"}` — preserves explicit Chrome intent +- [ ] remove `from fake_useragent import UserAgent` from `xclid.py` +- [ ] remove `fake-useragent>=1.4.0` from `dependencies` in `pyproject.toml` +- [ ] run `uv sync` to verify dependency graph is clean +- [ ] verify: `grep -r fake_useragent .` (excluding `.venv`) returns no matches +- [ ] run full test suite — must pass + +### Task 6: Verify acceptance criteria + +- [ ] `_latest_profile("chrome")` returns the highest-versioned `chrome\d+` entry in current curl_cffi +- [ ] `"@safari"` → `CurlClient` impersonates a `safari*` profile; `HttpxClient` headers contain Safari UA +- [ ] real Chrome UA → both clients resolve to `chrome*` profile / Chrome UA +- [ ] `add_account(user_agent=None)` stores `"@"`, not a real UA string +- [ ] no `fake_useragent` import anywhere in `twscrape/` +- [ ] run `uv run pytest` — all tests pass + +### Task 7: [Final] Tidy up + +- [ ] document `@` meta-string convention in `CLAUDE.md` +- [ ] move this plan to `docs/plans/completed/` + +## Post-Completion + +**Manual verification:** +- Run a real login/request flow with an account that has an old `fake_useragent`-generated UA + string in the DB (e.g. `"Mozilla/5.0 ... 
Safari/605.1.15 ..."`); confirm `_resolve_browser` + falls back correctly and the request succeeds +- Confirm with debug logging that `CurlClient` sends the correct UA for its impersonated profile + (not any `@chrome` literal or old Safari string) diff --git a/scripts/update_gql_ops.py b/scripts/update_gql_ops.py index e6cae5b4..999ec425 100644 --- a/scripts/update_gql_ops.py +++ b/scripts/update_gql_ops.py @@ -13,7 +13,7 @@ import sys from typing import Any -from twscrape.http import make_client +from twscrape.http import HttpClient, make_client from twscrape.xclid import get_scripts_list, get_tw_page_text, script_url API_FILE = "twscrape/api.py" @@ -50,7 +50,7 @@ async def fetch_scripts(scripts: list[tuple[str, str]], force: bool) -> None: print(f"Downloading {len(todo)} scripts.") sem = asyncio.Semaphore(10) - async def fetch(clt: Any, i: int, url: str, path: str) -> None: + async def fetch(clt: HttpClient, i: int, url: str, path: str) -> None: async with sem: print(f" ({i:3d}/{len(todo):3d}) {url}") rep = await clt.get(url) diff --git a/tests/test_http.py b/tests/test_http.py index 08f2be83..82d98f4c 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -9,36 +9,19 @@ HttpStatusError, NetworkError, Response, + _CURL_MAX_RETRIES, _detect_backend, make_client, ) - -def _mock_response(status_code=200, text="ok", json_data=None, headers=None): - rep = MagicMock() - rep.status_code = status_code - rep.text = text - rep.content = text.encode() - rep.headers = headers or {} - rep.url = "https://example.com" - rep.request = MagicMock() - rep.request.method = "GET" - rep.request.url = "https://example.com" - rep.json.return_value = json_data or {} - - if status_code >= 400: - rep.raise_for_status.side_effect = Exception(f"HTTP {status_code}") - else: - rep.raise_for_status.return_value = None - - return rep +from .mock_http import _raw # --- Response wrapper --- def test_response_delegates_attributes(): - raw = _mock_response(200, "hello", {"key": "val"}) + raw = 
_raw(status_code=200, text="hello", json_data={"key": "val"}) rep = Response(raw) assert rep.status_code == 200 assert rep.text == "hello" @@ -47,12 +30,12 @@ def test_response_delegates_attributes(): def test_response_raise_for_status_ok(): - rep = Response(_mock_response(200)) + rep = Response(_raw(status_code=200)) rep.raise_for_status() # should not raise def test_response_raise_for_status_error(): - rep = Response(_mock_response(403)) + rep = Response(_raw(status_code=403, text="ok")) with pytest.raises(HttpStatusError) as exc_info: rep.raise_for_status() err = exc_info.value @@ -62,7 +45,7 @@ def test_response_raise_for_status_error(): def test_response_allows_setattr(): - rep = Response(_mock_response(200)) + rep = Response(_raw(status_code=200)) setattr(rep, "__username", "alice") assert getattr(rep, "__username") == "alice" @@ -77,7 +60,7 @@ def test_exception_hierarchy(): def test_http_status_error_carries_response(): - raw = _mock_response(500, "server error") + raw = _raw(status_code=500, text="server error") resp = Response(raw) err = HttpStatusError("fail", response=resp) assert err.response is resp @@ -142,7 +125,7 @@ class MinimalClient(HttpClient): closed = False async def request(self, method, url, **kwargs): - return Response(_mock_response()) + return Response(_raw()) async def aclose(self): self.closed = True @@ -387,6 +370,26 @@ async def test_curl_client_cookies_and_headers(): await client.aclose() +async def test_curl_client_retries_network_error(): + from unittest.mock import AsyncMock, patch + + from curl_cffi.const import CurlECode + from curl_cffi.requests.errors import RequestsError + + from twscrape.http import CurlClient + + client = CurlClient() + err = RequestsError("timeout", code=CurlECode(28)) + mock_req = AsyncMock(side_effect=err) + with ( + patch.object(client._session, "request", mock_req), + pytest.raises(NetworkError), + ): + await client.get("https://example.com") + assert mock_req.call_count == _CURL_MAX_RETRIES + 1 + 
await client.aclose() + + async def test_curl_client_non_curl_error_propagates(): from unittest.mock import AsyncMock, patch @@ -405,7 +408,7 @@ async def test_curl_client_non_curl_error_propagates(): async def test_http_client_post_delegates_to_request(): - rep_mock = Response(_mock_response(201)) + rep_mock = Response(_raw(status_code=201)) class PostableClient(HttpClient): async def request(self, method, url, **kwargs): @@ -432,8 +435,16 @@ def headers(self): def test_response_content_and_headers(): - raw = _mock_response(200, "body", headers={"x-custom": "val"}) + raw = _raw(status_code=200, text="body", headers={"x-custom": "val"}) rep = Response(raw) assert rep.content == b"body" assert rep.headers == {"x-custom": "val"} - assert rep.url == "https://example.com" + assert rep.url == "https://mock.local" + + +def test_response_json_is_cached(): + raw = _raw(status_code=200, json_data={"x": 1}) + rep = Response(raw) + _ = rep.json() + _ = rep.json() + assert raw.json.call_count == 1 diff --git a/twscrape/http.py b/twscrape/http.py index 7457b0e6..522b9074 100644 --- a/twscrape/http.py +++ b/twscrape/http.py @@ -2,7 +2,11 @@ import os from typing import Any, Literal -HttpMethod = Literal["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "TRACE", "PATCH", "QUERY"] +HttpMethod = Literal["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "TRACE", "PATCH"] + +_UNSET = object() +_CURL_IMPERSONATE = "chrome" +_CURL_MAX_RETRIES = 3 class Response: @@ -10,6 +14,7 @@ class Response: def __init__(self, rep: Any): self._rep = rep + self._json: Any = _UNSET @property def status_code(self) -> int: @@ -36,7 +41,9 @@ def request(self) -> Any: return self._rep.request def json(self) -> Any: - return self._rep.json() + if self._json is _UNSET: + self._json = self._rep.json() + return self._json def raise_for_status(self) -> None: try: @@ -132,7 +139,7 @@ def __init__( from curl_cffi.requests import AsyncSession self._session = AsyncSession( - impersonate="chrome", proxy=proxy, 
allow_redirects=True, headers=headers or {} + impersonate=_CURL_IMPERSONATE, proxy=proxy, allow_redirects=True, headers=headers or {} ) if cookies: self._session.cookies.update(cookies) @@ -146,7 +153,13 @@ def headers(self) -> Any: return self._session.headers async def request(self, method: HttpMethod, url: str, **kwargs) -> Response: - return await self._wrap(self._session.request(method, url, **kwargs)) + last_err: Exception | None = None + for _ in range(_CURL_MAX_RETRIES + 1): + try: + return await self._wrap(self._session.request(method, url, **kwargs)) + except NetworkError as e: + last_err = e + raise last_err # type: ignore[misc] async def aclose(self) -> None: await self._session.close() From 50440649703fb59a2daedc4fe558390362a33d9d Mon Sep 17 00:00:00 2001 From: vladkens Date: Fri, 22 May 2026 20:42:15 +0300 Subject: [PATCH 7/9] feat: browser impersonation & deterministic UA resolution --- scripts/update_gql_ops.py | 1 - tests/test_http.py | 147 +++++++++++++++++++++----------------- twscrape/account.py | 4 +- twscrape/accounts_pool.py | 6 +- twscrape/http.py | 50 +++++++++++-- twscrape/xclid.py | 3 +- 6 files changed, 132 insertions(+), 79 deletions(-) diff --git a/scripts/update_gql_ops.py b/scripts/update_gql_ops.py index 999ec425..0861fd92 100644 --- a/scripts/update_gql_ops.py +++ b/scripts/update_gql_ops.py @@ -11,7 +11,6 @@ import os import re import sys -from typing import Any from twscrape.http import HttpClient, make_client from twscrape.xclid import get_scripts_list, get_tw_page_text, script_url diff --git a/tests/test_http.py b/tests/test_http.py index 82d98f4c..546c24b8 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -1,22 +1,27 @@ -from unittest.mock import MagicMock, patch +from unittest.mock import AsyncMock, MagicMock, patch +import httpx import pytest +from curl_cffi.const import CurlECode +from curl_cffi.requests.errors import RequestsError from twscrape.http import ( + _CURL_MAX_RETRIES, ConnectError, + CurlClient, 
HttpClient, HttpError, HttpStatusError, + HttpxClient, NetworkError, Response, - _CURL_MAX_RETRIES, _detect_backend, + _resolve_browser, make_client, ) from .mock_http import _raw - # --- Response wrapper --- @@ -149,15 +154,11 @@ def headers(self): def test_make_client_httpx_returns_httpx_client(monkeypatch): monkeypatch.delenv("TWS_HTTP_BACKEND", raising=False) - from twscrape.http import HttpxClient - client = make_client("httpx") assert isinstance(client, HttpxClient) async def test_httpx_client_cookies_and_headers_are_mutable(): - from twscrape.http import HttpxClient - client = HttpxClient(headers={"x-foo": "bar"}, cookies={"ct0": "token"}) # headers support __setitem__ client.headers["x-new"] = "value" @@ -167,12 +168,6 @@ async def test_httpx_client_cookies_and_headers_are_mutable(): async def test_httpx_client_maps_network_errors(): - from unittest.mock import AsyncMock, patch - - import httpx - - from twscrape.http import HttpxClient - client = HttpxClient() with ( patch.object( @@ -185,12 +180,6 @@ async def test_httpx_client_maps_network_errors(): async def test_httpx_client_maps_connect_errors(): - from unittest.mock import AsyncMock, patch - - import httpx - - from twscrape.http import HttpxClient - client = HttpxClient() with ( patch.object( @@ -203,11 +192,6 @@ async def test_httpx_client_maps_connect_errors(): async def test_httpx_client_returns_response_wrapper(): - from unittest.mock import AsyncMock, patch - - import httpx - - from twscrape.http import HttpxClient raw = httpx.Response( 200, json={"ok": True}, request=httpx.Request("GET", "https://example.com") @@ -222,11 +206,6 @@ async def test_httpx_client_returns_response_wrapper(): async def test_httpx_client_maps_connect_timeout(): - from unittest.mock import AsyncMock, patch - - import httpx - - from twscrape.http import HttpxClient client = HttpxClient() with ( @@ -240,11 +219,6 @@ async def test_httpx_client_maps_connect_timeout(): async def 
test_httpx_client_maps_write_and_pool_timeout(): - from unittest.mock import AsyncMock, patch - - import httpx - - from twscrape.http import HttpxClient for exc_cls in (httpx.WriteTimeout, httpx.PoolTimeout, httpx.ProxyError): client = HttpxClient() @@ -276,7 +250,6 @@ def test_detect_backend_env_curl_installed(monkeypatch): def test_make_client_curl_returns_curl_client(): - from twscrape.http import CurlClient client = make_client("curl") assert isinstance(client, CurlClient) @@ -289,7 +262,6 @@ def test_make_client_unknown_backend_raises(): def test_make_client_none_uses_auto_detect(monkeypatch): monkeypatch.delenv("TWS_HTTP_BACKEND", raising=False) - from twscrape.http import CurlClient client = make_client(None) assert isinstance(client, CurlClient) @@ -299,9 +271,6 @@ def test_make_client_none_uses_auto_detect(monkeypatch): async def test_curl_client_returns_response_wrapper(): - from unittest.mock import AsyncMock, MagicMock, patch - - from twscrape.http import CurlClient raw = MagicMock() raw.status_code = 200 @@ -324,12 +293,6 @@ async def test_curl_client_returns_response_wrapper(): async def test_curl_client_connect_error_codes(): - from unittest.mock import AsyncMock, patch - - from curl_cffi.const import CurlECode - from curl_cffi.requests.errors import RequestsError - - from twscrape.http import CurlClient for code in (CurlECode(5), CurlECode(6), CurlECode(7)): client = CurlClient() @@ -343,13 +306,6 @@ async def test_curl_client_connect_error_codes(): async def test_curl_client_network_error(): - from unittest.mock import AsyncMock, patch - - from curl_cffi.const import CurlECode - from curl_cffi.requests.errors import RequestsError - - from twscrape.http import CurlClient - client = CurlClient() err = RequestsError("operation timed out", code=CurlECode(28)) @@ -362,7 +318,6 @@ async def test_curl_client_network_error(): async def test_curl_client_cookies_and_headers(): - from twscrape.http import CurlClient client = CurlClient(headers={"x-foo": 
"bar"}, cookies={"ct0": "token"}) assert "ct0" in client.cookies @@ -371,13 +326,6 @@ async def test_curl_client_cookies_and_headers(): async def test_curl_client_retries_network_error(): - from unittest.mock import AsyncMock, patch - - from curl_cffi.const import CurlECode - from curl_cffi.requests.errors import RequestsError - - from twscrape.http import CurlClient - client = CurlClient() err = RequestsError("timeout", code=CurlECode(28)) mock_req = AsyncMock(side_effect=err) @@ -391,10 +339,6 @@ async def test_curl_client_retries_network_error(): async def test_curl_client_non_curl_error_propagates(): - from unittest.mock import AsyncMock, patch - - from twscrape.http import CurlClient - client = CurlClient() with ( patch.object(client._session, "request", AsyncMock(side_effect=ValueError("unexpected"))), @@ -448,3 +392,78 @@ def test_response_json_is_cached(): _ = rep.json() _ = rep.json() assert raw.json.call_count == 1 + + +# --- Browser resolution helpers --- + + +def test_resolve_browser_none_returns_chrome(): + ua, family = _resolve_browser(None) + assert family == "chrome" + assert len(ua) > 10 + + +def test_resolve_browser_at_safari(): + ua, family = _resolve_browser("@safari") + assert family == "safari" + assert len(ua) > 10 + + +def test_resolve_browser_at_firefox(): + ua, family = _resolve_browser("@firefox") + assert family == "firefox" + + +def test_resolve_browser_at_edge(): + _, family = _resolve_browser("@edge") + assert family == "edge" + + +def test_resolve_browser_unknown_at_hint_falls_back_to_chrome(): + _, family = _resolve_browser("@netscape") + assert family == "chrome" + + +def test_resolve_browser_real_ua_passes_through(): + old_ua = "Mozilla/5.0 (Macintosh) AppleWebKit/605.1.15 Version/17.0 Safari/605.1.15" + ua, family = _resolve_browser(old_ua) + assert ua == old_ua + assert family == "chrome" + + +# --- CurlClient: impersonate param and header safety --- + + +def test_curl_client_resolves_impersonate_from_ua_hint(): + client = 
CurlClient(headers={"user-agent": "@safari"}) + assert client._session.impersonate == "safari" + + +def test_curl_client_resolves_impersonate_from_real_ua(): + chrome_ua = "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 Chrome/136.0.0.0 Safari/537.36" + client = CurlClient(headers={"user-agent": chrome_ua}) + assert client._session.impersonate == "chrome" + + +def test_curl_client_does_not_mutate_caller_headers(): + + original = {"user-agent": "@chrome", "x-foo": "bar"} + headers_copy = dict(original) + CurlClient(headers=headers_copy) + assert headers_copy == original # caller's dict unchanged + + +def test_curl_client_strips_user_agent_from_session(): + + client = CurlClient(headers={"user-agent": "@chrome", "x-foo": "bar"}) + session_headers = dict(client._session.headers) + assert "user-agent" not in {k.lower() for k in session_headers} + + +def test_httpx_client_resolves_ua_hint_to_real_string(): + from twscrape.http import HttpxClient + + client = HttpxClient(headers={"user-agent": "@firefox"}) + ua = dict(client._client.headers).get("user-agent", "") + assert "Firefox/" in ua + assert "@" not in ua diff --git a/twscrape/account.py b/twscrape/account.py index 28da25de..51721f1a 100644 --- a/twscrape/account.py +++ b/twscrape/account.py @@ -1,3 +1,4 @@ +import hashlib import json import os import sqlite3 @@ -64,4 +65,5 @@ def make_client(self, proxy: str | None = None) -> HttpClient: if "ct0" in self.cookies: headers["x-csrf-token"] = self.cookies["ct0"] - return _make_http_client(proxy=proxy, headers=headers, cookies=self.cookies) + seed = int(hashlib.sha256(self.username.encode()).hexdigest()[:8], 16) + return _make_http_client(proxy=proxy, headers=headers, cookies=self.cookies, seed=seed) diff --git a/twscrape/accounts_pool.py b/twscrape/accounts_pool.py index 5a19832e..f12e228b 100644 --- a/twscrape/accounts_pool.py +++ b/twscrape/accounts_pool.py @@ -4,8 +4,6 @@ from datetime import datetime, timezone from typing import TypedDict -from fake_useragent 
import UserAgent - from .account import Account from .db import execute, fetchall, fetchone from .http import HttpStatusError @@ -93,7 +91,7 @@ async def add_account( password=password, email=email, email_password=email_password, - user_agent=user_agent or UserAgent().safari, + user_agent=user_agent or "@chrome", active=False, locks={}, stats={}, @@ -210,7 +208,7 @@ async def relogin(self, usernames: str | list[str]): error_msg = NULL, headers = json_object(), cookies = json_object(), - user_agent = "{UserAgent().safari}" + user_agent = "@chrome" WHERE username IN ({",".join([f'"{x}"' for x in usernames])}) """ diff --git a/twscrape/http.py b/twscrape/http.py index 522b9074..078ffa24 100644 --- a/twscrape/http.py +++ b/twscrape/http.py @@ -1,13 +1,34 @@ import importlib.util import os -from typing import Any, Literal +import random +from typing import Any, Literal, cast + +from fake_useragent import UserAgent HttpMethod = Literal["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "TRACE", "PATCH"] _UNSET = object() -_CURL_IMPERSONATE = "chrome" _CURL_MAX_RETRIES = 3 +# https://curl-impersonate.readthedocs.io/en/latest/fingerprints.html +_BROWSER_FAMILIES = {"chrome", "safari", "firefox", "edge"} + +_ua = UserAgent() + + +def _resolve_browser(hint: str | None, seed: int | None = None) -> tuple[str, str]: + """Return (ua_string, family) for a @hint or a real UA string.""" + if hint and not hint.startswith("@"): + return hint, "chrome" + family = hint[1:].lower() if hint else "chrome" + if family not in _BROWSER_FAMILIES: + family = "chrome" + if seed is not None: + uas = [x["useragent"] for x in _ua.data_browsers if family in (x["browser"] or "").lower()] + if uas: + return random.Random(seed).choice(uas), family + return getattr(_ua, family, _ua.chrome), family + class Response: """Thin wrapper around httpx.Response or curl_cffi.Response.""" @@ -93,18 +114,26 @@ def headers(self) -> Any: ... 
class HttpxClient(HttpClient): def __init__( - self, *, proxy: str | None = None, headers: dict | None = None, cookies: dict | None = None + self, + *, + proxy: str | None = None, + headers: dict | None = None, + cookies: dict | None = None, + seed: int | None = None, ): import httpx from httpx import AsyncHTTPTransport self._httpx = httpx transport = AsyncHTTPTransport(retries=3) + resolved_headers = dict(headers or {}) + ua_string, _ = _resolve_browser(resolved_headers.get("user-agent"), seed=seed) + resolved_headers["user-agent"] = ua_string self._client = httpx.AsyncClient( proxy=proxy, follow_redirects=True, transport=transport, - headers=headers or {}, + headers=resolved_headers, cookies=cookies or {}, ) @@ -136,10 +165,16 @@ class CurlClient(HttpClient): def __init__( self, *, proxy: str | None = None, headers: dict | None = None, cookies: dict | None = None ): - from curl_cffi.requests import AsyncSession + from curl_cffi.requests import AsyncSession, BrowserTypeLiteral + _, family = _resolve_browser((headers or {}).get("user-agent")) + # strip user-agent — curl_cffi sets its own UA for the impersonated profile + safe_headers = {k: v for k, v in (headers or {}).items() if k.lower() != "user-agent"} self._session = AsyncSession( - impersonate=_CURL_IMPERSONATE, proxy=proxy, allow_redirects=True, headers=headers or {} + impersonate=cast(BrowserTypeLiteral, family), + proxy=proxy, + allow_redirects=True, + headers=safe_headers, ) if cookies: self._session.cookies.update(cookies) @@ -215,6 +250,7 @@ def make_client( proxy: str | None = None, headers: dict | None = None, cookies: dict | None = None, + seed: int | None = None, ) -> HttpClient: if backend is None: backend = _detect_backend() @@ -222,6 +258,6 @@ def make_client( if backend == "curl": return CurlClient(proxy=proxy, headers=headers, cookies=cookies) if backend == "httpx": - return HttpxClient(proxy=proxy, headers=headers, cookies=cookies) + return HttpxClient(proxy=proxy, headers=headers, 
cookies=cookies, seed=seed) raise ValueError(f"Unknown backend: {backend!r}. Expected 'curl' or 'httpx'.") diff --git a/twscrape/xclid.py b/twscrape/xclid.py index 9c523755..32f1eb70 100644 --- a/twscrape/xclid.py +++ b/twscrape/xclid.py @@ -7,14 +7,13 @@ from typing import Iterator import bs4 -from fake_useragent import UserAgent from .http import HttpClient from .http import make_client as _make_http_client def _make_client() -> HttpClient: - return _make_http_client(headers={"user-agent": UserAgent().chrome}) + return _make_http_client(headers={"user-agent": "@chrome"}) async def get_tw_page_text(url: str, clt: HttpClient): From 0fb2bf1dc7ab0d0a07e1788b282bf5c823f20531 Mon Sep 17 00:00:00 2001 From: vladkens Date: Fri, 22 May 2026 21:00:37 +0300 Subject: [PATCH 8/9] release: v0.18.0 --- .github/workflows/ci.yml | 5 ++--- changelog.md | 25 +++++++++++++++++++++++++ pyproject.toml | 2 +- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5412b60d..95e37d30 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,6 +27,7 @@ jobs: needs: test permissions: contents: write + id-token: write steps: - uses: actions/checkout@v6 - uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 @@ -52,9 +53,7 @@ jobs: fi echo "$NOTES" > release_notes.txt - - run: uv publish - env: - UV_PUBLISH_TOKEN: ${{ secrets.PYPI_API_TOKEN }} + - uses: pypa/gh-action-pypi-publish@release/v1 - run: gh release create ${{ github.ref_name }} --title ${{ github.ref_name }} --notes-file release_notes.txt env: diff --git a/changelog.md b/changelog.md index 63d1da93..f1b4cafa 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,28 @@ +## v0.18.0 – 2026-05-22 + +### Breaking Changes +- Removed `user_by_id` API as X/Twitter no longer supports this endpoint + +### Features +- Added `add_cookie` CLI command (#301, by @sakhnenkoff) +- Added API for fetching all tweets in a conversation thread 
(#252, by @Khanzadeh-AH) +- Added community scraping support (#275) +- Added `list_members` API for retrieving Twitter list members +- Added new fields to `Tweet` model (#279) +- Added user `about` info field (#277, by @terencedignon) + +### Fixes +- Restored scraping compatibility after X platform changes in May 2026 (#306, #307, by @mar0ls) +- Fixed JS bundle parsing for `x-client-transaction-id` generation (#303, by @Flaburgan) +- Fixed HTTP client not being properly closed, resolving resource warnings (#304, by @Flaburgan) +- Fixed pagination to continue past empty pages (#265, #247) +- Improved robustness of GQL pagination handling +- Improved proxy handling and `xclid` calculation + +**Full Changelog**: https://github.com/vladkens/twscrape/compare/v0.17.0...v0.18.0 + +--- + ## v0.17.0 – 2025-04-29 ### Fixes diff --git a/pyproject.toml b/pyproject.toml index db4c3171..17fffaab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "twscrape" -version = "0.17.0" +version = "0.18.0" authors = [{ name = "vladkens", email = "v.pronsky@gmail.com" }] description = "Twitter GraphQL and Search API implementation with SNScrape data models" readme = "readme.md" From 2419c6ee911103b2456e4f705d2e0787611caf03 Mon Sep 17 00:00:00 2001 From: vladkens Date: Fri, 22 May 2026 22:28:02 +0300 Subject: [PATCH 9/9] fix --- .github/workflows/pr.yml | 4 + docs/plans/20260522-browser-impersonate.md | 294 --------------------- twscrape/http.py | 6 +- uv.lock | 2 +- 4 files changed, 7 insertions(+), 299 deletions(-) delete mode 100644 docs/plans/20260522-browser-impersonate.md diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 4fbab286..217b4864 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -2,11 +2,14 @@ name: pr on: pull_request: + pull_request_target: + types: [opened, reopened] permissions: read-all jobs: test: + if: github.event_name == 'pull_request' runs-on: ubuntu-latest strategy: matrix: @@ -20,3 +23,4 @@ 
jobs: - run: uv sync --frozen - run: make check - run: make test + diff --git a/docs/plans/20260522-browser-impersonate.md b/docs/plans/20260522-browser-impersonate.md deleted file mode 100644 index be7ad9f1..00000000 --- a/docs/plans/20260522-browser-impersonate.md +++ /dev/null @@ -1,294 +0,0 @@ -# Browser Impersonate & UA Resolution - -## Overview - -Replace `fake_useragent` dependency and hardcoded `impersonate="chrome"` (not a valid -`BrowserType` value) with a `_resolve_browser(hint)` function that maps meta-strings -(`"@chrome"`, `"@safari"`, `"@firefox"`, `"@edge"`) or real UA strings to a -`(ua_string, impersonate_profile)` pair. - -**Problem it solves:** -- `fake_useragent` is deprecated and will be removed -- `impersonate="chrome"` is not a valid `BrowserType` value — could silently break -- Safari UA (from `fake_useragent`) + Chrome TLS fingerprint = bot-detection red flag -- No way to control the browser profile without patching source - -**Key benefits:** -- `_latest_profile(family)` queries `BrowserType` at runtime → new curl_cffi versions auto-picked -- Both backends use consistent browser identity -- `fake_useragent` removed from all source files and `pyproject.toml` -- Accounts get a stable `"@chrome"` / `"@safari"` etc. 
stored in DB at creation time - -## Context (from discovery) - -- **Files involved:** `twscrape/http.py`, `twscrape/account.py`, `twscrape/accounts_pool.py`, - `twscrape/xclid.py`, `pyproject.toml`, `tests/test_http.py` -- **`fake_useragent` used in:** - - `accounts_pool.py:7` — import; `line 96` — `UserAgent().safari` in `add_account` default; - `line 213` — `UserAgent().safari` inlined in raw SQL inside `relogin` - - `xclid.py:10` — import; `line 17` — `UserAgent().chrome` in `_make_client` -- **curl_cffi 0.11.4 `BrowserType` desktop profiles:** `chrome99`…`chrome136` (note `chrome133a` - variant), `firefox133`, `firefox135`, `safari15_3`, `safari15_5`, `safari17_0`, `safari18_0`, - `edge99`, `edge101`; excluded: `*_android`, `*_ios`, `tor145` - -## Development Approach - -- **Testing approach:** Regular (code → tests) -- Complete each task fully before moving to the next; all tests must pass - -## Solution Overview - -**Single resolution point:** `Account.make_client()` and `xclid._make_client()` call -`_resolve_browser(hint)` and pass already-resolved values to `make_client()`. -`CurlClient` and `HttpxClient` receive final values; no resolution inside constructors. - -``` -hint: "@chrome" | "@safari" | "@firefox" | "@edge" | real-UA-string - └→ family: "chrome" | "safari" | "firefox" | "edge" - ├─ _latest_profile(family) → impersonate_profile (e.g. "chrome136") - └─ _ua_for_profile(profile) → ua_string -``` - -`_pick_browser_hint()` (used by `add_account` default): random family by weight -`[chrome 60%, safari 20%, firefox 15%, edge 5%]`, returns `"@chrome"` etc. — called once at -account creation so each account has a **stable** browser identity in the DB. - -**HttpxClient:** receives `headers` with resolved `ua_string`; `impersonate` dropped in -`make_client()` — not passed to `HttpxClient` at all. 
- -**CurlClient:** receives `impersonate` profile directly; `user-agent` key stripped from headers -copy (not mutating caller dict) so curl_cffi sets its own matching UA automatically. - -## Technical Details - -```python -_BROWSER_FAMILIES = {"chrome", "safari", "firefox", "edge"} - -_BROWSER_WEIGHTS = [("chrome", 60), ("safari", 20), ("firefox", 15), ("edge", 5)] - -def _pick_browser_hint() -> str: - """Return a random "@" hint using browser market-share weights.""" - import random - families, weights = zip(*_BROWSER_WEIGHTS) - return "@" + random.choices(families, weights=weights)[0] - -def _latest_profile(family: str) -> str: - """Find the highest-versioned desktop BrowserType entry for family. - - Uses strict allow-list regex ^\\d+(_\\d+)?$ to reject: - - chrome133a (alpha/beta suffix) - - chrome131_android, safari17_2_ios (mobile) - - tor145 (different family) - """ - import re - from curl_cffi.requests import BrowserType - pattern = re.compile(rf"^{family}\d+(_\d+)?$") - candidates = [name for name in dir(BrowserType) if pattern.match(name)] - if not candidates: - raise ValueError(f"No BrowserType entry found for family {family!r}") - # sort by numeric components: safari18_0 > safari17_0 > safari15_5 - candidates.sort(key=lambda s: [int(x) for x in re.findall(r"\d+", s)]) - return candidates[-1] - -# UA templates — version number extracted from profile name and injected. -# Platform: Windows for Chrome/Firefox/Edge, macOS for Safari. -def _ua_for_profile(profile: str) -> str: - """Return a matching User-Agent string for the given impersonate profile. - - profile examples: "chrome136", "safari18_0", "firefox135", "edge101" - """ - import re - nums = re.findall(r"\d+", profile) - major = nums[0] # e.g. 
"136" from "chrome136", "18" from "safari18_0" - if profile.startswith("chrome"): - return (f"Mozilla/5.0 (Windows NT 10.0; Win64; x64) " - f"AppleWebKit/537.36 (KHTML, like Gecko) " - f"Chrome/{major}.0.0.0 Safari/537.36") - if profile.startswith("edge"): - return (f"Mozilla/5.0 (Windows NT 10.0; Win64; x64) " - f"AppleWebKit/537.36 (KHTML, like Gecko) " - f"Chrome/{major}.0.0.0 Safari/537.36 Edg/{major}.0.0.0") - if profile.startswith("firefox"): - return (f"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:{major}.0) " - f"Gecko/20100101 Firefox/{major}.0") - if profile.startswith("safari"): - minor = nums[1] if len(nums) > 1 else "0" - wk = "605.1.15" - return (f"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " - f"AppleWebKit/{wk} (KHTML, like Gecko) " - f"Version/{major}.{minor} Safari/{wk}") - raise ValueError(f"Unknown profile {profile!r}") - -# Family detection from a real UA string (for existing DB rows with fake_useragent UA). -# Order matters: check Edge before Chrome (Edge UA contains "Chrome/"). -_UA_FAMILY_RULES = [ - ("Edg/", "edge"), - ("Firefox/", "firefox"), - ("Chrome/", "chrome"), - ("Safari/", "safari"), # Safari UA does NOT contain "Chrome/" -] - -def _detect_family(ua: str) -> str: - for marker, family in _UA_FAMILY_RULES: - if marker in ua: - return family - return "chrome" # fallback - -def _resolve_browser(hint: str | None) -> tuple[str, str]: - """Return (ua_string, impersonate_profile) for the given hint. - - hint=None or "@chrome"/"@safari"/"@firefox"/"@edge" → use that family. - hint=real UA string → detect family, use latest profile for it. - Unknown "@xxx" hint → fallback to "@chrome". 
- """ - if hint is None: - hint = "@chrome" - if hint.startswith("@"): - family = hint[1:].lower() - if family not in _BROWSER_FAMILIES: - family = "chrome" - else: - family = _detect_family(hint) - profile = _latest_profile(family) - ua = _ua_for_profile(profile) - return ua, profile -``` - -`make_client()` updated signature: -```python -def make_client(backend=None, *, proxy=None, headers=None, cookies=None, - impersonate: str | None = None) -> HttpClient: - ... - if backend == "curl": - return CurlClient(proxy=proxy, headers=headers, cookies=cookies, - impersonate=impersonate) - if backend == "httpx": - # impersonate silently dropped; httpx has no TLS fingerprinting - return HttpxClient(proxy=proxy, headers=headers, cookies=cookies) -``` - -`CurlClient.__init__` strips `user-agent` from a **copy** of headers: -```python -safe_headers = {k: v for k, v in (headers or {}).items() - if k.lower() != "user-agent"} -self._session = AsyncSession( - impersonate=impersonate, proxy=proxy, allow_redirects=True, - headers=safe_headers, -) -``` - -## What Goes Where - -**Implementation Steps** — all code changes and their tests. - -**Post-Completion** — real login flow verification with accounts carrying old -`fake_useragent`-generated UA strings already in the DB. 
- -## Implementation Steps - -### Task 1: Core helpers in `http.py` - -**Files:** -- Modify: `twscrape/http.py` -- Modify: `tests/test_http.py` - -- [ ] add `_BROWSER_FAMILIES`, `_BROWSER_WEIGHTS`, `_UA_FAMILY_RULES` constants -- [ ] implement `_pick_browser_hint()` using `random.choices` -- [ ] implement `_latest_profile(family)` with strict allow-list regex (see Technical Details) -- [ ] implement `_ua_for_profile(profile)` with template per family (see Technical Details) -- [ ] implement `_detect_family(ua)` using `_UA_FAMILY_RULES` ordered detection -- [ ] implement `_resolve_browser(hint)` composing the above -- [ ] add `impersonate: str | None = None` to `make_client()` signature; pass to `CurlClient`; - silently drop for `HttpxClient` -- [ ] tests: `_latest_profile("chrome")` returns a `chrome*` member of `BrowserType` -- [ ] tests: `_latest_profile("safari")` returns `safari18_0` (current latest desktop) -- [ ] tests: `_latest_profile` excludes `chrome133a`, `chrome131_android`, `safari17_2_ios` -- [ ] tests: `_ua_for_profile` produces correct UA for chrome, safari, firefox, edge profiles -- [ ] tests: `_detect_family` correctly identifies Edge/Firefox/Chrome/Safari + fallback -- [ ] tests: `_resolve_browser` for all hint variants (None, @chrome, @safari, real UA, unknown @hint) -- [ ] tests: `_pick_browser_hint` with monkeypatched `random.choices` returns `"@"` form -- [ ] run tests — must pass before task 2 - -### Task 2: Update `CurlClient` to use `impersonate` param - -**Files:** -- Modify: `twscrape/http.py` -- Modify: `tests/test_http.py` - -- [ ] add `impersonate: str | None = None` to `CurlClient.__init__` -- [ ] strip `user-agent` from a copy of `headers` (do not mutate caller's dict) -- [ ] pass `impersonate` to `AsyncSession(impersonate=impersonate, ...)` -- [ ] remove `_CURL_IMPERSONATE` constant usage (already replaced in Task 1) -- [ ] update existing `test_curl_client_*` tests to pass an explicit `impersonate` or verify - the existing 
auto-resolution path no longer lives in `CurlClient` -- [ ] test: `CurlClient(impersonate="chrome136")` passes `"chrome136"` to `AsyncSession` -- [ ] test: headers dict passed to `CurlClient` with `user-agent` key is not mutated by caller -- [ ] run tests — must pass before task 3 - -### Task 3: Update `Account.make_client()` — resolution entry point - -**Files:** -- Modify: `twscrape/account.py` -- Modify: `tests/test_http.py` or `tests/test_queue_client.py` - -- [ ] import `_resolve_browser` from `.http` -- [ ] call `ua_string, impersonate = _resolve_browser(self.user_agent)` at top of `make_client` -- [ ] set `headers["user-agent"] = ua_string` (for httpx backend to use) -- [ ] pass `impersonate=impersonate` to `_make_http_client(...)` -- [ ] test: account with `user_agent="@safari"` → `make_client` creates a `CurlClient` with - `impersonate` matching `safari*` -- [ ] test: account with `user_agent="@chrome"` → `HttpxClient` headers contain a Chrome UA string -- [ ] test: account with old `fake_useragent`-style Safari UA string → resolves to `safari*` profile -- [ ] run tests — must pass before task 4 - -### Task 4: Update `AccountsPool` — remove `fake_useragent` - -**Files:** -- Modify: `twscrape/accounts_pool.py` - -- [ ] import `_pick_browser_hint` from `.http` -- [ ] in `add_account`: replace `user_agent or UserAgent().safari` with - `user_agent or _pick_browser_hint()` -- [ ] in `relogin` SQL (line 213): replace `"{UserAgent().safari}"` with `"@chrome"` - (reset accounts to explicit Chrome; no runtime call inside SQL string) -- [ ] remove `from fake_useragent import UserAgent` from `accounts_pool.py` -- [ ] test: `add_account(user_agent=None)` stores a `"@"` meta-string (not a real UA) -- [ ] test: `add_account(user_agent="@safari")` stores `"@safari"` verbatim -- [ ] run tests — must pass before task 5 - -### Task 5: Update `xclid.py` and remove `fake_useragent` dependency - -**Files:** -- Modify: `twscrape/xclid.py` -- Modify: `pyproject.toml` - -- [ ] 
replace `headers={"user-agent": UserAgent().chrome}` in `xclid._make_client()` with - `headers={"user-agent": "@chrome"}` — preserves explicit Chrome intent -- [ ] remove `from fake_useragent import UserAgent` from `xclid.py` -- [ ] remove `fake-useragent>=1.4.0` from `dependencies` in `pyproject.toml` -- [ ] run `uv sync` to verify dependency graph is clean -- [ ] verify: `grep -r fake_useragent .` (excluding `.venv`) returns no matches -- [ ] run full test suite — must pass - -### Task 6: Verify acceptance criteria - -- [ ] `_latest_profile("chrome")` returns the highest-versioned `chrome\d+` entry in current curl_cffi -- [ ] `"@safari"` → `CurlClient` impersonates a `safari*` profile; `HttpxClient` headers contain Safari UA -- [ ] real Chrome UA → both clients resolve to `chrome*` profile / Chrome UA -- [ ] `add_account(user_agent=None)` stores `"@"`, not a real UA string -- [ ] no `fake_useragent` import anywhere in `twscrape/` -- [ ] run `uv run pytest` — all tests pass - -### Task 7: [Final] Tidy up - -- [ ] document `@` meta-string convention in `CLAUDE.md` -- [ ] move this plan to `docs/plans/completed/` - -## Post-Completion - -**Manual verification:** -- Run a real login/request flow with an account that has an old `fake_useragent`-generated UA - string in the DB (e.g. `"Mozilla/5.0 ... 
Safari/605.1.15 ..."`); confirm `_resolve_browser` - falls back correctly and the request succeeds -- Confirm with debug logging that `CurlClient` sends the correct UA for its impersonated profile - (not any `@chrome` literal or old Safari string) diff --git a/twscrape/http.py b/twscrape/http.py index 078ffa24..01f4d083 100644 --- a/twscrape/http.py +++ b/twscrape/http.py @@ -67,10 +67,8 @@ def json(self) -> Any: return self._json def raise_for_status(self) -> None: - try: - self._rep.raise_for_status() - except Exception as e: - raise HttpStatusError(str(e), response=self) from e + if self._rep.status_code >= 400: + raise HttpStatusError(f"HTTP {self._rep.status_code}", response=self) class HttpError(Exception): ... diff --git a/uv.lock b/uv.lock index 19382a8e..7d6328bd 100644 --- a/uv.lock +++ b/uv.lock @@ -628,7 +628,7 @@ wheels = [ [[package]] name = "twscrape" -version = "0.17.0" +version = "0.18.0" source = { editable = "." } dependencies = [ { name = "aiosqlite" },