Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ name: build

on:
push:
branches: [main]
tags: ["v*"]

jobs:
test:
Expand All @@ -25,6 +27,7 @@ jobs:
needs: test
permissions:
contents: write
id-token: write
steps:
- uses: actions/checkout@v6
- uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
Expand All @@ -50,9 +53,7 @@ jobs:
fi
echo "$NOTES" > release_notes.txt

- run: uv publish
env:
UV_PUBLISH_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
- uses: pypa/gh-action-pypi-publish@release/v1

- run: gh release create ${{ github.ref_name }} --title ${{ github.ref_name }} --notes-file release_notes.txt
env:
Expand Down
17 changes: 0 additions & 17 deletions .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,3 @@ jobs:
- run: make check
- run: make test

maintainer-edits:
if: github.event_name == 'pull_request_target' && github.event.pull_request.head.repo.full_name != github.repository
runs-on: ubuntu-latest
permissions:
pull-requests: write
steps:
- name: Warn if maintainer edits disabled
if: "!github.event.pull_request.maintainer_can_modify"
uses: actions/github-script@v7
with:
script: |
await github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: 'Please enable **[Allow edits from maintainers](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/allowing-changes-to-a-pull-request-branch-created-from-a-fork#enabling-repository-maintainer-permissions-on-existing-pull-requests)** on this PR — this lets us push fixes directly to your branch without waiting for you to resolve conflicts manually.'
})
25 changes: 25 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,28 @@
## v0.18.0 – 2026-05-22

### Breaking Changes
- Removed `user_by_id` API as X/Twitter no longer supports this endpoint

### Features
- Added `add_cookie` CLI command (#301, by @sakhnenkoff)
- Added API for fetching all tweets in a conversation thread (#252, by @Khanzadeh-AH)
- Added community scraping support (#275)
- Added `list_members` API for retrieving Twitter list members
- Added new fields to `Tweet` model (#279)
- Added user `about` info field (#277, by @terencedignon)

### Fixes
- Restored scraping compatibility after X platform changes in May 2026 (#306, #307, by @mar0ls)
- Fixed JS bundle parsing for `x-client-transaction-id` generation (#303, by @Flaburgan)
- Fixed HTTP client not being properly closed, resolving resource warnings (#304, by @Flaburgan)
- Fixed pagination to continue past empty pages (#265, #247)
- Improved robustness of GQL pagination handling
- Improved proxy handling and `xclid` calculation

**Full Changelog**: https://github.com/vladkens/twscrape/compare/v0.17.0...v0.18.0

---

## v0.17.0 – 2025-04-29

### Fixes
Expand Down
10 changes: 7 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "twscrape"
version = "0.17.0"
version = "0.18.0"
authors = [{ name = "vladkens", email = "v.pronsky@gmail.com" }]
description = "Twitter GraphQL and Search API implementation with SNScrape data models"
readme = "readme.md"
Expand All @@ -25,12 +25,16 @@ dependencies = [
"beautifulsoup4>=4.13.0",
]

[project.optional-dependencies]
curl = ["curl-cffi>=0.7.0"]

[dependency-groups]
dev = [
"curl-cffi>=0.7.0",
"httpx>=0.26.0",
"pyright>=1.1.369",
"pytest-asyncio>=0.23.3",
"pytest-cov>=4.1.0",
"pytest-httpx>=0.28.0",
"pytest>=7.4.4",
"ruff>=0.1.11",
]
Expand Down Expand Up @@ -66,5 +70,5 @@ select = ["E", "F", "I", "UP", "C4", "SIM"]
ignore = ["E501", "UP035", "SIM105"]

[tool.pyright]
include = ["apps", "clients", "strategy", "lib", "scripts"]
include = ["twscrape"]
typeCheckingMode = "standard"
13 changes: 9 additions & 4 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,15 @@ Twitter GraphQL API implementation with [SNScrape](https://github.com/JustAnothe
```bash
pip install twscrape
```
Or development version:

`httpx` is included by default. For better Cloudflare/bot-detection bypass, install `curl-cffi` as well — it uses libcurl with browser-level TLS fingerprint spoofing and is preferred automatically when present:

```bash
pip install git+https://github.com/vladkens/twscrape.git
pip install "twscrape[curl]"
```

Override the backend explicitly with `TWS_HTTP_BACKEND=httpx` or `TWS_HTTP_BACKEND=curl`.

## Features
- Support both Search & GraphQL Twitter API
- Async/Await functions (can run multiple scrapers in parallel at the same time)
Expand Down Expand Up @@ -122,9 +126,9 @@ async def main():
async for tweet in api.search("elon musk"):
print(tweet.id, tweet.user.username, tweet.rawContent) # tweet is `Tweet` object

# NOTE 2: all methods have `raw` version (returns `httpx.Response` object):
# NOTE 2: all methods have `raw` version (returns `twscrape.Response` object):
async for rep in api.search_raw("elon musk"):
print(rep.status_code, rep.json()) # rep is `httpx.Response` object
print(rep.status_code, rep.json()) # rep is `twscrape.Response` object

# change log level, default info
set_log_level("DEBUG")
Expand Down Expand Up @@ -360,6 +364,7 @@ _Note:_ If proxy not working, exception will be raised from API class.
- `TWS_PROXY` - global proxy for all accounts (e.g. `socks5://user:pass@127.0.0.1:1080`)
- `TWS_WAIT_EMAIL_CODE` - timeout for email verification code during login (default: `30`, in seconds)
- `TWS_RAISE_WHEN_NO_ACCOUNT` - raise `NoAccountError` exception when no available accounts, instead of waiting (default: `false`, values: `false`/`0`/`true`/`1`)
- `TWS_HTTP_BACKEND` - force HTTP backend: `httpx` or `curl` (default: `curl` if `curl-cffi` is installed, otherwise `httpx`)

## Limitations

Expand Down
9 changes: 4 additions & 5 deletions scripts/update_gql_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@
import re
import sys

import httpx

from twscrape.http import HttpClient, make_client
from twscrape.xclid import get_scripts_list, get_tw_page_text, script_url

API_FILE = "twscrape/api.py"
Expand All @@ -28,7 +27,7 @@ def _is_relevant_script(url: str) -> bool:
async def get_scripts() -> list[tuple[str, str]]:
os.makedirs(CACHE_DIR, exist_ok=True)

async with httpx.AsyncClient(follow_redirects=True) as clt:
async with make_client() as clt:
text = await get_tw_page_text("https://x.com/elonmusk", clt)

urls = list(get_scripts_list(text))
Expand All @@ -50,7 +49,7 @@ async def fetch_scripts(scripts: list[tuple[str, str]], force: bool) -> None:
print(f"Downloading {len(todo)} scripts.")
sem = asyncio.Semaphore(10)

async def fetch(clt: httpx.AsyncClient, i: int, url: str, path: str) -> None:
async def fetch(clt: HttpClient, i: int, url: str, path: str) -> None:
async with sem:
print(f" ({i:3d}/{len(todo):3d}) {url}")
rep = await clt.get(url)
Expand All @@ -61,7 +60,7 @@ async def fetch(clt: httpx.AsyncClient, i: int, url: str, path: str) -> None:
with open(path, "w", encoding="utf-8") as fp:
fp.write(rep.text)

async with httpx.AsyncClient(follow_redirects=True) as clt:
async with make_client() as clt:
await asyncio.gather(*[fetch(clt, i, url, path) for i, (url, path) in enumerate(todo, 1)])


Expand Down
11 changes: 8 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import pytest

from twscrape.account import Account
from twscrape.accounts_pool import AccountsPool
from twscrape.api import API
from twscrape.logger import set_log_level
from twscrape.queue_client import QueueClient, XClIdGenStore

set_log_level("ERROR")
from .mock_http import MockClient

set_log_level("CRITICAL")


class ClIdGenMock:
Expand All @@ -28,15 +31,17 @@ def pool_mock(tmp_path):


@pytest.fixture
async def client_fixture(pool_mock: AccountsPool):
async def client_fixture(pool_mock: AccountsPool, monkeypatch):
mock_clt = MockClient()
monkeypatch.setattr(Account, "make_client", lambda self, proxy=None: mock_clt)
pool_mock._order_by = "username"

for x in range(1, 3):
await pool_mock.add_account(f"user{x}", f"pass{x}", f"email{x}", f"email_pass{x}")
await pool_mock.set_active(f"user{x}", True)

client = QueueClient(pool_mock, "SearchTimeline")
yield pool_mock, client
yield pool_mock, client, mock_clt


@pytest.fixture
Expand Down
77 changes: 77 additions & 0 deletions tests/mock_http.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import json as _json
from unittest.mock import MagicMock

from twscrape.http import HttpClient, HttpMethod, Response


def _raw(*, status_code: int = 200, json_data=None, text: str = "", headers: dict | None = None):
raw = MagicMock()
raw.status_code = status_code
raw.text = text
raw.content = text.encode()
raw.headers = headers or {}
raw.url = "https://mock.local"
raw.request = MagicMock()
raw.request.method = "GET"
raw.request.url = "https://mock.local"
raw.json.return_value = json_data if json_data is not None else {}
if status_code >= 400:
raw.raise_for_status.side_effect = Exception(f"HTTP {status_code}")
else:
raw.raise_for_status.return_value = None
return raw


class MockClient(HttpClient):
    """``HttpClient`` test double that replays queued outcomes in FIFO order.

    Outcomes are queued with the ``add_*`` methods, each of which returns
    ``self`` so calls can be chained. Every ``request()`` call consumes the
    oldest queued outcome, ignoring the method, URL and keyword arguments.
    """

    def __init__(self):
        # Tagged tuples: the first element names the outcome kind.
        self._queue: list = []
        self._cookies: dict = {}
        self._headers: dict = {}

    def add_response(
        self,
        *,
        status_code: int = 200,
        json: dict | list | None = None,
        text: str = "",
        headers: dict | None = None,
    ) -> "MockClient":
        """Queue a normal response with the given status, JSON body, text and headers."""
        entry = ("response", status_code, json, text, headers)
        self._queue.append(entry)
        return self

    def add_exception(self, exc: Exception) -> "MockClient":
        """Queue an exception to be raised by a later ``request()`` call."""
        entry = ("exc", exc)
        self._queue.append(entry)
        return self

    def add_invalid_json_response(
        self, *, status_code: int = 200, text: str = "not-json", headers: dict | None = None
    ) -> "MockClient":
        """Queue a response whose ``json()`` raises ``json.JSONDecodeError``."""
        entry = ("invalid_json", status_code, text, headers)
        self._queue.append(entry)
        return self

    @property
    def cookies(self):
        """The mutable cookie dict held by this mock."""
        return self._cookies

    @property
    def headers(self):
        """The mutable header dict held by this mock."""
        return self._headers

    async def request(self, method: HttpMethod, url: str, **kwargs) -> Response:
        """Consume the oldest queued outcome and raise or return it.

        Raises:
            RuntimeError: If nothing is queued.
            Exception: Whatever exception was queued via ``add_exception``.
        """
        if not self._queue:
            raise RuntimeError("MockClient: no more queued responses")

        kind, *payload = self._queue.pop(0)

        if kind == "exc":
            (queued_exc,) = payload
            raise queued_exc

        if kind == "invalid_json":
            status_code, text, headers = payload
            broken = _raw(status_code=status_code, text=text, headers=headers)
            # Make body parsing fail the way a non-JSON payload would.
            broken.json.side_effect = _json.JSONDecodeError("no json", "", 0)
            return Response(broken)

        status_code, json_data, text, headers = payload
        fake = _raw(status_code=status_code, json_data=json_data, text=text, headers=headers)
        return Response(fake)

    async def aclose(self) -> None:
        """No-op: the mock holds nothing that needs closing."""
        pass
Loading
Loading