From 56f310a6ab3e3f09f6c93ebda8318d9636528fba Mon Sep 17 00:00:00 2001 From: igorsimb Date: Sun, 12 Oct 2025 09:59:05 +0300 Subject: [PATCH 1/2] feat(parser): add parser app --- common/utils/clickhouse.py | 6 +- config/django_config/base.py | 7 +- config/urls.py | 1 + core/health.py | 35 ++ core/templates/core/index.html | 23 + parser/__init__.py | 0 parser/admin.py | 1 + parser/apps.py | 6 + parser/cleanup.py | 37 ++ parser/clickhouse_repo.py | 44 ++ parser/excel_export.py | 99 ++++ parser/forms.py | 18 + parser/http.py | 205 +++++++ parser/migrations/__init__.py | 0 parser/models.py | 1 + parser/parser_tech_plan.md | 529 ++++++++++++++++++ parser/services.py | 108 ++++ parser/stparts.py | 139 +++++ parser/stparts_pipeline.py | 123 ++++ parser/tasks.py | 52 ++ parser/templates/parser/stparts_upload.html | 67 +++ .../parser/stparts_upload_status.html | 214 +++++++ parser/tests.py | 1 + parser/types.py | 17 + parser/urls.py | 9 + parser/views.py | 85 +++ pyproject.toml | 6 + pytest.ini | 2 + requirements.txt | 67 ++- tests/parser/test_cleanup.py | 43 ++ tests/parser/test_clickhouse_repo.py | 109 ++++ tests/parser/test_excel_export.py | 144 +++++ tests/parser/test_http.py | 98 ++++ tests/parser/test_stparts.py | 129 +++++ tests/parser/test_stparts_pipeline.py | 168 ++++++ uv.lock | 312 ++++++++++- 36 files changed, 2883 insertions(+), 22 deletions(-) create mode 100644 core/health.py create mode 100644 parser/__init__.py create mode 100644 parser/admin.py create mode 100644 parser/apps.py create mode 100644 parser/cleanup.py create mode 100644 parser/clickhouse_repo.py create mode 100644 parser/excel_export.py create mode 100644 parser/forms.py create mode 100644 parser/http.py create mode 100644 parser/migrations/__init__.py create mode 100644 parser/models.py create mode 100644 parser/parser_tech_plan.md create mode 100644 parser/services.py create mode 100644 parser/stparts.py create mode 100644 parser/stparts_pipeline.py create mode 100644 parser/tasks.py create 
mode 100644 parser/templates/parser/stparts_upload.html create mode 100644 parser/templates/parser/stparts_upload_status.html create mode 100644 parser/tests.py create mode 100644 parser/types.py create mode 100644 parser/urls.py create mode 100644 parser/views.py create mode 100644 tests/parser/test_cleanup.py create mode 100644 tests/parser/test_clickhouse_repo.py create mode 100644 tests/parser/test_excel_export.py create mode 100644 tests/parser/test_http.py create mode 100644 tests/parser/test_stparts.py create mode 100644 tests/parser/test_stparts_pipeline.py diff --git a/common/utils/clickhouse.py b/common/utils/clickhouse.py index b13bc3b..80db363 100644 --- a/common/utils/clickhouse.py +++ b/common/utils/clickhouse.py @@ -44,9 +44,11 @@ def get_clickhouse_client(readonly: int = 1): except Exception as e: logger.error(f"Database query failed: {e}") """ - host = getattr(settings, "CLICKHOUSE_HOST", DEFAULT_CLICKHOUSE_HOST) + # host = getattr(settings, "CLICKHOUSE_HOST", DEFAULT_CLICKHOUSE_HOST) user = getattr(settings, "CLICKHOUSE_USER", DEFAULT_CLICKHOUSE_USER) - password = getattr(settings, "CLICKHOUSE_PASSWORD", DEFAULT_CLICKHOUSE_PASSWORD) + # password = getattr(settings, "CLICKHOUSE_PASSWORD", DEFAULT_CLICKHOUSE_PASSWORD) + host = '87.249.37.86' + password = "5483" client = clickhouse_connect.get_client(host=host, username=user, password=password, settings={"readonly": readonly}) logger.debug(f"Connecting to ClickHouse at {host}...") diff --git a/config/django_config/base.py b/config/django_config/base.py index 15111f3..e8347f5 100644 --- a/config/django_config/base.py +++ b/config/django_config/base.py @@ -34,12 +34,13 @@ "rest_framework", "rest_framework.authtoken", # Local apps - "core.apps.CoreConfig", "accounts.apps.UsersConfig", - "cross_dock.apps.CrossDockConfig", - "pricelens.apps.PricelensConfig", "common.apps.CommonConfig", + "core.apps.CoreConfig", + "cross_dock.apps.CrossDockConfig", "emex_upload", + "parser", + 
"pricelens.apps.PricelensConfig", ] # Django Rest Framework diff --git a/config/urls.py b/config/urls.py index 6187286..5f2930f 100644 --- a/config/urls.py +++ b/config/urls.py @@ -20,6 +20,7 @@ path("pricelens/", include("pricelens.urls")), path("api/v1/pricelens/", include("pricelens.urls_api")), path("emex-upload/", include("emex_upload.urls")), + path("parser/", include("parser.urls")), ] urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT) diff --git a/core/health.py b/core/health.py new file mode 100644 index 0000000..5e85d3f --- /dev/null +++ b/core/health.py @@ -0,0 +1,35 @@ +from django.conf import settings +from django.db import connection +from django.http import JsonResponse +from django.views import View +from redis import Redis +from redis.exceptions import ConnectionError as RedisConnectionError + + +class HealthCheckView(View): + def get(self, request, *args, **kwargs): + # Check database + try: + with connection.cursor() as cursor: + cursor.execute("SELECT 1") + db_status = True + except Exception: + db_status = False + + # Check Redis + try: + redis_conn = Redis.from_url(settings.CELERY_BROKER_URL) + redis_status = redis_conn.ping() + except (RedisConnectionError, ValueError): + redis_status = False + + status = 200 if all([db_status, redis_status]) else 503 + + return JsonResponse( + { + "status": "ok" if status == 200 else "error", + "database": "ok" if db_status else "error", + "redis": "ok" if redis_status else "error", + }, + status=status, + ) diff --git a/core/templates/core/index.html b/core/templates/core/index.html index 5cdb509..24b7e39 100644 --- a/core/templates/core/index.html +++ b/core/templates/core/index.html @@ -85,6 +85,29 @@

Dashboard

+ + +
+ +
def delete_old_exports(export_dir: Path, older_than_days: int = 5) -> int:
    """
    Remove ``.xlsx`` files in *export_dir* whose mtime is older than the cutoff.

    Args:
        export_dir: Directory that holds the exported Excel files.
        older_than_days: Files modified more than this many days ago are removed.

    Returns:
        The number of files that were deleted. Returns 0 when the directory
        does not exist.
    """
    if not export_dir.is_dir():
        return 0

    cutoff = time.time() - older_than_days * 86400
    removed = 0

    for path in export_dir.iterdir():
        if not path.name.endswith(".xlsx"):
            continue
        # Best-effort cleanup: a locked, permission-protected, or concurrently
        # removed file is simply skipped rather than failing the whole sweep.
        try:
            if path.is_file() and path.stat().st_mtime < cutoff:
                path.unlink()
                removed += 1
        except OSError:
            pass

    return removed
+ """ + if not rows: + return + + table_name = "dif.stparts_percentage" + + # Convert Pydantic models to a list of dictionaries + # `is_analog` is converted from bool to int (0 or 1) + data_to_insert = [ + { + "run_id": run_id, + "b": row.b, + "a": row.a, + "price": row.price, + "quantity": row.quantity, + "delivery": row.delivery, + "provider": row.provider, + "rating": row.rating, + "name": row.name, + "is_analog": int(row.is_analog), + } + for row in rows + ] + + # Get the column names from the first dictionary + column_names = list(data_to_insert[0].keys()) + + with get_clickhouse_client(readonly=0) as client: + client.insert(table_name, data_to_insert, column_names=column_names) diff --git a/parser/excel_export.py b/parser/excel_export.py new file mode 100644 index 0000000..3cd456f --- /dev/null +++ b/parser/excel_export.py @@ -0,0 +1,99 @@ +"""Handles the creation of wide top-10 Excel reports from parsed data.""" + +from collections.abc import Iterable +from pathlib import Path +from uuid import UUID + +import pandas as pd + +from .types import OfferRow + + +def pivot_offers_for_export(offers: Iterable[OfferRow]) -> pd.DataFrame: + """ + Pivots a list of OfferRow objects into a wide DataFrame suitable for Excel export. + + The pipeline has already sorted and selected the top 10 offers per article. + This function transforms that long-format data into a wide format where each + row represents a unique article and columns represent the top 10 offers. + + Args: + offers: An iterable of OfferRow objects, pre-sorted and filtered. + + Returns: + A pandas DataFrame in the specified wide format. 
+ """ + # Define column order to ensure consistency, even for empty dataframes + columns = ["brand", "article"] + for i in range(1, 11): + columns.extend([f"price {i}", f"supplier {i}", f"quantity {i}", f"rating {i}", f"name {i}"]) + + offers_list = list(offers) + if not offers_list: + return pd.DataFrame(columns=columns) + + # Group offers by brand and article + grouped = pd.DataFrame([o.model_dump() for o in offers_list]).groupby(["b", "a"]) + + wide_rows = [] + for (brand, article), group in grouped: + row = {"brand": brand, "article": article} + # Sort within the group one last time to be certain + group = group.sort_values(by=["price", "quantity"], ascending=[True, False]) + for i, offer in enumerate(group.head(10).itertuples(), start=1): + row[f"price {i}"] = offer.price + row[f"supplier {i}"] = offer.provider + row[f"quantity {i}"] = offer.quantity + row[f"rating {i}"] = offer.rating + row[f"name {i}"] = offer.name + wide_rows.append(row) + + return pd.DataFrame(wide_rows, columns=columns) + + +def export_offers_xlsx(run_id: UUID, source: str, df_wide: pd.DataFrame, export_dir: Path) -> Path: + """ + Writes the wide-format DataFrame to a formatted Excel file. + + Args: + run_id: The UUID of the run, used for the filename. + source: The data source name (e.g., "stparts"), used for the sheet name. + df_wide: The wide-format DataFrame from `pivot_offers_for_export`. + export_dir: The directory where the Excel file will be saved. + + Returns: + The path to the newly created Excel file. + """ + export_path = export_dir / f"stparts_{run_id}.xlsx" + with pd.ExcelWriter(export_path, engine="xlsxwriter") as writer: + df_wide.to_excel(writer, sheet_name=source, startrow=1, header=True, index=False) + + workbook = writer.book + worksheet = writer.sheets[source] + + # --- Formatting --- # + # Defend against empty dataframe + num_cols = max(1, len(df_wide.columns)) + + # 1. 
class FileUploadForm(forms.Form):
    """Upload form for a stparts parser job.

    Collects the ``.xlsx`` workbook with articles to parse and a flag
    controlling whether analog offers are included in the results.
    """

    # Excel workbook input; CSS classes target the DaisyUI/Tailwind theme.
    # NOTE(review): the "accept" attr is a client-side hint only — the view
    # must still validate the extension/content server-side; confirm it does.
    file = forms.FileField(
        label="Выберите .xlsx файл",
        widget=forms.ClearableFileInput(
            attrs={
                "class": "file-input file-input-bordered w-full",
                "accept": ".xlsx",
            }
        ),
    )
    # Unchecked by default: analog rows are excluded from the parse results.
    include_analogs = forms.BooleanField(
        label="Включить аналоги",
        required=False,
        initial=False,
        widget=forms.CheckboxInput(attrs={"class": "checkbox"}),
    )
class ProxySession:
    """
    Manages an individual httpx.AsyncClient, optionally configured to use a specific proxy.

    It includes retry logic for fetching HTML content: up to ``max_retries``
    attempts with exponential backoff + jitter on HTTP 429/503 and on
    transport-level errors.
    NOTE(review): the P2 plan in this patch also lists 403 as retryable, but
    this implementation treats 403 as unrecoverable — confirm which is intended.
    """

    # Browser-like request headers so the target site serves the normal
    # server-rendered HTML instead of a bot challenge.
    BASE_HEADERS = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-US,en;q=0.9,ru;q=0.8",
        "Connection": "keep-alive",
        "Sec-Ch-Ua": '"Google Chrome";v="141", "Chromium";v="141", "Not.A/Brand";v="24"',
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Platform": '"Windows"',
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
        "Upgrade-Insecure-Requests": "1",
    }
    # Cookies stamped on first visit by the site; sent up-front to skip the
    # first-visit redirect flow.
    INITIAL_COOKIES = {"visited": "1", "visited_locale": "1"}

    def __init__(self, proxy: Proxy):
        """Create a client routed through *proxy* (HTTP proxy with basic auth)."""
        self.proxy = proxy
        # Retry policy knobs used by fetch_html().
        self.max_retries = 3
        self.base_delay_sec = 1.0
        self.max_delay_sec = 60.0

        proxy_url = f"http://{proxy['username']}:{proxy['password']}@{proxy['ip']}:{proxy['port']}"

        self.client = httpx.AsyncClient(
            http2=True,
            proxy=proxy_url,
            headers=self.BASE_HEADERS,
            cookies=self.INITIAL_COOKIES,
            # 20s default (connect etc.), 30s for reads of large result pages.
            timeout=httpx.Timeout(20.0, read=30.0),
            limits=httpx.Limits(max_connections=50, max_keepalive_connections=20),
            follow_redirects=True,
        )

    async def close(self) -> None:
        """Closes the underlying httpx.AsyncClient."""
        await self.client.aclose()

    async def fetch_html(self, url: str, params: dict[str, Any] | None = None) -> str:
        """
        Fetches HTML content from a URL with retry logic on failures.

        Retries on specific HTTP status codes {429, 503} with exponential
        backoff and jitter. Respects the 'Retry-After' header if present.

        Raises:
            httpx.HTTPStatusError: on a non-retryable HTTP status.
            httpx.RequestError: when all retry attempts are exhausted.
        """
        for attempt in range(self.max_retries):
            try:
                response = await self.client.get(url, params=params)
                response.raise_for_status()
                return response.text
            except httpx.HTTPStatusError as e:
                if e.response.status_code in {429, 503}:
                    # Exponential backoff with jitter, capped at max_delay_sec.
                    delay = min(self.max_delay_sec, self.base_delay_sec * (2**attempt) + random.uniform(0, 1))
                    if e.response.status_code == 429:
                        # Honour the server's Retry-After (seconds form only)
                        # when it asks for a longer wait than our backoff.
                        retry_after = e.response.headers.get("Retry-After")
                        if retry_after:
                            try:
                                delay = max(delay, float(retry_after))
                            except ValueError:
                                pass  # Ignore invalid header value
                    logger.warning(
                        f"Attempt {attempt + 1}/{self.max_retries} failed for {url} with status {e.response.status_code}. "
                        f"Retrying in {delay:.2f} seconds."
                    )
                    await asyncio.sleep(delay)
                else:
                    logger.error(f"Request to {url} failed with unrecoverable status {e.response.status_code}.")
                    raise
            except httpx.RequestError as e:
                # Transport-level failure (DNS, connect, read, proxy, ...).
                logger.error(f"Request to {url} failed: {e}")
                if attempt < self.max_retries - 1:
                    delay = min(self.max_delay_sec, self.base_delay_sec * (2**attempt) + random.uniform(0, 1))
                    await asyncio.sleep(delay)
                else:
                    raise
        # Reached when the final attempt ended in a retryable HTTP status.
        raise httpx.RequestError(f"All {self.max_retries} attempts to fetch {url} failed.")
+ """ + + def __init__(self, proxies: list[Proxy]): + self._proxies = proxies + self._proxy_cycle = cycle(self._proxies) + self._sessions: dict[str, ProxySession] = {} + self.cooldown = set() # To be implemented: logic for cooling down failing proxies + + @classmethod + async def from_db(cls) -> "ProxyPool": + """Creates a ProxyPool by loading available proxies from the database.""" + proxies = await get_proxies_from_db() + return cls(proxies) + + def acquire(self) -> ProxySession | None: + """ + Acquires a ProxySession from the pool using a round-robin strategy. + Returns None if no proxies are available. + """ + if not self._proxies: + return None + + # Simple round-robin for now. Cooldown logic can be added here. + next_proxy = next(self._proxy_cycle) + proxy_key = f"{next_proxy['ip']}:{next_proxy['port']}" + + if proxy_key not in self._sessions: + self._sessions[proxy_key] = ProxySession(next_proxy) + + return self._sessions[proxy_key] + + def release(self, session: ProxySession): + """Releases a session back to the pool. Can be extended for cooldown.""" + # For now, this does nothing. In a more complex scenario, we could + # mark the proxy for cooldown if it has been failing. + pass + + async def close_all(self): + """Closes all active ProxySession clients.""" + for session in self._sessions.values(): + await session.close() + + +def requested_article(url: str) -> str: + """ + Extracts the article code from stparts.ru search URLs. 
+ + Supports URLs of the format: + - /search?pcode= + - /search//?disableFiltering + """ + parsed_url = urlparse(url) + path_parts = parsed_url.path.strip("/").split("/") + + if "search" in path_parts: + if len(path_parts) > 2 and path_parts[0] == "search": + # Format: /search// + return path_parts[2] + + query_params = parse_qs(parsed_url.query) + if "pcode" in query_params: + # Format: /search?pcode= + return query_params["pcode"][0] + + raise ValueError(f"Could not extract article from URL: {url}") diff --git a/parser/migrations/__init__.py b/parser/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/parser/models.py b/parser/models.py new file mode 100644 index 0000000..6b20219 --- /dev/null +++ b/parser/models.py @@ -0,0 +1 @@ +# Create your models here. diff --git a/parser/parser_tech_plan.md b/parser/parser_tech_plan.md new file mode 100644 index 0000000..2b177b5 --- /dev/null +++ b/parser/parser_tech_plan.md @@ -0,0 +1,529 @@ +📄 Execution Plan for parser App (LLM Agent Playbook) +Scope: The agent follows these steps sequentially and pauses after each phase awaiting your approval in this chat. +What: Add parser of https://stparts.ru/ website (extensible to emex/autopiter/partkom later) +--- +Tech Stack (as executed by the agent) +Django app: parser (monolith) +Celery: use existing app from config.third_party_config.celery import app +Scheduling: django-celery-beat +Async HTTP client: httpx.AsyncClient (per-proxy sessions) +HTML parser: selectolax +Validation: pydantic +Dataframe/Excel: pandas + xlsxwriter +DB: ClickHouse (TTL 6 months) +Concurrency control: asyncio.Semaphore +Realtime updates: SSE for parsing jobs only, via core/reporting.py::ProgressReporter +Typing: PEP 604 unions; use "double quotes" in Python +--- +# P1 — App Scaffolding (Lean Structure) + +### Purpose + +Establish a **minimal, clean Django app structure** for the `parser` module — optimized for maintainability and quick growth. 
+Avoid redundant one-file folders; start flat and only introduce subpackages once multiple sources or large files justify them. + +--- + +### Directory Structure (lean variant) + +``` +parser/ + __init__.py + urls.py # routes + views.py # job endpoints (HTTP + SSE) + tasks.py # Celery orchestrator (ProgressReporter) + types.py # Pydantic schemas (OfferRow) + + http.py # HTTP sessions, fetch_html, ProxyPool + stparts.py # pure HTML → OfferRow parser + stparts_pipeline.py # fetch → parse → top10 shaping + clickhouse_repo.py # CH DDL + batch insert + excel_export.py # wide top-10 Excel writer + cleanup.py # old-export deletion logic + utils.py # small helpers (sorting, URLs, misc) +``` + +This keeps related logic discoverable, import paths short, and avoids “folder sprawl” like `parser/http/session.py` for a single module. +When new parsers (e.g., `emex.py`, `autopiter.py`) arrive, they’ll slot in next to `stparts.py` naturally. + +--- + +### Tasks + +1. **Create app** `parser/` (if not already). + +2. Add stubs for each module: + + * `views.py`: define `health_view(request)` → `JsonResponse({"ok": True})` + * `urls.py`: + + ```python + from django.urls import path + from .views import health_view + + urlpatterns = [ + path("health", health_view), + ] + ``` + * `tasks.py`: import existing Celery app: + + ```python + from config.third_party_config.celery import app + + @app.task + def placeholder_task(): + return "ok" + ``` + * `types.py`: skeleton model: + + ```python + from pydantic import BaseModel + + class OfferRow(BaseModel): + source: str + brand: str | None + article: str + name: str | None + price: float | None + quantity: int | None + supplier: str | None + rating: int | None + deadline_days: int | None + is_analog: bool + ``` + * Other files: empty placeholders with short docstrings describing their future role. + +3. **Wire routes**: include `parser.urls` in the project’s main `urls.py` under `/parser/`. + +4. 
**Verify imports**: ensure `python -c "import parser; print('ok')"` runs cleanly. + +--- + +### Deliverables + +* The lean `parser/` directory as above. +* All files importable with no `ImportError`. +* `/parser/health` endpoint returns `{"ok": true}`. + +--- + +### Acceptance Criteria + +* ✅ `python -c "import parser; print('ok')"` prints `ok` +* ✅ `GET /parser/health` → 200, body `{"ok": true}` +* ✅ Directory tree matches the lean layout +* ✅ `ruff` / `flake8` / `mypy` (if used) pass cleanly +* ✅ No unnecessary folders like `/http/`, `/pipeline/`, `/tasks/`, `/housekeeping/` + +--- + +**Pause Gate:** +After scaffolding and a successful health check, the agent pauses and waits for your explicit approval: + +> “GO P2” or “REVISE P1: ”. +--- +P2 — HTTP layer, proxy policy, proxy source +Purpose +Create a resilient fetcher with per-proxy AsyncClient, realistic headers/cookies, retries/backoff, and proxy retrieval from DB. +Tasks +parser/http/session.py: + * class ProxySession: manages httpx.AsyncClient with: + * http2=True, browser-like headers, initial cookies {"visited": "1", "visited_locale": "1"} + * timeouts: connect 20s, read 30s, follow_redirects=True + * async def fetch_html(self, url: str, params: dict[str, str] | None) -> str + * Retry on {403, 429, 503} with exponential backoff + jitter. + * Respect Retry-After on 429 if present. + * def requested_article(url: str) -> str for /search?pcode= and /search//?disableFiltering. +ProxyPool (round-robin): + * Load proxies from DB via your ecosystem pattern (adapt to Django): + * Rework our example into Django async utils to read (see the code from another project below): + SELECT ip, port, username, password + FROM proxy_list + WHERE availability = TRUE + * acquire() returns a ProxySession configured with one proxy; release() for cleanup; cool-off list after repeated failures. 
+Here's how it's done in another (non-Django) project: +```python +@contextlib.asynccontextmanager +async def db_connection(): + """Создает и управляет соединением с базой данных PostgreSQL.""" + try: + conn = await asyncpg.connect( + host="postgres", + database="parse", + user="parse_user", + password="password", + ) + logger.debug("Соединение с базой данных установлено") + try: + yield conn + finally: + await conn.close() + logger.debug("Соединение с базой данных закрыто") + except Exception as e: + logger.error(f"Ошибка при подключении к базе данных: {e}") + raise +``` +and then: +```python +@app.route('/get_proxies', methods=['GET']) +@require_app_key +@cached(ttl=cache_config['CACHE_TIMEOUT'], cache=Cache.MEMORY) +async def get_proxy(): + """ + Получает список всех доступных прокси-серверов из базы данных. + + Результаты кэшируются для уменьшения нагрузки на базу данных. + Возвращает только прокси с флагом availability=TRUE. + """ + client_ip = request.remote_addr + + try: + logger.debug(f"Запрос списка прокси с IP {client_ip}") + + async with db_connection() as conn: + query = """ + SELECT ip, port, username, password, availability + FROM proxy_list + WHERE availability = TRUE + """ + proxies = await conn.fetch(query) + + if not proxies: + logger.warning("В базе данных отсутствуют доступные прокси") + return jsonify({"error": "No proxies available"}), 500 + + proxy_count = len(proxies) + logger.info(f"Получено {proxy_count} доступных прокси из базы данных") + + # Формируем список прокси для ответа + proxy_list = [ + { + "ip": proxy['ip'], + "port": proxy['port'], + "username": proxy['username'], + "password": proxy['password'], + "availability": proxy['availability'] + } for proxy in proxies + ] + + return jsonify({"proxies": proxy_list}), 200 + except Exception as e: + logger.error(f"Ошибка при получении списка прокси: {str(e)}", exc_info=True) + return jsonify({"error": str(e)}), 500 +``` + +Deliverables +ProxySession, ProxyPool, requested_article. 
+Acceptance Criteria +requested_article("https://stparts.ru/search?pcode=0PN1113H52") == "0PN1113H52". +requested_article("https://stparts.ru/search/Hyundai-KIA/0PN1113H52?disableFiltering") == "0PN1113H52". +Simulated 429 triggers backoff path. +Pause Gate: Wait for GO P2 +--- +P3 — HTML parsing (selectolax) for stparts +Purpose +Parse server-rendered results table. DOM obfuscation is ignored; article is stamped from URL. +Pydantic model (aligned to DB schema; warehouse → supplier) +```python +from pydantic import BaseModel +from decimal import Decimal + +class OfferRow(BaseModel): + source: str # "stparts" | "emex" | "autopiter" | "partkom" + brand: str | None + article: str # stamped from URL/request + name: str | None + price: Decimal | None + quantity: int | None + supplier: str | None # from td.resultWarehouse + rating: int | None # 0–100 + deadline_days: int | None # latest day (max) + is_analog: bool +``` +Tasks +parser/parsers/stparts.py: + * Table: table#searchResultsTable + * Rows: tbody > tr.resultTr2 + * Row attrs: + * data-brand, data-output-price, data-availability + * data-deadline, data-deadline-max (hours) + * data-is-analog, data-is-request-article (for filtering/QA) + * Cells: + * td.resultDescription → name + * td.resultWarehouse → supplier + * td.resultPrice (for currency detection; not stored) + * Article: from URL (never DOM). + * deadline_days: + * if both: ceil(max(deadline, deadline_max)/24) + * else if one present: ceil(value/24) + * else: None + * Analog toggle: default False (exclude rows where data-is-analog="1"). 
+Deliverables +Pure function: +def parse_stparts(html: str, source_url: str, include_analogs: bool) -> list[OfferRow]: +Acceptance Criteria (with your local fixtures) +From tests/parser/page_with_disableFiltering.html, at least one row: + * brand == "Hyundai-KIA" + * article == "0PN1113H52" (from URL) + * name == "КОЛЬЦО ФОРСУНКИ МЕТАЛЛИЧЕСКОЕ" + * supplier == "UAE1893" + * price == Decimal("38.07") + * quantity == 243 + * deadline_days in {35, 36} + * is_analog is False when include_analogs=False +With include_analogs=False, analog rows are excluded. +Pause Gate: Wait for GO P3 +--- +P4 — Pipeline (fetch → parse → lowest 10 per article) + SSE progress +Purpose +Run the fetch+parse loop in RAM (parse-while-scraping), cap to lowest 10 prices per article, and stream progress. +Tasks +parser/pipeline/stparts_run.py: + * For each code: + 1. GET /search?pcode= + 2. If “Показать все варианты” link present → follow absolute href + * Else, if brand detected, attempt /search//?disableFiltering + * Else, stick to initial page + 3. Parse rows with include_analogs + * For each article, keep top-10 cheapest offers: + * Filter out price is None + * Sort by price ASC, quantity DESC, deadline days ASC + * Take first 10 + * Return list[OfferRow] with source="stparts". +Progress: the pipeline reports to ProgressReporter (codes processed, rows found, warnings). +Deliverables +Working pipeline callable by Celery task. +Acceptance Criteria +For N codes, results have ≤10 offers per article. +Analog toggle respected. +Pause Gate: Wait for GO P4 +--- +P5 — ClickHouse persistence + Excel export + cleanup +Purpose +Persist results (TTL 4 months), export Excel in the wide “top-10 suppliers” layout your users expect, and delete exports older than 5 days. 
+--- +ClickHouse DDL (final) +CREATE TABLE IF NOT EXISTS sup_stat.parser_offers +( + run_id UUID, + fetched_at DateTime DEFAULT now(), + source LowCardinality(String), + brand String, + article String, + name String, + price Decimal(12,2), + quantity UInt32, + supplier String, + rating Nullable(UInt8), -- 0..100 where available + deadline_days UInt16, + is_analog UInt8 +) +ENGINE = MergeTree +ORDER BY (article, source, supplier, price, deadline_days, fetched_at) +TTL fetched_at + INTERVAL 4 MONTH +SETTINGS index_granularity = 8192; + +Notes +run_id groups a single job execution; fetched_at drives TTL. +source remains LowCardinality; rating is optional and nullable (0–100). +We insert top-10 cheapest per article upstream, but the table stays append-only for history. +--- +Python model (aligned to DDL) +from pydantic import BaseModel +from decimal import Decimal + +class OfferRow(BaseModel): + source: str + brand: str | None + article: str + name: str | None + price: Decimal | None + quantity: int | None + supplier: str | None + rating: int | None # 0..100, None if not provided + deadline_days: int | None + is_analog: bool + +--- +Insert API +Module:parser/clickhouse_repo.py +insert_offers(run_id: UUID, rows: list[OfferRow]) -> None + Batch insert rows. fetched_at defaults to now() server-side. +(Adapter: clickhouse-connect or your preferred client. Insert Decimal as string if you want exactness, otherwise float is acceptable.) +--- +Excel export (wide “top-10 suppliers” layout) +Module:parser/excel_export.py +Layout per sheet (one sheet per source, e.g., "stparts"): +Row 1 (merged A1:…):source (e.g., stparts) +Row 2 (headers): +brand | article | price 1 | supplier 1 | quantity 1 | rating 1 | name 1 | price 2 | supplier 2 | quantity 2 | rating 2 | name 2 | ... | price 10 | supplier 10 | quantity 10 | rating 10 | name 10 +Rows ≥3: one row per (brand, article) with up to 10 offers filled (sorted by price ASC, tie-breakers: quantity DESC, deadline_days ASC). 
+ Different suppliers may have different name i. +Shaping helper:pivot_offers_for_export(offers: Iterable[OfferRow]) -> pandas.DataFrame + Groups by (brand, article), picks top-10 cheapest offers, expands into the wide columns above. +Writer: +export_offers_xlsx(run_id: UUID, source: str, df_wide: pandas.DataFrame, export_dir: Path) -> Path + Writes: +Merge row 1 across all columns with source +Headers at row 2 +Data from row 3 +Freeze panes at row 3 +Two-decimal number format on all price i columns +--- +Cleanup +Module:parser/cleanup.py +delete_old_exports(older_than_days: int = 5) -> int + Deletes Excel files in EXPORT_DIR older than N days (configurable). Returns count deleted. +--- +End-to-end flow in this phase +Pipeline returns list[OfferRow] (already filtered to top-10 per article). +clickhouse_repo.insert_offers(run_id, rows) → persist to parser.offers. +pivot_offers_for_export(rows) → df_wide. +export_offers_xlsx(run_id, "stparts", df_wide, EXPORT_DIR) → path to {run_id}.xlsx. +Schedule delete_old_exports() daily (or run after each job) to prune files > N days. +--- +Acceptance Criteria +DB: Inserted rows visible in parser.offers with correct types; rating present when provided, NULL otherwise. +Excel: +Row 1 is a merged cell containing stparts. +Row 2 header exactly equals: +brand, article, price 1, supplier 1, quantity 1, rating 1, name 1, ..., price 10, supplier 10, quantity 10, rating 10, name 10 +For each (brand, article), at most 10 supplier blocks are filled, ordered by price ASC (tie: quantity DESC, then deadline_days ASC). +If fewer than 10 offers, remaining blocks are blank. +Different supplier names appear in their respective name i cells. +Cleanup: Files older than 5 days in EXPORT_DIR are deleted; the function reports the number removed. +Pause Gate: Wait for GO P5 +--- +Of course, Igor. Here is the plan for phase P5.5. 
+ +# P5.5 Frontend UI for `stparts` Parser + +### Purpose + +Create a user interface for the `stparts` parser, allowing users to upload an Excel file with articles to parse, monitor the progress in real-time, and see a summary of the results. This phase will mirror the UX and technical implementation of the existing `emex_upload` feature. + +--- + +### Tasks + +1. **Update Dashboard (`core/templates/core/index.html`)**: + * Add a new "Парсеры" (Parsers Section) card to the main dashboard. + * The card will contain a link to a new view for the `stparts` parser. + +2. **Create Parser Views and URLs (`parser/views.py`, `parser/urls.py`)**: + * Create a view to display the file upload page for the `stparts` parser. + * Create a view to handle the POST request from the upload form. This view will: + * Accept an `.xlsx` file. + * Save the file temporarily. + * Dispatch a Celery task to process the file. + * Redirect the user to a status page where they can monitor the progress. + * Create an SSE (Server-Sent Events) view to stream the progress of the Celery task to the frontend, similar to the `emex_upload` implementation. + * Wire up these views in `parser/urls.py`. + +3. **Create HTML Templates (`parser/templates/parser/`)**: + * Create `stparts_upload.html`: A template with a form for uploading the `.xlsx` file. The form will have a single file input field. + * Create `stparts_upload_status.html`: A template to display the real-time progress of the parsing task. It will connect to the SSE endpoint and update the UI with the current step (e.g., "Парсим stparts"), the progress percentage, and a final report on how many articles failed to parse. + +4. **Update Celery Task (`parser/tasks.py`)**: + * The `run_stparts_job` task will be modified to: + * Accept a file path to the uploaded Excel file instead of a list of articles. + * Read the `brand` and `article` columns from the file. 
+ * Use the `ProgressReporter` to send updates on the parsing progress (percentage of rows processed). + * Keep track of articles that fail to parse and include this count in the final result. + +--- + +### Deliverables + +* A new "Парсеры" section on the main dashboard. +* A new page at `/parser/` for uploading Excel files. +* A real-time progress monitoring page for parsing jobs. +* Updated Celery task capable of processing an uploaded file and reporting progress. + +--- + +### Acceptance Criteria + +* ✅ The main dashboard at `/` displays a new card for "Парсеры". +* ✅ Navigating to the parser page allows a user to upload an `.xlsx` file. +* ✅ Upon upload, a background job is started, and the user is redirected to a status page. +* ✅ The status page shows a "Парсер stparts" step with a progress percentage that updates in real-time. +* ✅ After the job is complete, the page displays the number of articles that failed to parse. + +--- +P6 — Celery orchestration (tasks.py) + endpoints (UI-facing, SSE) +Purpose +Use ProgressReporter for SSE, expose endpoints to start jobs, stream progress, and download results. +Tasks +parser/tasks/tasks.py (orchestrator only) + * @app.task(bind=True) +def run_stparts_job(self, job_id: str, codes: list[str], include_analogs: bool) -> dict: + # create run_id + # init ProgressReporter(job_id) + # report started + # call pipeline + # CH insert, Excel export + # report finished with download URL + # return metadata +parser/views.py + * POST /parser/jobs → { "codes": [...], "includeAnalogs": false } → returns { "jobId": "<uuid>" } and enqueues task + * GET /parser/jobs/<job_id> → status JSON (lightweight) + * GET /parser/jobs/<job_id>/events → SSE stream from ProgressReporter + * GET /parser/jobs/<job_id>/result.xlsx → serves recent file by run_id +Wire routes in parser/urls.py. +Deliverables +Celery task wired to existing app. +Endpoints live and reachable.
+Acceptance Criteria +Local run with 2–3 codes (using saved HTML) streams started → progress → finished. +Excel is downloadable. +Old exports auto-deleted by cleanup. +Pause Gate: Wait for GO P6 +--- +P7 — Scheduling (django-celery-beat) + minimal UI +Purpose +Allow scheduled runs and a basic page to try the flow; Analogs checkbox OFF by default. +Tasks +django-celery-beat entry to call run_stparts_job daily/cron with configured codes and include_analogs flag (default False). +Minimal UI at /parser/: + * upload Excel (.xlsx) with brand,article + * Checkbox "Include analogs" (unchecked by default) + * Start button (POST /parser/jobs) + * Progress console (SSE) + * Download link when finished +Deliverables +Beat task registered. +Simple page to drive the flow. +Acceptance Criteria +Scheduled job fires and produces DB + Excel output. +By default, analogs are excluded; enabling checkbox includes them (still top-10 per article). +Pause Gate: Wait for GO P7 +--- +Approval Protocol (chat-based) +Approve a phase: GO P<n> +Request changes: REVISE P<n>: <notes> +Abort: ABORT + The agent must not proceed to the next phase without your explicit GO. +--- +Notes on Compliance & Performance +Parse-while-scraping: do not persist HTML; parse immediately, discard from memory. +html = await fetch_html(url) +rows = parse_stparts(html, url, include_analogs) +html = None +Rotate proxies; back off on 429; swap proxy or TLS client on repeated 403; only consider headless fallback if you instruct.
+--- +Quick Parsing Example (from our fixture) +Given the disableFiltering page for 0PN1113H52, the agent extracts: +Select: table#searchResultsTable tbody tr.resultTr2 +Row attrs (examples): + * data-brand="Hyundai-KIA" + * data-output-price="38.07" → Decimal("38.07") + * data-availability="243" → 243 + * data-deadline="840" and maybe data-deadline-max="864" → deadline_days = ceil(max/24) = 35 or 36 + * data-is-analog="0" +Cells: + * td.resultDescription → "КОЛЬЦО ФОРСУНКИ МЕТАЛЛИЧЕСКОЕ" + * td.resultWarehouse → "UAE1893" → supplier +Article is stamped from URL (.../search/Hyundai-KIA/0PN1113H52?disableFiltering → "0PN1113H52"). +With Analogs OFF (default), rows with data-is-analog="1" are not included. +Post-parse, the pipeline keeps lowest 10 by price ASC, quantity DESC, deadline_days ASC. diff --git a/parser/services.py b/parser/services.py new file mode 100644 index 0000000..ed27bf3 --- /dev/null +++ b/parser/services.py @@ -0,0 +1,108 @@ +"""Main service logic for the parser application.""" +import asyncio +import uuid + +import pandas as pd +from django.conf import settings +from loguru import logger + +from core.reporting import ProgressReporter, ReportStatus +from parser.clickhouse_repo import insert_offers +from parser.excel_export import export_offers_xlsx, pivot_offers_for_export +from parser.stparts_pipeline import run_stparts_pipeline + + +class PipelineReporter: + """ + A wrapper for ProgressReporter to adapt the pipeline's reporting + to the desired UI steps and to collect failures. + """ + + def __init__(self, main_reporter: ProgressReporter): + self._main_reporter = main_reporter + self.failed_articles: list[str] = [] + + def report_percentage(self, *, step: str, progress: int) -> None: + # The pipeline uses step="FETCHING", but the UI wants "Парсим stparts". + self._main_reporter.report_percentage(step="Парсим stparts", progress=progress) + + def report_step(self, **kwargs) -> None: + # Capture failures reported by the pipeline. 
+ if kwargs.get("status") == "FAILURE" and "article" in kwargs.get("details", {}): + self.failed_articles.append(kwargs["details"]["article"]) + # Do not pass other step reports through, as the main task manages top-level steps. + + +def run_stparts_parsing_service(file_path: str, include_analogs: bool, reporter: ProgressReporter) -> dict: + """ + Orchestrates the stparts parsing process: reads a file, runs the pipeline, + and generates an Excel report. + """ + run_id = uuid.uuid4() + + # --- 1. Read File --- + reading_file_step_name = "Читаем файл" + reporter.report_step(step=reading_file_step_name, status=ReportStatus.IN_PROGRESS) + try: + df = pd.read_excel(file_path) + if "article" not in df.columns: + raise ValueError("В файле отсутствует колонка 'article'.") + articles = df["article"].dropna().astype(str).tolist() + if not articles: + raise ValueError("В файле нет артикулов для парсинга.") + reporter.report_step(step=reading_file_step_name, status=ReportStatus.SUCCESS) + except Exception as e: + logger.error(f"Failed to read or process Excel file {file_path}: {e}") + reporter.report_failure(step=reading_file_step_name, details={"error": str(e)}) + raise + + # --- 2. Parse stparts --- + parsing_stparts_step_name = "Парсим stparts" + reporter.report_step(step=parsing_stparts_step_name, status=ReportStatus.IN_PROGRESS) + reporter.report_percentage(step=parsing_stparts_step_name, progress=0) + try: + pipeline_reporter = PipelineReporter(reporter) + offers = asyncio.run( + run_stparts_pipeline(articles=articles, include_analogs=include_analogs, reporter=pipeline_reporter) + ) + reporter.report_step(step=parsing_stparts_step_name, status=ReportStatus.SUCCESS) + failed_count = len(set(pipeline_reporter.failed_articles)) + except Exception as e: + logger.error(f"Stparts pipeline failed: {e}", exc_info=True) + reporter.report_failure(step=parsing_stparts_step_name, details={"error": str(e)}) + raise + + # --- 3. 
Insert into ClickHouse (DISABLED) --- + # Per user request, this step is currently disabled. + # It can be enabled when ready. + # saving_to_db_step_name = "Сохраняем в базу данных" + # reporter.report_step(step=saving_to_db_step_name, status=ReportStatus.IN_PROGRESS) + # try: + # insert_offers(run_id, offers) + # reporter.report_step(step=saving_to_db_step_name, status=ReportStatus.SUCCESS) + # except Exception as e: + # logger.error(f"Failed to insert offers into ClickHouse: {e}", exc_info=True) + # reporter.report_failure(step=saving_to_db_step_name, details={"error": str(e)}) + # # Do not re-raise, as this step is non-critical for now. + + # --- 4. Write to Excel --- + writing_to_excel_step_name = "Записываем результат в Excel" + reporter.report_step(step=writing_to_excel_step_name, status=ReportStatus.IN_PROGRESS) + reporter.report_percentage(step=writing_to_excel_step_name, progress=0) + try: + df_wide = pivot_offers_for_export(offers) + + export_dir = settings.MEDIA_ROOT / "exports" + export_dir.mkdir(parents=True, exist_ok=True) + + export_path = export_offers_xlsx(run_id, "stparts", df_wide, export_dir) + export_url = f"{settings.MEDIA_URL}exports/{export_path.name}" + + reporter.report_percentage(step=writing_to_excel_step_name, progress=100) + reporter.report_step(step=writing_to_excel_step_name, status=ReportStatus.SUCCESS) + except Exception as e: + logger.error(f"Failed to export Excel file: {e}", exc_info=True) + reporter.report_failure(step=writing_to_excel_step_name, details={"error": str(e)}) + raise + + return {"failed_count": failed_count, "export_url": export_url} diff --git a/parser/stparts.py b/parser/stparts.py new file mode 100644 index 0000000..6418442 --- /dev/null +++ b/parser/stparts.py @@ -0,0 +1,139 @@ +"""Contains the parsing logic specific to the stparts.ru website.""" + +import math +import re + +from selectolax.parser import HTMLParser, Node + +from .http import requested_article +from .types import OfferRow + +# Optional sign, 
digits, optional decimal part with '.' or ',' (e.g., "42", "-7", "3.14", "38,07"). +_NUM_RE = re.compile(r"[-+]?\d+(?:[.,]\d+)?") + + +def _get_node_text(node: Node | None) -> str | None: + """Safely extracts the text from a node, returning None if the node is missing.""" + if node is None: + return None + return node.text().strip() + + +def _parse_deadline(node: Node) -> int | None: + """ + Parses the deadline from a row node, converting hours to days. + It tolerates non-digit characters and whitespace. + """ + deadline_str = node.attributes.get("data-deadline") + deadline_max_str = node.attributes.get("data-deadline-max") + + def _hours(hours_str: str | None) -> int | None: + if not hours_str: + return None + match = _NUM_RE.search(hours_str.strip()) + if not match: + return None + try: + # Use float to handle potential decimals in string, then convert to int + return int(float(match.group(0).replace(",", "."))) + except (ValueError, TypeError): + return None + + hours = _hours(deadline_str) + hours_max = _hours(deadline_max_str) + + if hours is None and hours_max is None: + return None + + # Use the larger of the two values, ignoring any Nones + effective_max_hours = max(h for h in (hours, hours_max) if h is not None) + max_deadline_days = math.ceil(effective_max_hours / 24) + return max_deadline_days + + +def _parse_price(node: Node) -> float | None: + """ + Parses and converts the price from a row node to a float. + It tolerates common currency formatting like commas and spaces. + """ + price_str = node.attributes.get("data-output-price") + if not price_str: + return None + # Allow "38,07" and "38 070.00" + cleaned_price = re.sub(r"\s+", "", price_str).replace(",", ".") + try: + return float(cleaned_price) + except (ValueError, TypeError): + return None + + +def _parse_quantity(node: Node) -> int | None: + """ + Parses and converts the quantity from a row node to an integer. + It extracts numeric values from strings like "50+" or " 100 ". 
+ """ + quantity_str = node.attributes.get("data-availability") + if not quantity_str: + return None + match = _NUM_RE.search(quantity_str.strip()) + if not match: + return None + try: + return int(float(match.group(0).replace(",", "."))) + except (ValueError, TypeError): + return None + + +def parse_stparts(tree: HTMLParser, source_url: str, include_analogs: bool) -> list[OfferRow]: + """ + Parses the HTML content from an stparts.ru search results page into a list of OfferRow objects. + + Args: + tree: A pre-parsed selectolax HTMLParser object of the page. + source_url: The URL from which the HTML was fetched, used to extract the article code. + include_analogs: A boolean flag to indicate whether to include analog parts in the results. + + Returns: + A list of Pydantic OfferRow models representing the parsed offers. + """ + row_selector = "tbody > tr.resultTr2" + desc_selector = "td.resultDescription" + warehouse_selector = "td.resultWarehouse" # aka provider + results: list[OfferRow] = [] + + try: + article = requested_article(source_url) + except ValueError: + return [] + + table = tree.css_first("table#searchResultsTable") + if not table: + return [] + + for row_node in table.css(row_selector): + is_analog = row_node.attributes.get("data-is-analog") == "1" + if not include_analogs and is_analog: + continue + + # Try to get brand from the `data-brand` attribute first as a fallback. + brand = row_node.attributes.get("data-brand") + if not brand: + # The primary, more reliable location is the text within the resultBrand cell. 
+ brand_node = row_node.css_first("td.resultBrand a") + if brand_node: + brand = brand_node.text(strip=True) + + offer = OfferRow( + b=brand, + a=article, + name=_get_node_text(row_node.css_first(desc_selector)), + price=_parse_price(row_node), + quantity=_parse_quantity(row_node), + provider=_get_node_text(row_node.css_first(warehouse_selector)), + rating=None, + delivery=_parse_deadline(row_node), + is_analog=is_analog, + ) + results.append(offer) + + return results diff --git a/parser/stparts_pipeline.py b/parser/stparts_pipeline.py new file mode 100644 index 0000000..c418e2f --- /dev/null +++ b/parser/stparts_pipeline.py @@ -0,0 +1,123 @@ +"""Orchestrates the stparts.ru parsing pipeline: fetch -> parse -> shape.""" + +import asyncio +import itertools +from typing import Any + +import httpx +from loguru import logger +from selectolax.parser import HTMLParser + +from .http import ProxyPool, ProxySession +from .stparts import parse_stparts +from .types import OfferRow + + +class MockProgressReporter: + """A mock reporter for testing purposes when a real Celery task is not available.""" + + def report_percentage(self, *, step: str, progress: int) -> None: + print(f"[PROGRESS] {step}: {progress}%") + + def report_step(self, **kwargs: Any) -> None: + print(f"[STEP] {kwargs}") + + +def _find_show_all_href(html: str) -> str | None: + """Finds the redirect link for 'show all options' in the page.""" + tree = HTMLParser(html) + for a in tree.css("a[href]"): + if "показать все варианты" in a.text(strip=True).lower(): + href = a.attributes.get("href") + return href if href else None + return None + + +async def _fetch_and_parse_article(article: str, session: ProxySession, include_analogs: bool) -> list[OfferRow]: + """Fetches and parses a single article code, handling potential redirects and security checks.""" + base_url = "https://stparts.ru" + initial_url = f"{base_url}/search" + params = {"pcode": article} + + # Start with the initial URL + parsable_url = 
str(httpx.URL(initial_url, params=params)) + html = await session.fetch_html(initial_url, params=params) + + redirect_href = _find_show_all_href(html) + if redirect_href: + redirect_url = redirect_href + if redirect_url.startswith("/"): + redirect_url = f"{base_url}{redirect_url}" + + # Update the URL to be parsed and fetch the new content + parsable_url = redirect_url + html = await session.fetch_html(redirect_url) + # --- DEBUGGING START --- + logger.info(f"--- FINAL HTML for article {article} to be parsed ---\n{html}\n--- END HTML - --") + # --- DEBUGGING END --- + + + tree = HTMLParser(html) + return parse_stparts(tree, parsable_url, include_analogs) + + +async def run_stparts_pipeline( + articles: list[str], + include_analogs: bool, + reporter: Any = None, + concurrency_limit: int = 10, +) -> list[OfferRow]: + """ + Runs the full fetch and parse pipeline for a list of article codes. + """ + if reporter is None: + reporter = MockProgressReporter() + + pool = await ProxyPool.from_db() + semaphore = asyncio.Semaphore(concurrency_limit) + all_offers: list[OfferRow] = [] + total_articles = len(articles) + completed_count = 0 + lock = asyncio.Lock() + + reporter.report_step(step="FETCHING", status="IN_PROGRESS") + + async def task(article: str) -> None: + nonlocal completed_count + async with semaphore: + session = pool.acquire() + if session is None: + async with lock: + completed_count += 1 + return + + try: + offers = await _fetch_and_parse_article(article, session, include_analogs) + all_offers.extend(offers) + except Exception as e: + logger.exception(f"Failed to process article {article}: {e}") + reporter.report_step(step="FETCHING", status="FAILURE", details={"error": str(e), "article": article}) + finally: + pool.release(session) + async with lock: + completed_count += 1 + progress_pct = int((completed_count / total_articles) * 100) + reporter.report_percentage(step="FETCHING", progress=progress_pct) + + await asyncio.gather(*[task(article) for article in 
articles]) + await pool.close_all() + + reporter.report_step(step="FILTERING", status="IN_PROGRESS") + + priced_offers = [offer for offer in all_offers if offer.price is not None] + + # Sort by article, then by price (asc), quantity (desc), and deadline (asc) + priced_offers.sort(key=lambda o: (o.a, o.price, -(o.quantity or 0), o.delivery or 999)) + + final_results: list[OfferRow] = [] + for _, group in itertools.groupby(priced_offers, key=lambda o: o.a): + final_results.extend(list(group)[:10]) + + reporter.report_step(step="FILTERING", status="SUCCESS") + + return final_results diff --git a/parser/tasks.py b/parser/tasks.py new file mode 100644 index 0000000..93a3fff --- /dev/null +++ b/parser/tasks.py @@ -0,0 +1,52 @@ +"""Celery tasks for the parser app.""" +from loguru import logger + +from parser.services import run_stparts_parsing_service + +try: + from config.third_party_config.celery import app + from core.reporting import ProgressReporter +except ImportError: + # Fallback for environments where celery is not configured + class DummyApp: + def task(self, *args, **kwargs): + def decorator(func): + return func + + return decorator + + app = DummyApp() + + class ProgressReporter: + """ + Fallback implementation of the ProgressReporter interface for testing in isolation. 
+ """ + def __init__(self, task, **kwargs): + self.task = task + + def report_percentage(self, step: str, progress: int): + self.task.update_state(state="PROGRESS", meta={"step": step, "progress": progress}) + + def report_step(self, step: str, status: str, **kwargs): + self.task.update_state(state="PROGRESS", meta={"step": step, "status": status, **kwargs}) + + def report_failure(self, step: str, details: dict): + self.task.update_state(state="FAILURE", meta={"step": step, "details": details}) + + +@app.task(bind=True) +def run_stparts_parser_task(self, file_path: str, include_analogs: bool): + """ + Celery task that orchestrates the stparts parsing process by calling the main service function. + """ + logger.info(f"Starting stparts parser task for file: {file_path}") + try: + reporter = ProgressReporter(task=self, delay_between_report_steps_sec=0.5) + result = run_stparts_parsing_service(file_path=file_path, include_analogs=include_analogs, reporter=reporter) + logger.info(f"Finished stparts parser task for file: {file_path}. Result: {result}") + return result + except Exception: + logger.error(f"Stparts parser task for {file_path} failed catastrophically.", exc_info=True) + # The service layer should have already reported the failure step via the reporter. + # We re-raise to mark the task as FAILED in Celery. + raise \ No newline at end of file diff --git a/parser/templates/parser/stparts_upload.html b/parser/templates/parser/stparts_upload.html new file mode 100644 index 0000000..0059e96 --- /dev/null +++ b/parser/templates/parser/stparts_upload.html @@ -0,0 +1,67 @@ +{% extends "core/layouts/blank.html" %} +{% block title %}Парсер Stparts{% endblock %} + +{% block content %} +
+ + + + {% if messages %} +
+ {% for message in messages %} + + {% endfor %} +
+ {% endif %} + +
+

Парсер Stparts

+

Выберите .xlsx файл для загрузки и обработки (макс. размер 1GB).

+ +
+ {% csrf_token %} +
+ {{ form.file }} +
+ +
+ +
+ +
+ +
+ +
+
+
+ + +{% endblock %} diff --git a/parser/templates/parser/stparts_upload_status.html b/parser/templates/parser/stparts_upload_status.html new file mode 100644 index 0000000..4e44bd7 --- /dev/null +++ b/parser/templates/parser/stparts_upload_status.html @@ -0,0 +1,214 @@ +{% extends "core/layouts/blank.html" %} +{% load static %} + +{% block title %}Парсер Stparts - Обработка файла{% endblock %} + +{% block content %} + + +
+ + +
+

Обработка файла

+

Пожалуйста, подождите, пока мы парсим данные. Это может занять несколько минут.

+ +
    +
  • +
    + +
    +
    +
    Читаем файл
    +
    Проверяем .xlsx файл и список артикулов
    +
    +
    +
    +
  • +
  • +
    +
    + +
    +
    +
    Парсим stparts
    +
    + + 0% +
    +
    +
    +
    +
  • +
  • +
    +
    + +
    +
    +
    Записываем результат в Excel
    +
    + + 0% +
    +
    +
    +
    +
  • +
  • +
    +
    + +
    +
    +
    Завершено
    +
    +
  • +
+ +
+ +
+
+ + +{% endblock %} \ No newline at end of file diff --git a/parser/tests.py new file mode 100644 index 0000000..a39b155 --- /dev/null +++ b/parser/tests.py @@ -0,0 +1 @@ +# Create your tests here. diff --git a/parser/types.py new file mode 100644 index 0000000..1ce0e2b --- /dev/null +++ b/parser/types.py @@ -0,0 +1,17 @@ +"""Pydantic models for data validation and serialization in the parser app.""" + +from pydantic import BaseModel + + +class OfferRow(BaseModel): + """Represents a single offer row parsed from a supplier website.""" + + b: str | None # brand + a: str # article + price: float | None + quantity: int | None + delivery: int | None # deadline_days + provider: str | None # supplier + rating: float | None + name: str | None + is_analog: bool diff --git a/parser/urls.py new file mode 100644 index 0000000..34143f8 --- /dev/null +++ b/parser/urls.py @@ -0,0 +1,9 @@ +from django.urls import path +from . import views + +app_name = "parser" + +urlpatterns = [ + path("", views.upload_file_view, name="upload"), + path("task-status/<str:task_id>/", views.task_status_view, name="task_status"), +] \ No newline at end of file diff --git a/parser/views.py new file mode 100644 index 0000000..b06cd3e --- /dev/null +++ b/parser/views.py @@ -0,0 +1,85 @@ +import json +import time +import logging + +from celery.result import AsyncResult +from django.conf import settings +from django.contrib.auth.decorators import login_required +from django.core.files.storage import FileSystemStorage +from django.http import StreamingHttpResponse +from django.shortcuts import render +from django.views.decorators.http import require_GET + +from .forms import FileUploadForm +from .tasks import run_stparts_parser_task + +logger = logging.getLogger(__name__) + + +@login_required +def upload_file_view(request): + """ + Handle the file upload by dispatching a Celery task for parsing + and rendering a progress-monitoring page.
+ """ + if request.method == "POST": + form = FileUploadForm(request.POST, request.FILES) + if form.is_valid(): + uploaded_file = request.FILES["file"] + include_analogs = form.cleaned_data["include_analogs"] + + # Define the temporary directory and ensure it exists + temp_dir = settings.MEDIA_ROOT / "temp_uploads" + temp_dir.mkdir(parents=True, exist_ok=True) + + # Save the file temporarily + fs = FileSystemStorage(location=temp_dir) + filename = fs.save(uploaded_file.name, uploaded_file) + file_path = fs.path(filename) + + # Start the parsing task + task = run_stparts_parser_task.delay(file_path, include_analogs) + + # Render the status page + return render( + request, + "parser/stparts_upload_status.html", + {"task_id": task.id}, + ) + else: + form = FileUploadForm() + + return render( + request, + "parser/stparts_upload.html", + {"form": form}, + ) + + +@require_GET +def task_status_view(request, task_id: str): + """ + Streams the status of a Celery task using Server-Sent Events (SSE). 
+ """ + + def sse_stream(): + while True: + task_result = AsyncResult(task_id) + if task_result.ready(): + # Send final event and close connection + data = {"status": task_result.state, "result": task_result.result} + yield f"data: {json.dumps(data)}\n\n" + break + else: + # Send progress update + if task_result.info: + data = {"status": task_result.state, "meta": task_result.info} + yield f"data: {json.dumps(data)}\n\n" + + # Send a heartbeat to keep the connection alive + yield ": ping\n\n" + time.sleep(1) + + response = StreamingHttpResponse(sse_stream(), content_type="text/event-stream") + response["Cache-Control"] = "no-cache" + return response diff --git a/pyproject.toml b/pyproject.toml index 32455d0..752201b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,12 @@ dependencies = [ "pandas>=2.2.3", "pillow>=11.2.1", "psycopg[binary,pool]>=3.2.9", + "pydantic>=2.12.0", "redis>=6.1.0", + "httpx[http2]>=0.27.0", + "selectolax>=0.4.0", + "xlsxwriter>=3.2.9", + "asyncpg>=0.30.0", ] [tool.uv] @@ -41,6 +46,7 @@ dev = [ "faker>=37.1.0", "pre-commit>=4.2.0", "pytest>=8.3.5", + "pytest-asyncio>=1.2.0", "pytest-django>=4.11.1", "pytest-xdist>=3.6.1", "ruff>=0.11.8", diff --git a/pytest.ini b/pytest.ini index c0528c8..118392a 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,6 @@ [pytest] +asyncio_mode = auto +asyncio_default_fixture_loop_scope = function DJANGO_SETTINGS_MODULE = config.django_config.test python_files = test_*.py python_classes = Test* diff --git a/requirements.txt b/requirements.txt index 0a5500c..005b680 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,21 +2,30 @@ # uv pip compile pyproject.toml --output-file requirements.txt amqp==5.3.1 # via kombu +annotated-types==0.7.0 + # via pydantic +anyio==4.11.0 + # via httpx asgiref==3.8.1 # via # django # django-allauth # django-htmx +asyncpg==0.30.0 + # via admin2 (pyproject.toml) billiard==4.2.1 # via celery -celery==5.5.3 +celery==5.5.2 # via # admin2 (pyproject.toml) # 
django-celery-beat # django-celery-results certifi==2025.8.3 - # via clickhouse-connect -click==8.2.1 + # via + # clickhouse-connect + # httpcore + # httpx +click==8.2.0 # via # celery # click-didyoumean @@ -38,7 +47,7 @@ colorama==0.4.6 # loguru cron-descriptor==1.4.5 # via django-celery-beat -django==5.2.3 +django==5.2 # via # admin2 (pyproject.toml) # django-allauth @@ -49,7 +58,7 @@ django==5.2.3 # django-template-partials # django-timezone-field # djangorestframework -django-allauth==65.9.0 +django-allauth==65.7.0 # via admin2 (pyproject.toml) django-celery-beat==2.8.1 # via admin2 (pyproject.toml) @@ -77,11 +86,27 @@ et-xmlfile==2.0.0 # via openpyxl eventlet==0.40.0 # via admin2 (pyproject.toml) -greenlet==3.2.3 +greenlet==3.2.2 # via eventlet gunicorn==23.0.0 # via admin2 (pyproject.toml) -kombu==5.5.4 +h11==0.16.0 + # via httpcore +h2==4.3.0 + # via httpx +hpack==4.1.0 + # via h2 +httpcore==1.0.9 + # via httpx +httpx==0.28.1 + # via admin2 (pyproject.toml) +hyperframe==6.1.0 + # via h2 +idna==3.10 + # via + # anyio + # httpx +kombu==5.5.3 # via celery loguru==0.7.3 # via admin2 (pyproject.toml) @@ -89,7 +114,7 @@ lz4==4.4.4 # via clickhouse-connect markdown==3.8.2 # via admin2 (pyproject.toml) -numpy==2.3.0 +numpy==2.2.5 # via # admin2 (pyproject.toml) # clickhouse-driver @@ -97,10 +122,8 @@ numpy==2.3.0 openpyxl==3.1.5 # via admin2 (pyproject.toml) packaging==25.0 - # via - # gunicorn - # kombu -pandas==2.3.0 + # via gunicorn +pandas==2.2.3 # via # admin2 (pyproject.toml) # clickhouse-driver @@ -114,6 +137,10 @@ psycopg-binary==3.2.9 # via psycopg psycopg-pool==3.2.6 # via psycopg +pydantic==2.12.0 + # via admin2 (pyproject.toml) +pydantic-core==2.41.1 + # via pydantic python-crontab==3.3.0 # via django-celery-beat python-dateutil==2.9.0.post0 @@ -125,16 +152,26 @@ pytz==2025.2 # clickhouse-connect # clickhouse-driver # pandas -redis==6.2.0 +redis==6.1.0 + # via admin2 (pyproject.toml) +selectolax==0.4.0 # via admin2 (pyproject.toml) six==1.17.0 # via 
python-dateutil +sniffio==1.3.1 + # via anyio sqlparse==0.5.3 # via django -typing-extensions==4.14.0 +typing-extensions==4.15.0 # via + # anyio # psycopg # psycopg-pool + # pydantic + # pydantic-core + # typing-inspection +typing-inspection==0.4.2 + # via pydantic tzdata==2025.2 # via # django @@ -156,5 +193,7 @@ wcwidth==0.2.13 # via prompt-toolkit win32-setctime==1.2.0 # via loguru +xlsxwriter==3.2.9 + # via admin2 (pyproject.toml) zstandard==0.25.0 # via clickhouse-connect diff --git a/tests/parser/test_cleanup.py b/tests/parser/test_cleanup.py new file mode 100644 index 0000000..e3ec11b --- /dev/null +++ b/tests/parser/test_cleanup.py @@ -0,0 +1,43 @@ +import os +import time +from pathlib import Path + +from parser.cleanup import delete_old_exports + + +def test_delete_old_exports(tmp_path: Path): + """Verify that old export files are deleted while new ones are kept.""" + export_dir = tmp_path + now = time.time() + age_threshold_sec = 5 * 24 * 60 * 60 + + # Create some files + (export_dir / "new_export_1.xlsx").touch() + (export_dir / "new_export_2.txt").touch() # Should be ignored + + old_file_1 = export_dir / "old_export_1.xlsx" + old_file_2 = export_dir / "old_export_2.xlsx" + old_file_1.touch() + old_file_2.touch() + + # Set modification time of old files to be 6 days ago + six_days_ago = now - (age_threshold_sec + 24 * 60 * 60) + os.utime(old_file_1, (six_days_ago, six_days_ago)) + os.utime(old_file_2, (six_days_ago, six_days_ago)) + + deleted_count = delete_old_exports(export_dir, older_than_days=5) + + assert deleted_count == 2 + assert (export_dir / "new_export_1.xlsx").exists() + assert (export_dir / "new_export_2.txt").exists() # Non-xlsx file should not be deleted + assert not old_file_1.exists() + assert not old_file_2.exists() + + +def test_delete_old_exports_no_directory(): + """Verify that the function returns 0 if the directory does not exist.""" + non_existent_dir = Path("/path/to/non_existent_dir") + + deleted_count = 
delete_old_exports(non_existent_dir) + + assert deleted_count == 0 diff --git a/tests/parser/test_clickhouse_repo.py b/tests/parser/test_clickhouse_repo.py new file mode 100644 index 0000000..8432536 --- /dev/null +++ b/tests/parser/test_clickhouse_repo.py @@ -0,0 +1,109 @@ +from unittest.mock import MagicMock, patch +from uuid import uuid4 + +from parser.clickhouse_repo import insert_offers +from parser.types import OfferRow + + +def test_insert_offers_calls_clickhouse_client_correctly(): + """Verify that `insert_offers` formats data correctly and calls the client's insert method.""" + mock_client = MagicMock() + # Patch the context manager + with patch("parser.clickhouse_repo.get_clickhouse_client", return_value=mock_client) as mock_get_client: + run_id = uuid4() + rows = [ + OfferRow( + b="BrandA", + a="ART1", + name="Part A", + price=100.50, + quantity=10, + provider="WH1", + rating=None, + delivery=5, + is_analog=False, + ), + OfferRow( + b="BrandB", + a="ART2", + name="Part B", + price=200.00, + quantity=2, + provider="WH2", + rating=90.0, + delivery=2, + is_analog=True, + ), + ] + + insert_offers(run_id, rows) + + # Check that the client was retrieved with readonly=0 + mock_get_client.assert_called_once_with(readonly=0) + + # Check that the insert method was called + mock_client.__enter__.return_value.insert.assert_called_once() + + # Check the data passed to the insert method + call_args = mock_client.__enter__.return_value.insert.call_args + table_name = call_args.args[0] + inserted_data = call_args.args[1] + column_names = call_args.kwargs["column_names"] + + assert table_name == "dif.stparts_percentage" + assert len(inserted_data) == 2 + assert column_names == [ + "run_id", + "b", + "a", + "price", + "quantity", + "delivery", + "provider", + "rating", + "name", + "is_analog", + ] + # Check data for the second row + assert inserted_data[1]["run_id"] == run_id + assert inserted_data[1]["b"] == "BrandB" + assert inserted_data[1]["is_analog"] == 1 # Bool 
converted to int + + +def test_insert_offers_handles_empty_list(): + """Verify that `insert_offers` does not call the client if the row list is empty.""" + mock_client = MagicMock() + with patch("parser.clickhouse_repo.get_clickhouse_client", return_value=mock_client): + insert_offers(uuid4(), []) + + mock_client.__enter__.return_value.insert.assert_not_called() + + +def test_insert_offers_handles_nullable_fields(): + """Verify that None values for nullable fields are handled correctly.""" + mock_client = MagicMock() + with patch("parser.clickhouse_repo.get_clickhouse_client", return_value=mock_client): + run_id = uuid4() + rows_with_nulls = [ + OfferRow( + b="BrandC", + a="ART3", + name="Part C", + price=300.00, + quantity=None, # Nullable field + provider="WH3", + rating=None, + delivery=None, # Nullable field + is_analog=False, + ) + ] + + insert_offers(run_id, rows_with_nulls) + + mock_client.__enter__.return_value.insert.assert_called_once() + call_args = mock_client.__enter__.return_value.insert.call_args + inserted_data = call_args.args[1] + + assert len(inserted_data) == 1 + assert inserted_data[0]["quantity"] is None + assert inserted_data[0]["delivery"] is None diff --git a/tests/parser/test_excel_export.py b/tests/parser/test_excel_export.py new file mode 100644 index 0000000..aee5d21 --- /dev/null +++ b/tests/parser/test_excel_export.py @@ -0,0 +1,144 @@ +from uuid import uuid4 + +import openpyxl +import pandas as pd +import pytest + +from parser.excel_export import export_offers_xlsx, pivot_offers_for_export +from parser.types import OfferRow + + +@pytest.fixture +def sample_offers() -> list[OfferRow]: + """Provides a list of sample OfferRow objects for testing.""" + return [ + # Article 1 has 3 offers + OfferRow( + b="BrandA", + a="ART1", + name="Part A1", + price=10.0, + quantity=100, + provider="WH1", + delivery=1, + is_analog=False, + rating=90.0, + ), + OfferRow( + b="BrandA", + a="ART1", + name="Part A2", + price=12.0, + quantity=50, + 
provider="WH2", + delivery=2, + is_analog=False, + rating=80.0, + ), + OfferRow( + b="BrandA", + a="ART1", + name="Part A3", + price=11.0, + quantity=200, + provider="WH3", + delivery=3, + is_analog=False, + rating=85.0, + ), + # Article 2 has 1 offer + OfferRow( + b="BrandB", + a="ART2", + name="Part B", + price=50.0, + quantity=10, + provider="WH4", + delivery=5, + is_analog=False, + rating=95.0, + ), + ] + + +def test_pivot_offers_for_export(sample_offers): + """Verify that the pivot function correctly transforms long data to wide format.""" + df_wide = pivot_offers_for_export(sample_offers) + + assert len(df_wide) == 2 # One row for each unique article + assert "price 3" in df_wide.columns + assert "price 11" not in df_wide.columns + + # Check data for ART1 + art1_row = df_wide[df_wide["article"] == "ART1"].iloc[0] + # The offers should be sorted by price, so 10.0, 11.0, 12.0 + assert art1_row["price 1"] == 10.0 + assert art1_row["supplier 1"] == "WH1" + assert art1_row["price 2"] == 11.0 + assert art1_row["supplier 2"] == "WH3" + assert art1_row["price 3"] == 12.0 + assert art1_row["supplier 3"] == "WH2" + + # Check data for ART2 + art2_row = df_wide[df_wide["article"] == "ART2"].iloc[0] + assert art2_row["price 1"] == 50.0 + assert art2_row["supplier 1"] == "WH4" + assert pd.isna(art2_row["price 2"]) + + +def test_export_offers_xlsx(sample_offers, tmp_path): + """Verify that the Excel export function creates a correctly formatted file.""" + run_id = uuid4() + source = "stparts" + df_wide = pivot_offers_for_export(sample_offers) + + file_path = export_offers_xlsx(run_id, source, df_wide, tmp_path) + + assert file_path.exists() + assert file_path.name == f"{run_id}.xlsx" + + # --- Verify Excel content and formatting --- # + wb = openpyxl.load_workbook(file_path) + ws = wb[source] + + # 1. 
Check merged header + merged_header = ws["A1"].value + assert merged_header == f"Source: {source}" + # Check the merge range directly + assert any(cr.coord == "A1:AZ1" for cr in ws.merged_cells.ranges) + + # 2. Check data headers (should be in row 2) + assert ws["A2"].value == "brand" + assert ws["C2"].value == "price 1" + + # 3. Check data content + assert ws["B3"].value == "ART1" + assert ws["C3"].value == 10.0 + + # 4. Check frozen panes + assert ws.freeze_panes == "A3" + + # 5. Check number format on a price column + price_cell = ws["C3"] + assert price_cell.number_format == "#,##0.00" + + +def test_export_offers_xlsx_empty_dataframe(tmp_path): + """Verify that an empty DataFrame results in a valid Excel file with only headers.""" + run_id = uuid4() + source = "stparts" + df_empty = pivot_offers_for_export([]) # This creates an empty df with columns + + file_path = export_offers_xlsx(run_id, source, df_empty, tmp_path) + + assert file_path.exists() + wb = openpyxl.load_workbook(file_path) + ws = wb[source] + + # Check that the merged header is there + assert ws["A1"].value == f"Source: {source}" + # Check that the data headers are there + assert ws["A2"].value == "brand" + assert ws["B2"].value == "article" + # Check that there is no data in the first data row + assert ws["A3"].value is None diff --git a/tests/parser/test_http.py b/tests/parser/test_http.py new file mode 100644 index 0000000..6602c1f --- /dev/null +++ b/tests/parser/test_http.py @@ -0,0 +1,98 @@ +from unittest.mock import AsyncMock, MagicMock, patch + +import httpx +import pytest + +from parser.http import ProxySession, requested_article + + +@pytest.mark.parametrize( + "url, expected_article", + [ + ("https://stparts.ru/search?pcode=0PN1113H52", "0PN1113H52"), + ("https://stparts.ru/search/Hyundai-KIA/0PN1113H52?disableFiltering", "0PN1113H52"), + ("/search/Hyundai-KIA/0PN1113H52?disableFiltering", "0PN1113H52"), + ("/search?pcode=ABC-123", "ABC-123"), + ], +) +def 
test_requested_article_success(url: str, expected_article: str) -> None: + result = requested_article(url) + assert result == expected_article + + +def test_requested_article_failure() -> None: + invalid_url = "https://stparts.ru/products/some-product" + with pytest.raises(ValueError, match="Could not extract article from URL"): + requested_article(invalid_url) + + +@pytest.mark.asyncio +@patch("parser.http.httpx.AsyncClient", new_callable=MagicMock) # constructor is sync → use MagicMock +async def test_fetch_html_simulated_429_triggers_backoff(MockAsyncClient: MagicMock) -> None: + # Build a mock client instance whose async methods are AsyncMock + mock_client = AsyncMock() + MockAsyncClient.return_value = mock_client # ProxySession will call httpx.AsyncClient(...) + + # Responses: first path raises HTTPStatusError (simulating 429), then succeeds + response_429 = httpx.Response( + 429, + request=httpx.Request("GET", "https://test.com"), + headers={"Retry-After": "0.01"}, + ) + response_ok = httpx.Response(200, text="Success", request=httpx.Request("GET", "https://test.com")) + + mock_client.get.side_effect = [ + httpx.HTTPStatusError("Too Many Requests", request=response_429.request, response=response_429), + response_ok, + ] + + # Avoid real sleeping and assert backoff happened + with patch("asyncio.sleep", new_callable=AsyncMock) as mock_sleep: + dummy_proxy = {"ip": "127.0.0.1", "port": 8080, "username": "user", "password": "password"} + session = ProxySession(proxy=dummy_proxy) + + result = await session.fetch_html("https://test.com") + + assert result == "Success" + assert mock_client.get.await_count == 2 + mock_sleep.assert_awaited() # or assert_awaited_once() if exactly once + + +@pytest.mark.asyncio +@patch("parser.http.httpx.AsyncClient", new_callable=MagicMock) +async def test_fetch_html_retry_delay_is_capped(MockAsyncClient: MagicMock) -> None: + """Verify that the retry delay is capped by `max_delay_sec`.""" + mock_client = AsyncMock() + 
MockAsyncClient.return_value = mock_client + + # Make it fail 3 times before succeeding + response_503 = httpx.Response(503, request=httpx.Request("GET", "https://test.com")) + response_ok = httpx.Response(200, text="Success", request=httpx.Request("GET", "https://test.com")) + mock_client.get.side_effect = [ + httpx.HTTPStatusError("Service Unavailable", request=response_503.request, response=response_503), + httpx.HTTPStatusError("Service Unavailable", request=response_503.request, response=response_503), + httpx.HTTPStatusError("Service Unavailable", request=response_503.request, response=response_503), + response_ok, + ] + + # In http.py: max_retries = 3, base_delay_sec = 1.0, max_delay_sec = 60.0 + # The delays would be: + # 1. 1*2^0 + jitter = ~1s + # 2. 1*2^1 + jitter = ~2s + # 3. 1*2^2 + jitter = ~4s + # We will set max_delay_sec to 3 to test the cap + + with patch("asyncio.sleep", new_callable=AsyncMock) as mock_sleep: + dummy_proxy = {"ip": "127.0.0.1", "port": 8080, "username": "user", "password": "password"} + session = ProxySession(proxy=dummy_proxy) + # Temporarily modify the constants on the instance for the test + session.max_delay_sec = 3.0 + session.max_retries = 4 # Allow enough retries + + await session.fetch_html("https://test.com") + + assert mock_sleep.call_count == 3 + # Check that the delay was capped + assert mock_sleep.call_args_list[0].args[0] <= 3.0 # 1s + jitter + assert mock_sleep.call_args_list[1].args[0] <= 3.0 # 2s + jitter + assert mock_sleep.call_args_list[2].args[0] <= 3.0 # 4s + jitter -> capped at 3 diff --git a/tests/parser/test_stparts.py b/tests/parser/test_stparts.py new file mode 100644 index 0000000..7d78ade --- /dev/null +++ b/tests/parser/test_stparts.py @@ -0,0 +1,129 @@ +from selectolax.parser import HTMLParser + +from parser.stparts import parse_stparts + +# HTML fixture mimicking the structure of the stparts.ru results table +HTML_FIXTURE = """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
КОЛЬЦО ФОРСУНКИ МЕТАЛЛИЧЕСКОЕUAE189338.07 руб.
ANALOG RINGWAREHOUSE225.00 руб.
Forgiving ItemWAREHOUSE3123,45 руб.
+ + +""" + +SEARCH_URL = "https://stparts.ru/search/Hyundai-KIA/0PN1113H52?disableFiltering" + + +def test_parse_stparts_excludes_analogs_by_default(): + """Verifies that analog offers are excluded when include_analogs is False.""" + tree = HTMLParser(HTML_FIXTURE) + results = parse_stparts(tree, SEARCH_URL, include_analogs=False) + + assert len(results) == 3 + assert not any(r.is_analog for r in results) + + +def test_parse_stparts_includes_analogs_when_requested(): + """Verifies that analog offers are included when include_analogs is True.""" + tree = HTMLParser(HTML_FIXTURE) + results = parse_stparts(tree, SEARCH_URL, include_analogs=True) + + assert len(results) == 4 + assert any(r.is_analog for r in results) + + +def test_parse_stparts_correctly_parses_standard_offer(): + """Verifies that a standard offer row is parsed into the correct OfferRow model.""" + tree = HTMLParser(HTML_FIXTURE) + results = parse_stparts(tree, SEARCH_URL, include_analogs=True) + standard_offer = next((r for r in results if not r.is_analog and r.b == "Hyundai-KIA"), None) + + assert standard_offer is not None + assert standard_offer.b == "Hyundai-KIA" + assert standard_offer.a == "0PN1113H52" # From URL + assert standard_offer.name == "КОЛЬЦО ФОРСУНКИ МЕТАЛЛИЧЕСКОЕ" + assert standard_offer.price == 38.07 + assert standard_offer.quantity == 243 + assert standard_offer.provider == "UAE1893" + assert standard_offer.rating is None + # ceil(864 / 24) = 36 + assert standard_offer.delivery == 36 + assert not standard_offer.is_analog + + +def test_parse_stparts_handles_missing_data_gracefully(): + """Verifies that rows with missing attributes are parsed with None values.""" + tree = HTMLParser(HTML_FIXTURE) + results = parse_stparts(tree, SEARCH_URL, include_analogs=True) + offer_with_missing_data = next((r for r in results if r.b == "BrandWithMissingData"), None) + + assert offer_with_missing_data is not None + assert offer_with_missing_data.price is None + assert 
offer_with_missing_data.quantity is None + assert offer_with_missing_data.delivery is None + assert offer_with_missing_data.name == "" + assert offer_with_missing_data.provider == "" + + +def test_parse_stparts_handles_forgiving_numeric_formats(): + """Verifies that numeric values with commas, whitespace, and non-digit chars are parsed correctly.""" + tree = HTMLParser(HTML_FIXTURE) + results = parse_stparts(tree, SEARCH_URL, include_analogs=True) + forgiving_offer = next((r for r in results if r.b == "ForgivingBrand"), None) + + assert forgiving_offer is not None + assert forgiving_offer.price == 123.45 + assert forgiving_offer.quantity == 50 + # ceil(48 / 24) = 2 + assert forgiving_offer.delivery == 2 diff --git a/tests/parser/test_stparts_pipeline.py b/tests/parser/test_stparts_pipeline.py new file mode 100644 index 0000000..a1f9b9a --- /dev/null +++ b/tests/parser/test_stparts_pipeline.py @@ -0,0 +1,168 @@ +from decimal import Decimal +from unittest.mock import AsyncMock, MagicMock, patch + +import httpx +import pytest + +from parser.stparts_pipeline import run_stparts_pipeline + + +class MockProgressReporter: + def __init__(self): + self.steps = [] + self.percentages = [] + + def report_step(self, **kwargs): + self.steps.append(kwargs) + + def report_percentage(self, *, step: str, progress: int): + self.percentages.append((step, progress)) + + +# HTML fixture with 15 offers for the same article to test top-10 logic +HTML_FOR_TOP_10_TEST = """ + + + {} + +
+""".format( + "\n".join( + f''' + Item {i} + WH{i} + ''' + for i in range(1, 16) + ) +) + +# HTML fixture for testing the redirect link +HTML_WITH_REDIRECT_LINK = """ +Показать все варианты +""" + +HTML_FINAL_RESULTS = """ + + + + + + + +
Final ItemWH-FINAL
+""" + + +@pytest.fixture +def mock_proxy_pool(): + """Fixture to mock the ProxyPool and its sessions.""" + # We patch the async classmethod `from_db` separately. + with patch("parser.stparts_pipeline.ProxyPool.from_db", new_callable=AsyncMock) as mock_from_db: + # This is the mock for the ProxyPool *instance*. + mock_pool_instance = MagicMock() + + # This is the mock for the ProxySession instance. + mock_session = MagicMock() + mock_session.fetch_html = AsyncMock() + + # Configure the pool instance mock. + mock_pool_instance.acquire.return_value = mock_session + mock_pool_instance.close_all = AsyncMock() + + # Make the `from_db` classmethod return our mock pool instance. + mock_from_db.return_value = mock_pool_instance + + yield mock_pool_instance + + +@pytest.mark.asyncio +async def test_pipeline_top_10_logic(mock_proxy_pool): + """Verify that the pipeline correctly filters and returns only the top 10 offers.""" + mock_session = mock_proxy_pool.acquire.return_value + mock_session.fetch_html.return_value = HTML_FOR_TOP_10_TEST + articles = ["TESTCODE"] + + results = await run_stparts_pipeline(articles, include_analogs=False) + + assert len(results) == 10 + # Prices are 1.00 to 15.00, so the top 10 should have a max price of 10.00 + assert max(r.price for r in results) == 10.0 + assert min(r.price for r in results) == 1.0 + + +@pytest.mark.asyncio +async def test_pipeline_follows_redirect_link(mock_proxy_pool): + """Verify that the pipeline follows the 'Показать все варианты' link.""" + mock_session = mock_proxy_pool.acquire.return_value + + async def fetch_side_effect(url, params=None): + if url == "https://stparts.ru/search": + return HTML_WITH_REDIRECT_LINK + elif url == "https://stparts.ru/final-results-page": + return HTML_FINAL_RESULTS + return "" + + mock_session.fetch_html.side_effect = fetch_side_effect + articles = ["TESTCODE"] + + results = await run_stparts_pipeline(articles, include_analogs=False) + + assert mock_session.fetch_html.call_count == 2 + # 
First call is to the initial search page + assert mock_session.fetch_html.call_args_list[0].kwargs["params"] == {"pcode": "TESTCODE"} + # Second call is to the redirect link + assert mock_session.fetch_html.call_args_list[1].args[0] == "https://stparts.ru/final-results-page" + assert len(results) == 1 + assert results[0].price == 99.0 + + +@pytest.mark.asyncio +async def test_pipeline_reports_progress(mock_proxy_pool): + """Verify that the pipeline reports progress correctly and handles concurrency.""" + mock_session = mock_proxy_pool.acquire.return_value + mock_session.fetch_html.return_value = HTML_FINAL_RESULTS + articles = ["CODE1", "CODE2", "CODE3", "CODE4"] + reporter = MockProgressReporter() + + await run_stparts_pipeline(articles, include_analogs=False, reporter=reporter) + + assert reporter.steps == [ + {"step": "FETCHING", "status": "IN_PROGRESS"}, + {"step": "FILTERING", "status": "IN_PROGRESS"}, + {"step": "FILTERING", "status": "SUCCESS"}, + ] + # With concurrency, order is not guaranteed, so we sort before comparing. 
+ assert sorted(reporter.percentages) == [ + ("FETCHING", 25), + ("FETCHING", 50), + ("FETCHING", 75), + ("FETCHING", 100), + ] + + +@pytest.mark.asyncio +async def test_pipeline_handles_single_task_failure(mock_proxy_pool): + """Verify the pipeline continues and reports errors if one article fails.""" + mock_session = mock_proxy_pool.acquire.return_value + + async def fetch_side_effect(url, params=None): + if params and params.get("pcode") == "FAIL_CODE": + raise httpx.RequestError("Test failure") + return HTML_FINAL_RESULTS + + mock_session.fetch_html.side_effect = fetch_side_effect + articles = ["CODE1", "FAIL_CODE", "CODE3"] + reporter = MockProgressReporter() + + results = await run_stparts_pipeline(articles, include_analogs=False, reporter=reporter) + + # Should still get results from the two successful calls + assert len(results) == 2 + # Check that the failure was reported + failure_step = next((s for s in reporter.steps if s.get("status") == "FAILURE"), None) + assert failure_step is not None + assert failure_step["step"] == "FETCHING" + assert "Test failure" in failure_step["details"]["error"] + assert failure_step["details"]["article"] == "FAIL_CODE" diff --git a/uv.lock b/uv.lock index 3d40887..7414d58 100644 --- a/uv.lock +++ b/uv.lock @@ -7,6 +7,7 @@ name = "admin2" version = "0.1.0" source = { virtual = "." 
} dependencies = [ + { name = "asyncpg" }, { name = "celery" }, { name = "clickhouse-connect" }, { name = "clickhouse-driver", extra = ["numpy"] }, @@ -23,6 +24,7 @@ dependencies = [ { name = "djangorestframework" }, { name = "eventlet" }, { name = "gunicorn" }, + { name = "httpx", extra = ["http2"] }, { name = "loguru" }, { name = "markdown" }, { name = "numpy" }, @@ -30,7 +32,10 @@ dependencies = [ { name = "pandas" }, { name = "pillow" }, { name = "psycopg", extra = ["binary", "pool"] }, + { name = "pydantic" }, { name = "redis" }, + { name = "selectolax" }, + { name = "xlsxwriter" }, ] [package.dev-dependencies] @@ -40,6 +45,7 @@ dev = [ { name = "faker" }, { name = "pre-commit" }, { name = "pytest" }, + { name = "pytest-asyncio" }, { name = "pytest-django" }, { name = "pytest-xdist" }, { name = "ruff" }, @@ -48,6 +54,7 @@ dev = [ [package.metadata] requires-dist = [ + { name = "asyncpg", specifier = ">=0.30.0" }, { name = "celery", specifier = ">=5.5.2" }, { name = "clickhouse-connect", specifier = ">=0.9.2" }, { name = "clickhouse-driver", extras = ["numpy"], specifier = ">=0.2.9" }, @@ -64,6 +71,7 @@ requires-dist = [ { name = "djangorestframework", specifier = ">=3.16.0" }, { name = "eventlet", specifier = ">=0.40.0" }, { name = "gunicorn", specifier = ">=23.0.0" }, + { name = "httpx", extras = ["http2"], specifier = ">=0.27.0" }, { name = "loguru", specifier = ">=0.7.3" }, { name = "markdown", specifier = ">=3.8.2" }, { name = "numpy", specifier = ">=2.2.5" }, @@ -71,7 +79,10 @@ requires-dist = [ { name = "pandas", specifier = ">=2.2.3" }, { name = "pillow", specifier = ">=11.2.1" }, { name = "psycopg", extras = ["binary", "pool"], specifier = ">=3.2.9" }, + { name = "pydantic", specifier = ">=2.12.0" }, { name = "redis", specifier = ">=6.1.0" }, + { name = "selectolax", specifier = ">=0.4.0" }, + { name = "xlsxwriter", specifier = ">=3.2.9" }, ] [package.metadata.requires-dev] @@ -81,6 +92,7 @@ dev = [ { name = "faker", specifier = ">=37.1.0" }, { name = 
"pre-commit", specifier = ">=4.2.0" }, { name = "pytest", specifier = ">=8.3.5" }, + { name = "pytest-asyncio", specifier = ">=1.2.0" }, { name = "pytest-django", specifier = ">=4.11.1" }, { name = "pytest-xdist", specifier = ">=3.6.1" }, { name = "ruff", specifier = ">=0.11.8" }, @@ -99,6 +111,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/26/99/fc813cd978842c26c82534010ea849eee9ab3a13ea2b74e95cb9c99e747b/amqp-5.3.1-py3-none-any.whl", hash = "sha256:43b3319e1b4e7d1251833a93d672b4af1e40f3d632d479b98661a95f117880a2", size = 50944, upload-time = "2024-11-12T19:55:41.782Z" }, ] +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + +[[package]] +name = "anyio" +version = "4.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "sniffio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c6/78/7d432127c41b50bccba979505f272c16cbcadcc33645d5fa3a738110ae75/anyio-4.11.0.tar.gz", hash = "sha256:82a8d0b81e318cc5ce71a5f1f8b5c4e63619620b63141ef8c995fa0db95a57c4", size = 219094, upload-time = "2025-09-23T09:19:12.58Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/15/b3/9b1a8074496371342ec1e796a96f99c82c945a339cd81a8e73de28b4cf9e/anyio-4.11.0-py3-none-any.whl", hash = "sha256:0287e96f4d26d4149305414d4e3bc32f0dcd0862365a4bddea19d7a1ec38c4fc", size = 109097, upload-time = "2025-09-23T09:19:10.601Z" }, +] + [[package]] name = "asgiref" version = "3.8.1" @@ -108,6 +143,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/39/e3/893e8757be2612e6c266d9bb58ad2e3651524b5b40cf56761e985a28b13e/asgiref-3.8.1-py3-none-any.whl", hash = "sha256:3e1e3ecc849832fe52ccf2cb6686b7a55f82bb1d6aee72a58826471390335e47", size = 23828, upload-time = "2024-03-22T14:39:34.521Z" }, ] +[[package]] +name = "asyncpg" +version = "0.30.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2f/4c/7c991e080e106d854809030d8584e15b2e996e26f16aee6d757e387bc17d/asyncpg-0.30.0.tar.gz", hash = "sha256:c551e9928ab6707602f44811817f82ba3c446e018bfe1d3abecc8ba5f3eac851", size = 957746, upload-time = "2024-10-20T00:30:41.127Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4b/64/9d3e887bb7b01535fdbc45fbd5f0a8447539833b97ee69ecdbb7a79d0cb4/asyncpg-0.30.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c902a60b52e506d38d7e80e0dd5399f657220f24635fee368117b8b5fce1142e", size = 673162, upload-time = "2024-10-20T00:29:41.88Z" }, + { url = "https://files.pythonhosted.org/packages/6e/eb/8b236663f06984f212a087b3e849731f917ab80f84450e943900e8ca4052/asyncpg-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aca1548e43bbb9f0f627a04666fedaca23db0a31a84136ad1f868cb15deb6e3a", size = 637025, upload-time = "2024-10-20T00:29:43.352Z" }, + { url = "https://files.pythonhosted.org/packages/cc/57/2dc240bb263d58786cfaa60920779af6e8d32da63ab9ffc09f8312bd7a14/asyncpg-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c2a2ef565400234a633da0eafdce27e843836256d40705d83ab7ec42074efb3", size = 3496243, upload-time = 
"2024-10-20T00:29:44.922Z" }, + { url = "https://files.pythonhosted.org/packages/f4/40/0ae9d061d278b10713ea9021ef6b703ec44698fe32178715a501ac696c6b/asyncpg-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1292b84ee06ac8a2ad8e51c7475aa309245874b61333d97411aab835c4a2f737", size = 3575059, upload-time = "2024-10-20T00:29:46.891Z" }, + { url = "https://files.pythonhosted.org/packages/c3/75/d6b895a35a2c6506952247640178e5f768eeb28b2e20299b6a6f1d743ba0/asyncpg-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f5712350388d0cd0615caec629ad53c81e506b1abaaf8d14c93f54b35e3595a", size = 3473596, upload-time = "2024-10-20T00:29:49.201Z" }, + { url = "https://files.pythonhosted.org/packages/c8/e7/3693392d3e168ab0aebb2d361431375bd22ffc7b4a586a0fc060d519fae7/asyncpg-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:db9891e2d76e6f425746c5d2da01921e9a16b5a71a1c905b13f30e12a257c4af", size = 3641632, upload-time = "2024-10-20T00:29:50.768Z" }, + { url = "https://files.pythonhosted.org/packages/32/ea/15670cea95745bba3f0352341db55f506a820b21c619ee66b7d12ea7867d/asyncpg-0.30.0-cp312-cp312-win32.whl", hash = "sha256:68d71a1be3d83d0570049cd1654a9bdfe506e794ecc98ad0873304a9f35e411e", size = 560186, upload-time = "2024-10-20T00:29:52.394Z" }, + { url = "https://files.pythonhosted.org/packages/7e/6b/fe1fad5cee79ca5f5c27aed7bd95baee529c1bf8a387435c8ba4fe53d5c1/asyncpg-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a0292c6af5c500523949155ec17b7fe01a00ace33b68a476d6b5059f9630305", size = 621064, upload-time = "2024-10-20T00:29:53.757Z" }, + { url = "https://files.pythonhosted.org/packages/3a/22/e20602e1218dc07692acf70d5b902be820168d6282e69ef0d3cb920dc36f/asyncpg-0.30.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05b185ebb8083c8568ea8a40e896d5f7af4b8554b64d7719c0eaa1eb5a5c3a70", size = 670373, upload-time = "2024-10-20T00:29:55.165Z" }, + { url = 
"https://files.pythonhosted.org/packages/3d/b3/0cf269a9d647852a95c06eb00b815d0b95a4eb4b55aa2d6ba680971733b9/asyncpg-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c47806b1a8cbb0a0db896f4cd34d89942effe353a5035c62734ab13b9f938da3", size = 634745, upload-time = "2024-10-20T00:29:57.14Z" }, + { url = "https://files.pythonhosted.org/packages/8e/6d/a4f31bf358ce8491d2a31bfe0d7bcf25269e80481e49de4d8616c4295a34/asyncpg-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b6fde867a74e8c76c71e2f64f80c64c0f3163e687f1763cfaf21633ec24ec33", size = 3512103, upload-time = "2024-10-20T00:29:58.499Z" }, + { url = "https://files.pythonhosted.org/packages/96/19/139227a6e67f407b9c386cb594d9628c6c78c9024f26df87c912fabd4368/asyncpg-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46973045b567972128a27d40001124fbc821c87a6cade040cfcd4fa8a30bcdc4", size = 3592471, upload-time = "2024-10-20T00:30:00.354Z" }, + { url = "https://files.pythonhosted.org/packages/67/e4/ab3ca38f628f53f0fd28d3ff20edff1c975dd1cb22482e0061916b4b9a74/asyncpg-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9110df111cabc2ed81aad2f35394a00cadf4f2e0635603db6ebbd0fc896f46a4", size = 3496253, upload-time = "2024-10-20T00:30:02.794Z" }, + { url = "https://files.pythonhosted.org/packages/ef/5f/0bf65511d4eeac3a1f41c54034a492515a707c6edbc642174ae79034d3ba/asyncpg-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04ff0785ae7eed6cc138e73fc67b8e51d54ee7a3ce9b63666ce55a0bf095f7ba", size = 3662720, upload-time = "2024-10-20T00:30:04.501Z" }, + { url = "https://files.pythonhosted.org/packages/e7/31/1513d5a6412b98052c3ed9158d783b1e09d0910f51fbe0e05f56cc370bc4/asyncpg-0.30.0-cp313-cp313-win32.whl", hash = "sha256:ae374585f51c2b444510cdf3595b97ece4f233fde739aa14b50e0d64e8a7a590", size = 560404, upload-time = "2024-10-20T00:30:06.537Z" }, + { url = 
"https://files.pythonhosted.org/packages/c8/a4/cec76b3389c4c5ff66301cd100fe88c318563ec8a520e0b2e792b5b84972/asyncpg-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:f59b430b8e27557c3fb9869222559f7417ced18688375825f8f12302c34e915e", size = 621623, upload-time = "2024-10-20T00:30:09.024Z" }, +] + [[package]] name = "billiard" version = "4.2.1" @@ -570,6 +629,79 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/7d/6dac2a6e1eba33ee43f318edbed4ff29151a49b5d37f080aad1e6469bca4/gunicorn-23.0.0-py3-none-any.whl", hash = "sha256:ec400d38950de4dfd418cff8328b2c8faed0edb0d517d3394e457c317908ca4d", size = 85029, upload-time = "2024-08-10T20:25:24.996Z" }, ] +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, +] + +[[package]] +name = "h2" +version = "4.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "hpack" }, + { name = "hyperframe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026, upload-time = "2025-08-23T18:12:19.778Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = 
"sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" }, +] + +[[package]] +name = "hpack" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276, upload-time = "2025-01-22T21:44:58.347Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = 
"sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, +] + +[package.optional-dependencies] +http2 = [ + { name = "h2" }, +] + +[[package]] +name = "hyperframe" +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566, upload-time = "2025-01-22T21:41:49.302Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" }, +] + [[package]] name = "identify" version = "2.6.12" @@ -579,6 +711,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7a/cd/18f8da995b658420625f7ef13f037be53ae04ec5ad33f9b718240dcfd48c/identify-2.6.12-py2.py3-none-any.whl", hash = "sha256:ad9672d5a72e0d2ff7c5c8809b62dfa60458626352fb0eb7b55e69bdc45334a2", size = 99145, upload-time = "2025-05-23T20:37:51.495Z" }, ] +[[package]] +name = "idna" +version = "3.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload-time = "2024-09-15T18:07:39.745Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" }, +] + [[package]] name = "iniconfig" version = "2.1.0" @@ -937,6 +1078,84 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/47/fd/4feb52a55c1a4bd748f2acaed1903ab54a723c47f6d0242780f4d97104d4/psycopg_pool-3.2.6-py3-none-any.whl", hash = "sha256:5887318a9f6af906d041a0b1dc1c60f8f0dda8340c2572b74e10907b51ed5da7", size = 38252, upload-time = "2025-02-26T12:03:45.073Z" }, ] +[[package]] +name = "pydantic" +version = "2.12.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c3/da/b8a7ee04378a53f6fefefc0c5e05570a3ebfdfa0523a878bcd3b475683ee/pydantic-2.12.0.tar.gz", hash = "sha256:c1a077e6270dbfb37bfd8b498b3981e2bb18f68103720e51fa6c306a5a9af563", size = 814760, upload-time = "2025-10-07T15:58:03.467Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/9d/d5c855424e2e5b6b626fbc6ec514d8e655a600377ce283008b115abb7445/pydantic-2.12.0-py3-none-any.whl", hash = "sha256:f6a1da352d42790537e95e83a8bdfb91c7efbae63ffd0b86fa823899e807116f", size = 459730, upload-time = "2025-10-07T15:58:01.576Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.41.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7d/14/12b4a0d2b0b10d8e1d9a24ad94e7bbb43335eaf29c0c4e57860e8a30734a/pydantic_core-2.41.1.tar.gz", hash = "sha256:1ad375859a6d8c356b7704ec0f547a58e82ee80bb41baa811ad710e124bc8f2f", size = 454870, upload-time = "2025-10-07T10:50:45.974Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ee/bc/5f520319ee1c9e25010412fac4154a72e0a40d0a19eb00281b1f200c0947/pydantic_core-2.41.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:db2f82c0ccbce8f021ad304ce35cbe02aa2f95f215cac388eed542b03b4d5eb4", size = 2099300, upload-time = "2025-10-06T21:10:30.463Z" }, + { url = "https://files.pythonhosted.org/packages/31/14/010cd64c5c3814fb6064786837ec12604be0dd46df3327cf8474e38abbbd/pydantic_core-2.41.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:47694a31c710ced9205d5f1e7e8af3ca57cbb8a503d98cb9e33e27c97a501601", size = 1910179, upload-time = "2025-10-06T21:10:31.782Z" }, + { url = "https://files.pythonhosted.org/packages/8e/2e/23fc2a8a93efad52df302fdade0a60f471ecc0c7aac889801ac24b4c07d6/pydantic_core-2.41.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:93e9decce94daf47baf9e9d392f5f2557e783085f7c5e522011545d9d6858e00", size = 1957225, upload-time = "2025-10-06T21:10:33.11Z" }, + { url = "https://files.pythonhosted.org/packages/b9/b6/6db08b2725b2432b9390844852e11d320281e5cea8a859c52c68001975fa/pydantic_core-2.41.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ab0adafdf2b89c8b84f847780a119437a0931eca469f7b44d356f2b426dd9741", size = 2053315, upload-time = "2025-10-06T21:10:34.87Z" }, + { url = "https://files.pythonhosted.org/packages/61/d9/4de44600f2d4514b44f3f3aeeda2e14931214b6b5bf52479339e801ce748/pydantic_core-2.41.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5da98cc81873f39fd56882e1569c4677940fbc12bce6213fad1ead784192d7c8", size = 2224298, upload-time = "2025-10-06T21:10:36.233Z" }, + { url = "https://files.pythonhosted.org/packages/7a/ae/dbe51187a7f35fc21b283c5250571a94e36373eb557c1cba9f29a9806dcf/pydantic_core-2.41.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:209910e88afb01fd0fd403947b809ba8dba0e08a095e1f703294fda0a8fdca51", size = 2351797, upload-time = "2025-10-06T21:10:37.601Z" }, + { 
url = "https://files.pythonhosted.org/packages/b5/a7/975585147457c2e9fb951c7c8dab56deeb6aa313f3aa72c2fc0df3f74a49/pydantic_core-2.41.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:365109d1165d78d98e33c5bfd815a9b5d7d070f578caefaabcc5771825b4ecb5", size = 2074921, upload-time = "2025-10-06T21:10:38.927Z" }, + { url = "https://files.pythonhosted.org/packages/62/37/ea94d1d0c01dec1b7d236c7cec9103baab0021f42500975de3d42522104b/pydantic_core-2.41.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:706abf21e60a2857acdb09502bc853ee5bce732955e7b723b10311114f033115", size = 2187767, upload-time = "2025-10-06T21:10:40.651Z" }, + { url = "https://files.pythonhosted.org/packages/d3/fe/694cf9fdd3a777a618c3afd210dba7b414cb8a72b1bd29b199c2e5765fee/pydantic_core-2.41.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bf0bd5417acf7f6a7ec3b53f2109f587be176cb35f9cf016da87e6017437a72d", size = 2136062, upload-time = "2025-10-06T21:10:42.09Z" }, + { url = "https://files.pythonhosted.org/packages/0f/ae/174aeabd89916fbd2988cc37b81a59e1186e952afd2a7ed92018c22f31ca/pydantic_core-2.41.1-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:2e71b1c6ceb9c78424ae9f63a07292fb769fb890a4e7efca5554c47f33a60ea5", size = 2317819, upload-time = "2025-10-06T21:10:43.974Z" }, + { url = "https://files.pythonhosted.org/packages/65/e8/e9aecafaebf53fc456314f72886068725d6fba66f11b013532dc21259343/pydantic_core-2.41.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:80745b9770b4a38c25015b517451c817799bfb9d6499b0d13d8227ec941cb513", size = 2312267, upload-time = "2025-10-06T21:10:45.34Z" }, + { url = "https://files.pythonhosted.org/packages/35/2f/1c2e71d2a052f9bb2f2df5a6a05464a0eb800f9e8d9dd800202fe31219e1/pydantic_core-2.41.1-cp312-cp312-win32.whl", hash = "sha256:83b64d70520e7890453f1aa21d66fda44e7b35f1cfea95adf7b4289a51e2b479", size = 1990927, upload-time = "2025-10-06T21:10:46.738Z" }, + { url = 
"https://files.pythonhosted.org/packages/b1/78/562998301ff2588b9c6dcc5cb21f52fa919d6e1decc75a35055feb973594/pydantic_core-2.41.1-cp312-cp312-win_amd64.whl", hash = "sha256:377defd66ee2003748ee93c52bcef2d14fde48fe28a0b156f88c3dbf9bc49a50", size = 2034703, upload-time = "2025-10-06T21:10:48.524Z" }, + { url = "https://files.pythonhosted.org/packages/b2/53/d95699ce5a5cdb44bb470bd818b848b9beadf51459fd4ea06667e8ede862/pydantic_core-2.41.1-cp312-cp312-win_arm64.whl", hash = "sha256:c95caff279d49c1d6cdfe2996e6c2ad712571d3b9caaa209a404426c326c4bde", size = 1972719, upload-time = "2025-10-06T21:10:50.256Z" }, + { url = "https://files.pythonhosted.org/packages/27/8a/6d54198536a90a37807d31a156642aae7a8e1263ed9fe6fc6245defe9332/pydantic_core-2.41.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:70e790fce5f05204ef4403159857bfcd587779da78627b0babb3654f75361ebf", size = 2105825, upload-time = "2025-10-06T21:10:51.719Z" }, + { url = "https://files.pythonhosted.org/packages/4f/2e/4784fd7b22ac9c8439db25bf98ffed6853d01e7e560a346e8af821776ccc/pydantic_core-2.41.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9cebf1ca35f10930612d60bd0f78adfacee824c30a880e3534ba02c207cceceb", size = 1910126, upload-time = "2025-10-06T21:10:53.145Z" }, + { url = "https://files.pythonhosted.org/packages/f3/92/31eb0748059ba5bd0aa708fb4bab9fcb211461ddcf9e90702a6542f22d0d/pydantic_core-2.41.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:170406a37a5bc82c22c3274616bf6f17cc7df9c4a0a0a50449e559cb755db669", size = 1961472, upload-time = "2025-10-06T21:10:55.754Z" }, + { url = "https://files.pythonhosted.org/packages/ab/91/946527792275b5c4c7dde4cfa3e81241bf6900e9fee74fb1ba43e0c0f1ab/pydantic_core-2.41.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:12d4257fc9187a0ccd41b8b327d6a4e57281ab75e11dda66a9148ef2e1fb712f", size = 2063230, upload-time = "2025-10-06T21:10:57.179Z" }, + { url = 
"https://files.pythonhosted.org/packages/31/5d/a35c5d7b414e5c0749f1d9f0d159ee2ef4bab313f499692896b918014ee3/pydantic_core-2.41.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a75a33b4db105dd1c8d57839e17ee12db8d5ad18209e792fa325dbb4baeb00f4", size = 2229469, upload-time = "2025-10-06T21:10:59.409Z" }, + { url = "https://files.pythonhosted.org/packages/21/4d/8713737c689afa57ecfefe38db78259d4484c97aa494979e6a9d19662584/pydantic_core-2.41.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08a589f850803a74e0fcb16a72081cafb0d72a3cdda500106942b07e76b7bf62", size = 2347986, upload-time = "2025-10-06T21:11:00.847Z" }, + { url = "https://files.pythonhosted.org/packages/f6/ec/929f9a3a5ed5cda767081494bacd32f783e707a690ce6eeb5e0730ec4986/pydantic_core-2.41.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a97939d6ea44763c456bd8a617ceada2c9b96bb5b8ab3dfa0d0827df7619014", size = 2072216, upload-time = "2025-10-06T21:11:02.43Z" }, + { url = "https://files.pythonhosted.org/packages/26/55/a33f459d4f9cc8786d9db42795dbecc84fa724b290d7d71ddc3d7155d46a/pydantic_core-2.41.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d2ae423c65c556f09569524b80ffd11babff61f33055ef9773d7c9fabc11ed8d", size = 2193047, upload-time = "2025-10-06T21:11:03.787Z" }, + { url = "https://files.pythonhosted.org/packages/77/af/d5c6959f8b089f2185760a2779079e3c2c411bfc70ea6111f58367851629/pydantic_core-2.41.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:4dc703015fbf8764d6a8001c327a87f1823b7328d40b47ce6000c65918ad2b4f", size = 2140613, upload-time = "2025-10-06T21:11:05.607Z" }, + { url = "https://files.pythonhosted.org/packages/58/e5/2c19bd2a14bffe7fabcf00efbfbd3ac430aaec5271b504a938ff019ac7be/pydantic_core-2.41.1-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:968e4ffdfd35698a5fe659e5e44c508b53664870a8e61c8f9d24d3d145d30257", size = 2327641, upload-time = "2025-10-06T21:11:07.143Z" }, + { url = 
"https://files.pythonhosted.org/packages/93/ef/e0870ccda798c54e6b100aff3c4d49df5458fd64217e860cb9c3b0a403f4/pydantic_core-2.41.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:fff2b76c8e172d34771cd4d4f0ade08072385310f214f823b5a6ad4006890d32", size = 2318229, upload-time = "2025-10-06T21:11:08.73Z" }, + { url = "https://files.pythonhosted.org/packages/b1/4b/c3b991d95f5deb24d0bd52e47bcf716098fa1afe0ce2d4bd3125b38566ba/pydantic_core-2.41.1-cp313-cp313-win32.whl", hash = "sha256:a38a5263185407ceb599f2f035faf4589d57e73c7146d64f10577f6449e8171d", size = 1997911, upload-time = "2025-10-06T21:11:10.329Z" }, + { url = "https://files.pythonhosted.org/packages/a7/ce/5c316fd62e01f8d6be1b7ee6b54273214e871772997dc2c95e204997a055/pydantic_core-2.41.1-cp313-cp313-win_amd64.whl", hash = "sha256:b42ae7fd6760782c975897e1fdc810f483b021b32245b0105d40f6e7a3803e4b", size = 2034301, upload-time = "2025-10-06T21:11:12.113Z" }, + { url = "https://files.pythonhosted.org/packages/29/41/902640cfd6a6523194123e2c3373c60f19006447f2fb06f76de4e8466c5b/pydantic_core-2.41.1-cp313-cp313-win_arm64.whl", hash = "sha256:ad4111acc63b7384e205c27a2f15e23ac0ee21a9d77ad6f2e9cb516ec90965fb", size = 1977238, upload-time = "2025-10-06T21:11:14.1Z" }, + { url = "https://files.pythonhosted.org/packages/04/04/28b040e88c1b89d851278478842f0bdf39c7a05da9e850333c6c8cbe7dfa/pydantic_core-2.41.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:440d0df7415b50084a4ba9d870480c16c5f67c0d1d4d5119e3f70925533a0edc", size = 1875626, upload-time = "2025-10-06T21:11:15.69Z" }, + { url = "https://files.pythonhosted.org/packages/d6/58/b41dd3087505220bb58bc81be8c3e8cbc037f5710cd3c838f44f90bdd704/pydantic_core-2.41.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:71eaa38d342099405dae6484216dcf1e8e4b0bebd9b44a4e08c9b43db6a2ab67", size = 2045708, upload-time = "2025-10-06T21:11:17.258Z" }, + { url = 
"https://files.pythonhosted.org/packages/d7/b8/760f23754e40bf6c65b94a69b22c394c24058a0ef7e2aa471d2e39219c1a/pydantic_core-2.41.1-cp313-cp313t-win_amd64.whl", hash = "sha256:555ecf7e50f1161d3f693bc49f23c82cf6cdeafc71fa37a06120772a09a38795", size = 1997171, upload-time = "2025-10-06T21:11:18.822Z" }, + { url = "https://files.pythonhosted.org/packages/41/12/cec246429ddfa2778d2d6301eca5362194dc8749ecb19e621f2f65b5090f/pydantic_core-2.41.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:05226894a26f6f27e1deb735d7308f74ef5fa3a6de3e0135bb66cdcaee88f64b", size = 2107836, upload-time = "2025-10-06T21:11:20.432Z" }, + { url = "https://files.pythonhosted.org/packages/20/39/baba47f8d8b87081302498e610aefc37142ce6a1cc98b2ab6b931a162562/pydantic_core-2.41.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:85ff7911c6c3e2fd8d3779c50925f6406d770ea58ea6dde9c230d35b52b16b4a", size = 1904449, upload-time = "2025-10-06T21:11:22.185Z" }, + { url = "https://files.pythonhosted.org/packages/50/32/9a3d87cae2c75a5178334b10358d631bd094b916a00a5993382222dbfd92/pydantic_core-2.41.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47f1f642a205687d59b52dc1a9a607f45e588f5a2e9eeae05edd80c7a8c47674", size = 1961750, upload-time = "2025-10-06T21:11:24.348Z" }, + { url = "https://files.pythonhosted.org/packages/27/42/a96c9d793a04cf2a9773bff98003bb154087b94f5530a2ce6063ecfec583/pydantic_core-2.41.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:df11c24e138876ace5ec6043e5cae925e34cf38af1a1b3d63589e8f7b5f5cdc4", size = 2063305, upload-time = "2025-10-06T21:11:26.556Z" }, + { url = "https://files.pythonhosted.org/packages/3e/8d/028c4b7d157a005b1f52c086e2d4b0067886b213c86220c1153398dbdf8f/pydantic_core-2.41.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7f0bf7f5c8f7bf345c527e8a0d72d6b26eda99c1227b0c34e7e59e181260de31", size = 2228959, upload-time = "2025-10-06T21:11:28.426Z" }, + { url = 
"https://files.pythonhosted.org/packages/08/f7/ee64cda8fcc9ca3f4716e6357144f9ee71166775df582a1b6b738bf6da57/pydantic_core-2.41.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:82b887a711d341c2c47352375d73b029418f55b20bd7815446d175a70effa706", size = 2345421, upload-time = "2025-10-06T21:11:30.226Z" }, + { url = "https://files.pythonhosted.org/packages/13/c0/e8ec05f0f5ee7a3656973ad9cd3bc73204af99f6512c1a4562f6fb4b3f7d/pydantic_core-2.41.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b5f1d5d6bbba484bdf220c72d8ecd0be460f4bd4c5e534a541bb2cd57589fb8b", size = 2065288, upload-time = "2025-10-06T21:11:32.019Z" }, + { url = "https://files.pythonhosted.org/packages/0a/25/d77a73ff24e2e4fcea64472f5e39b0402d836da9b08b5361a734d0153023/pydantic_core-2.41.1-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2bf1917385ebe0f968dc5c6ab1375886d56992b93ddfe6bf52bff575d03662be", size = 2189759, upload-time = "2025-10-06T21:11:33.753Z" }, + { url = "https://files.pythonhosted.org/packages/66/45/4a4ebaaae12a740552278d06fe71418c0f2869537a369a89c0e6723b341d/pydantic_core-2.41.1-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:4f94f3ab188f44b9a73f7295663f3ecb8f2e2dd03a69c8f2ead50d37785ecb04", size = 2140747, upload-time = "2025-10-06T21:11:35.781Z" }, + { url = "https://files.pythonhosted.org/packages/da/6d/b727ce1022f143194a36593243ff244ed5a1eb3c9122296bf7e716aa37ba/pydantic_core-2.41.1-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:3925446673641d37c30bd84a9d597e49f72eacee8b43322c8999fa17d5ae5bc4", size = 2327416, upload-time = "2025-10-06T21:11:37.75Z" }, + { url = "https://files.pythonhosted.org/packages/6f/8c/02df9d8506c427787059f87c6c7253435c6895e12472a652d9616ee0fc95/pydantic_core-2.41.1-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:49bd51cc27adb980c7b97357ae036ce9b3c4d0bb406e84fbe16fb2d368b602a8", size = 2318138, upload-time = "2025-10-06T21:11:39.463Z" }, + { url = 
"https://files.pythonhosted.org/packages/98/67/0cf429a7d6802536941f430e6e3243f6d4b68f41eeea4b242372f1901794/pydantic_core-2.41.1-cp314-cp314-win32.whl", hash = "sha256:a31ca0cd0e4d12ea0df0077df2d487fc3eb9d7f96bbb13c3c5b88dcc21d05159", size = 1998429, upload-time = "2025-10-06T21:11:41.989Z" }, + { url = "https://files.pythonhosted.org/packages/38/60/742fef93de5d085022d2302a6317a2b34dbfe15258e9396a535c8a100ae7/pydantic_core-2.41.1-cp314-cp314-win_amd64.whl", hash = "sha256:1b5c4374a152e10a22175d7790e644fbd8ff58418890e07e2073ff9d4414efae", size = 2028870, upload-time = "2025-10-06T21:11:43.66Z" }, + { url = "https://files.pythonhosted.org/packages/31/38/cdd8ccb8555ef7720bd7715899bd6cfbe3c29198332710e1b61b8f5dd8b8/pydantic_core-2.41.1-cp314-cp314-win_arm64.whl", hash = "sha256:4fee76d757639b493eb600fba668f1e17475af34c17dd61db7a47e824d464ca9", size = 1974275, upload-time = "2025-10-06T21:11:45.476Z" }, + { url = "https://files.pythonhosted.org/packages/e7/7e/8ac10ccb047dc0221aa2530ec3c7c05ab4656d4d4bd984ee85da7f3d5525/pydantic_core-2.41.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f9b9c968cfe5cd576fdd7361f47f27adeb120517e637d1b189eea1c3ece573f4", size = 1875124, upload-time = "2025-10-06T21:11:47.591Z" }, + { url = "https://files.pythonhosted.org/packages/c3/e4/7d9791efeb9c7d97e7268f8d20e0da24d03438a7fa7163ab58f1073ba968/pydantic_core-2.41.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1ebc7ab67b856384aba09ed74e3e977dded40e693de18a4f197c67d0d4e6d8e", size = 2043075, upload-time = "2025-10-06T21:11:49.542Z" }, + { url = "https://files.pythonhosted.org/packages/2d/c3/3f6e6b2342ac11ac8cd5cb56e24c7b14afa27c010e82a765ffa5f771884a/pydantic_core-2.41.1-cp314-cp314t-win_amd64.whl", hash = "sha256:8ae0dc57b62a762985bc7fbf636be3412394acc0ddb4ade07fe104230f1b9762", size = 1995341, upload-time = "2025-10-06T21:11:51.497Z" }, + { url = 
"https://files.pythonhosted.org/packages/2b/3e/a51c5f5d37b9288ba30683d6e96f10fa8f1defad1623ff09f1020973b577/pydantic_core-2.41.1-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:b04fa9ed049461a7398138c604b00550bc89e3e1151d84b81ad6dc93e39c4c06", size = 2115344, upload-time = "2025-10-07T10:50:02.466Z" }, + { url = "https://files.pythonhosted.org/packages/5a/bd/389504c9e0600ef4502cd5238396b527afe6ef8981a6a15cd1814fc7b434/pydantic_core-2.41.1-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:b3b7d9cfbfdc43c80a16638c6dc2768e3956e73031fca64e8e1a3ae744d1faeb", size = 1927994, upload-time = "2025-10-07T10:50:04.379Z" }, + { url = "https://files.pythonhosted.org/packages/ff/9c/5111c6b128861cb792a4c082677e90dac4f2e090bb2e2fe06aa5b2d39027/pydantic_core-2.41.1-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eec83fc6abef04c7f9bec616e2d76ee9a6a4ae2a359b10c21d0f680e24a247ca", size = 1959394, upload-time = "2025-10-07T10:50:06.335Z" }, + { url = "https://files.pythonhosted.org/packages/14/3f/cfec8b9a0c48ce5d64409ec5e1903cb0b7363da38f14b41de2fcb3712700/pydantic_core-2.41.1-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6771a2d9f83c4038dfad5970a3eef215940682b2175e32bcc817bdc639019b28", size = 2147365, upload-time = "2025-10-07T10:50:07.978Z" }, +] + [[package]] name = "pytest" version = "8.3.5" @@ -952,6 +1171,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634, upload-time = "2025-03-02T12:54:52.069Z" }, ] +[[package]] +name = "pytest-asyncio" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] 
+sdist = { url = "https://files.pythonhosted.org/packages/42/86/9e3c5f48f7b7b638b216e4b9e645f54d199d7abbbab7a64a13b4e12ba10f/pytest_asyncio-1.2.0.tar.gz", hash = "sha256:c609a64a2a8768462d0c99811ddb8bd2583c33fd33cf7f21af1c142e824ffb57", size = 50119, upload-time = "2025-09-12T07:33:53.816Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/93/2fa34714b7a4ae72f2f8dad66ba17dd9a2c793220719e736dda28b7aec27/pytest_asyncio-1.2.0-py3-none-any.whl", hash = "sha256:8e17ae5e46d8e7efe51ab6494dd2010f4ca8dae51652aa3c8d55acf50bfb2e99", size = 15095, upload-time = "2025-09-12T07:33:52.639Z" }, +] + [[package]] name = "pytest-django" version = "4.11.1" @@ -1067,6 +1299,50 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cd/be/f6b790d6ae98f1f32c645f8540d5c96248b72343b0a56fab3a07f2941897/ruff-0.11.8-py3-none-win_arm64.whl", hash = "sha256:304432e4c4a792e3da85b7699feb3426a0908ab98bf29df22a31b0cdd098fac2", size = 10713129, upload-time = "2025-05-01T14:53:22.27Z" }, ] +[[package]] +name = "selectolax" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/e0/c62a6bbbadaef976f1f27e08f1188bd7d1bf10380e4265d46737a47f8db9/selectolax-0.4.0.tar.gz", hash = "sha256:0387798f42b36ce24bc19d599ecd6ebe56ee559fe108d43978fac371cece15c7", size = 4753183, upload-time = "2025-09-28T17:52:51.97Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/b7/4927bb03c54a12eb7d81da5171e18a7bd0e9eabe909f798aba15dfa69feb/selectolax-0.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4f6e6bfe035cfc7962efdb63ca7b591e8dbf1d1a5226d9af8c0735a395983e79", size = 2037651, upload-time = "2025-09-28T17:51:43.825Z" }, + { url = "https://files.pythonhosted.org/packages/e0/31/8c2e64ee8e92d532e627efcd0fff8eb3e12018c4e3642f400545d113da07/selectolax-0.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7c727a1d5dec654291a58aca34966495f97f20c6d93ad3bfb67a3a8cc5c58e3a", size = 2030040, upload-time = 
"2025-09-28T17:51:45.248Z" }, + { url = "https://files.pythonhosted.org/packages/46/cf/008ce4af155c5cafe0801825805ae9f92c38c801022b4a6f295439c44321/selectolax-0.4.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ec5ddc25e94ac93c353ef361c513bac40e45648ee7a8a36d5d6911d6daa5689b", size = 2223653, upload-time = "2025-09-28T17:51:46.994Z" }, + { url = "https://files.pythonhosted.org/packages/25/2d/fce2dc92a2b05c9ba0d56c49dfa1ac66d4c6e67b09930bd413667d3d26fd/selectolax-0.4.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:534a4cf50371406594efd551395acb47166e54cf06b97d24d373fdf5ff3d3436", size = 2263895, upload-time = "2025-09-28T17:51:48.537Z" }, + { url = "https://files.pythonhosted.org/packages/37/07/de96522669a8888db748ed550362d51dbaa36c8230a00cafe59c50351dbe/selectolax-0.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1c61fa0662bdd8525f3d9fef6d8041faef39e7e9fe12cc9ef068dc34a791000b", size = 2238855, upload-time = "2025-09-28T17:51:50.182Z" }, + { url = "https://files.pythonhosted.org/packages/f7/cd/082ba63397894939c9671eaf5521291218e0431453fae91ce741dd57686a/selectolax-0.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cb89d5ca84f2523063d06d7e23ebd8cb30d2e431920478ba14a02ae2a7f0c51d", size = 2269371, upload-time = "2025-09-28T17:51:51.563Z" }, + { url = "https://files.pythonhosted.org/packages/cb/80/765e674f548a645afcfc9c4270fce01704663732be4d3f37f49da2524b81/selectolax-0.4.0-cp312-cp312-win32.whl", hash = "sha256:9a088736aed7a3b5583188679612e6a278155328d6650a27a96ab0753d1b49d0", size = 1703051, upload-time = "2025-09-28T17:51:53.115Z" }, + { url = "https://files.pythonhosted.org/packages/cd/41/6df32a5d5c7328f384d7d439d7be2d10b416c3d9af02bea1c91bbced2c5f/selectolax-0.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:b7cc3105bfda0d478d7220a681117c688abcf58580c1bb0bd5acd668c9192270", size = 1805133, upload-time = "2025-09-28T17:51:54.629Z" }, + { url 
= "https://files.pythonhosted.org/packages/7d/ba/cc92689a5b04e15e91133a7d1ccbdeb459170e2ee2d2d6ff2f72940f3c21/selectolax-0.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:67d44890c128b5fc797dc3a55a168a41551bc619f3cd3c6819d06a742fab4ef4", size = 1750358, upload-time = "2025-09-28T17:51:56.126Z" }, + { url = "https://files.pythonhosted.org/packages/cd/01/bb46431df329ca472292425cc9244c934f1cc3d09beb8cd9db096b9284f0/selectolax-0.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec605e9a8d85d1e1118b9a07347cc4cc81714c8b7b0ae8be9c8b515f2dda52c2", size = 2037193, upload-time = "2025-09-28T17:51:57.532Z" }, + { url = "https://files.pythonhosted.org/packages/2a/60/2c8c8b8b78db646f5eef4c7ecbc2198db778f35ef3d656fb855dd109fbe0/selectolax-0.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:aef60a57920883d02574330de203d6ea984c33152bd2285ff6e88b978feeea5c", size = 2029085, upload-time = "2025-09-28T17:51:59.112Z" }, + { url = "https://files.pythonhosted.org/packages/d6/21/85b29e2dc44a8a0de0dff73f909c88506d14e96f479b9a50b39369b8d5fe/selectolax-0.4.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97b30b18e5b4633b889fd8bb8fc1cc31acb348e6f4cf67e2fa615d1c38697d24", size = 2218148, upload-time = "2025-09-28T17:52:00.877Z" }, + { url = "https://files.pythonhosted.org/packages/62/02/d03bedc09e60185f35c7424d3ef5046361dab50b40de19141e29289981dc/selectolax-0.4.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:45b73eadfbc9d96a354e9b559bac40bc430f0cfa95f3c7e8ff9b8c642bd4063f", size = 2256958, upload-time = "2025-09-28T17:52:02.664Z" }, + { url = "https://files.pythonhosted.org/packages/ff/8e/ae06590bebd5ed0243b52427d44919d26aebec8afa1c6fc9ccdbf9008f1c/selectolax-0.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:78b069c1a7d118077e905b6e560d8e9eb86ad6b7dc9122e6e16879fcd59731b9", size = 2228723, upload-time = "2025-09-28T17:52:04.332Z" }, + { url = 
"https://files.pythonhosted.org/packages/d3/fe/fada91b80ee58973def8fefe302a40214de0f8f5091ec1eb808fc1e60148/selectolax-0.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:06bb0f67ae5c951ea9f93e1fa51f646eb61336be6041631eee3fad1b61615415", size = 2261784, upload-time = "2025-09-28T17:52:05.76Z" }, + { url = "https://files.pythonhosted.org/packages/1f/71/257efdd7d4bf28c7889bb8c9ecc2a11233a6bb9d7c06292add90db8cdf9b/selectolax-0.4.0-cp313-cp313-win32.whl", hash = "sha256:3302f5d8f921e873b8f99d93cd7f093fc44e0fbea4ac6e9ce835991de3ca47e4", size = 1702602, upload-time = "2025-09-28T17:52:07.221Z" }, + { url = "https://files.pythonhosted.org/packages/a6/a0/f8de5940e5f286f8e1c88fbff8297c1427be0117a8f3258a6482d831f405/selectolax-0.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:6424ea4d27d26cb49042c729a7721cdc1210e7c404abcfe991c6f299a330bca7", size = 1806161, upload-time = "2025-09-28T17:52:08.613Z" }, + { url = "https://files.pythonhosted.org/packages/29/e0/0a8ae951856c9819551ab22ac57c1cae530702a170f81fae48ab5da2bb25/selectolax-0.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:8e4994013ce69d65224afe4ae2038316a968ac3c0dcfd34faeaff09bf6825e61", size = 1749911, upload-time = "2025-09-28T17:52:10.102Z" }, + { url = "https://files.pythonhosted.org/packages/27/dc/ac0b643411d0cd394e3b9c2660351c73cce7805efde5b1923ec943c27d6b/selectolax-0.4.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:5edf5793c83b713022a325437ec1f0460b1163655fd1145ee2acf427ab1ff388", size = 2037005, upload-time = "2025-09-28T17:52:11.538Z" }, + { url = "https://files.pythonhosted.org/packages/51/69/e4629f8dd307f5039ce9c50ac45085da5e194e8a1710cbd8bb4cf5ea02f5/selectolax-0.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a0faaad30a143ec0068cd29d346106309ca14add2813607c84548f69ff0babd9", size = 2030288, upload-time = "2025-09-28T17:52:13.319Z" }, + { url = 
"https://files.pythonhosted.org/packages/ca/d1/a59866e3ef94349a3fa17c8db8e12c783de52e99277c8bff45bed30ce0fe/selectolax-0.4.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:da04f1a1bf31a33e05739bc14ac585523adb4df8fbd0ce4eb3b1b6da82e76c56", size = 2223289, upload-time = "2025-09-28T17:52:14.84Z" }, + { url = "https://files.pythonhosted.org/packages/98/8f/12e2e3958c1d2a50123725fd05a8739c436093e88b5086b1c29bdea818bb/selectolax-0.4.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:468063c361d45ce04e3ada12f8848b0e99289aeb32b100c2b38dc86e085cea6a", size = 2255545, upload-time = "2025-09-28T17:52:16.243Z" }, + { url = "https://files.pythonhosted.org/packages/39/ea/514a4e18b08820601091ab64328f1e0f6327e219123ea24e72a2444401f1/selectolax-0.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b137e6a3470c6e374552cbe3ef69d5e10f4ac5317b0dd8d8a5d288ad02a980e7", size = 2239440, upload-time = "2025-09-28T17:52:18.365Z" }, + { url = "https://files.pythonhosted.org/packages/f3/dd/a6ef1d4fac7014d008ee25296057ed785a2b0daf6bc2a8ee503a84c428fa/selectolax-0.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e98416d1096f438fa7fa243f53350c8fc3e63e7c781a513300ff2da4ae247f95", size = 2262156, upload-time = "2025-09-28T17:52:19.759Z" }, + { url = "https://files.pythonhosted.org/packages/15/9f/a01ebbed6e83ca61bc2bf40119ada6c1206f562a15bb50535fa341264f69/selectolax-0.4.0-cp314-cp314-win32.whl", hash = "sha256:a144be60b2e9b8c34602cf07b01d1edf310fe1079944c473867295cc582c30ef", size = 1795070, upload-time = "2025-09-28T17:52:21.152Z" }, + { url = "https://files.pythonhosted.org/packages/4c/9a/fd58ca54e4544b59603a9f106ff7af412fd5e3852d1c1e3f4f56ab653dfc/selectolax-0.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:5d2bf8359fda6c5e7d2ac4183343fed5c84276adc7a28aa1f4f5c94964d15d5d", size = 1894458, upload-time = "2025-09-28T17:52:23.016Z" }, + { url = 
"https://files.pythonhosted.org/packages/02/5a/86a330a64519749ddeb4124a8370124737f44bceb8cfd8b0a3c36aa67d30/selectolax-0.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:9f249b72ce4a344f1ac8b14fc9c9ccb043c8afbef6fedfc535ba8d03db0c7f19", size = 1843326, upload-time = "2025-09-28T17:52:24.367Z" }, + { url = "https://files.pythonhosted.org/packages/a7/64/f5f5ef5ccac5a70db6d3d129c027eff0d50ac0222360fbdd86ffab0e5119/selectolax-0.4.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:f2048fad6e21bbecb0a96f6d830f4c295c8638fb66b04e996afc7225db751594", size = 2052639, upload-time = "2025-09-28T17:52:25.77Z" }, + { url = "https://files.pythonhosted.org/packages/d9/8f/46dc6990313b872ab1f33175c5c02846ef9eb7c6c488ddf0984824673d69/selectolax-0.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4b28af17499028ae8c210b3e8ef04c93f4d7a1b4e150da5781a3cf331a63f5a6", size = 2052569, upload-time = "2025-09-28T17:52:27.136Z" }, + { url = "https://files.pythonhosted.org/packages/8d/ee/48ff15c3ddac428749a5fbbc99b55c0b0a1720ba83ca048fbaf630254338/selectolax-0.4.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9da70a07e6ec2409f2cc6da787a6c4018ca672c7e139983e37de0cefa0686395", size = 2239877, upload-time = "2025-09-28T17:52:28.577Z" }, + { url = "https://files.pythonhosted.org/packages/e5/cb/5bffe2b9acd022eefb3a84c80cbdf441624a5754c062c05fcf1f6752a195/selectolax-0.4.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e62446575f9c58681a112273b48fcd659ee7c5f9f1b8c774223b128ce793ae38", size = 2263748, upload-time = "2025-09-28T17:52:29.964Z" }, + { url = "https://files.pythonhosted.org/packages/ee/1b/1a1970723372c6f4cf28b7100d415b66c2530138f39e82c5aad6789680e2/selectolax-0.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:a20ae8d377e74ce6480e55039c83c3ebfa96ed41281660530f02758a62ce6b80", size = 2254831, upload-time = "2025-09-28T17:52:31.309Z" }, + { url = 
"https://files.pythonhosted.org/packages/71/1c/359e14203c84bd6542d7631510bf5b2eec263a5ca1efa5fdeb2b3a768a42/selectolax-0.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:744b2809fcb360b01389b3d4929476951f5a260e375b12be1a6fa86ce11805dc", size = 2271220, upload-time = "2025-09-28T17:52:32.759Z" }, + { url = "https://files.pythonhosted.org/packages/81/7b/69fb95a7f3f3ade4622f35f5e02b38ae1f73f6a73856eb017bc965174678/selectolax-0.4.0-cp314-cp314t-win32.whl", hash = "sha256:9d9a5b1b2ecb64409d143aa37d4708bf3a3aacf47c19fe095429880e8fd43e4e", size = 1846313, upload-time = "2025-09-28T17:52:34.114Z" }, + { url = "https://files.pythonhosted.org/packages/42/36/a50f7af1d35e4f299d52f2163dffb60318294fc2bc8a58dcaf07d8d95e3d/selectolax-0.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:bbb5f0c0ea169a01aaa92c46a054a46ddc53c6e69aef683abf438fe39fbd60c2", size = 1967681, upload-time = "2025-09-28T17:52:35.542Z" }, + { url = "https://files.pythonhosted.org/packages/c3/96/e2f3ddd77dbfabf2b22dff676a16b0c87da71785756fa53b48b2217e1e4a/selectolax-0.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:22f2ce15e8e79acb66b576518609efe5d34e32200cb0818b48c7ebd89f59d4e6", size = 1863193, upload-time = "2025-09-28T17:52:37.052Z" }, +] + [[package]] name = "six" version = "1.17.0" @@ -1076,6 +1352,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" } +wheels = [ + { 
url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, +] + [[package]] name = "sqlparse" version = "0.5.3" @@ -1087,11 +1372,23 @@ wheels = [ [[package]] name = "typing-extensions" -version = "4.14.0" +version = "4.15.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d1/bc/51647cd02527e87d05cb083ccc402f93e441606ff1f01739a62c8ad09ba5/typing_extensions-4.14.0.tar.gz", hash = "sha256:8676b788e32f02ab42d9e7c61324048ae4c6d844a399eebace3d4979d75ceef4", size = 107423, upload-time = "2025-06-02T14:52:11.399Z" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/69/e0/552843e0d356fbb5256d21449fa957fa4eff3bbc135a74a691ee70c7c5da/typing_extensions-4.14.0-py3-none-any.whl", hash = "sha256:a1514509136dd0b477638fc68d6a91497af5076466ad0fa6c338e44e359944af", size = 43839, upload-time = "2025-06-02T14:52:10.026Z" }, + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, ] [[package]] @@ -1177,6 +1474,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e1/07/c6fe3ad3e685340704d314d765b7912993bcb8dc198f0e7a89382d37974b/win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390", size = 4083, upload-time = "2024-12-07T15:28:26.465Z" }, ] +[[package]] +name = "xlsxwriter" +version = "3.2.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/46/2c/c06ef49dc36e7954e55b802a8b231770d286a9758b3d936bd1e04ce5ba88/xlsxwriter-3.2.9.tar.gz", hash = "sha256:254b1c37a368c444eac6e2f867405cc9e461b0ed97a3233b2ac1e574efb4140c", size = 215940, upload-time = "2025-09-16T00:16:21.63Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3a/0c/3662f4a66880196a590b202f0db82d919dd2f89e99a27fadef91c4a33d41/xlsxwriter-3.2.9-py3-none-any.whl", hash = "sha256:9a5db42bc5dff014806c58a20b9eae7322a134abb6fce3c92c181bfb275ec5b3", size = 175315, upload-time = "2025-09-16T00:16:20.108Z" }, +] + [[package]] name = "zstandard" version = "0.25.0" From 6d805e6fb94b770a1e44db3b6cc32a4732e098cf Mon Sep 17 00:00:00 2001 From: igorsimb Date: Sun, 12 Oct 2025 09:59:05 +0300 Subject: [PATCH 2/2] feat(parser): add parser app --- parser/http.py | 32 +- parser/parser_tech_plan.md | 43 +- parser/scripts/__init__.py | 0 
parser/scripts/check_proxies_for_stparts.py | 137 + parser/scripts/check_stparts_direct.py | 80 + .../scripts/check_stparts_with_playwright.py | 166 + parser/utils.py | 1 + pyproject.toml | 2 + requirements.txt | 3 + tests/parser/initial_page.html | 6310 ++++++++ tests/parser/page_with_disableFiltering.html | 12157 ++++++++++++++++ tests/parser/test_excel_export.py | 2 +- tests/parser/test_stparts_pipeline.py | 6 +- uv.lock | 47 + 14 files changed, 18963 insertions(+), 23 deletions(-) create mode 100644 parser/scripts/__init__.py create mode 100644 parser/scripts/check_proxies_for_stparts.py create mode 100644 parser/scripts/check_stparts_direct.py create mode 100644 parser/scripts/check_stparts_with_playwright.py create mode 100644 parser/utils.py create mode 100644 tests/parser/initial_page.html create mode 100644 tests/parser/page_with_disableFiltering.html diff --git a/parser/http.py b/parser/http.py index faa976b..84992bd 100644 --- a/parser/http.py +++ b/parser/http.py @@ -2,6 +2,7 @@ import asyncio import random +from enum import Enum from itertools import cycle from typing import Any, TypedDict from urllib.parse import parse_qs, urlparse @@ -18,8 +19,21 @@ class Proxy(TypedDict): port: int username: str password: str + proxy_type: str +class ProxyType(Enum): + """ + Used in proxy_list table (proxy_type column). + """ + DCV6_DEDICATED = "datacenter_ipv6_dedicated" + DCV4_SHARED = "datacenter_ipv4_shared" + DCV4_DEDICATED = "datacenter_ipv4_dedicated" + MOBILE_DEDICATED = "mobile_dedicated" + MOBILE_SHARED = "mobile_shared" + RESIDENTIAL_SHARED = "residential_shared" + RESIDENTIAL_DEDICATED = "residential_dedicated" + # --- DB Connection Details --- DB_CONFIG = { "host": "185.175.47.222", @@ -30,15 +44,27 @@ class Proxy(TypedDict): } -async def get_proxies_from_db() -> list[Proxy]: +async def get_proxies_from_db(proxy_type: ProxyType | None = None) -> list[Proxy]: """ Fetches a list of available proxy servers from the external PostgreSQL database. 
+ + Args: + proxy_type: An optional filter to fetch only proxies of a specific type. """ conn = None try: conn = await asyncpg.connect(**DB_CONFIG) - records = await conn.fetch("SELECT ip, port, username, password FROM proxy_list WHERE availability = TRUE") - logger.debug(f"Fetched {len(records)} proxies from the external database.") + + query = "SELECT ip, port, username, password, proxy_type FROM proxy_list WHERE availability = TRUE" + params = [] + + if proxy_type: + query += " AND proxy_type = $1" + params.append(proxy_type.value) + + records = await conn.fetch(query, *params) + + logger.debug(f"Fetched {len(records)} proxies from the external database for type: {proxy_type or 'any'}.") # The records from asyncpg are list-like and dict-like. return [dict(record) for record in records] except (asyncpg.PostgresError, OSError) as e: # OSError can happen on connection failure diff --git a/parser/parser_tech_plan.md b/parser/parser_tech_plan.md index 2b177b5..a347205 100644 --- a/parser/parser_tech_plan.md +++ b/parser/parser_tech_plan.md @@ -303,25 +303,36 @@ Purpose Persist results (TTL 4 months), export Excel in the wide “top-10 suppliers” layout your users expect, and delete exports older than 5 days. 
--- ClickHouse DDL (final) -CREATE TABLE IF NOT EXISTS sup_stat.parser_offers +```sql +-- dif.stparts_percentage definition + +CREATE TABLE dif.stparts_percentage ( - run_id UUID, - fetched_at DateTime DEFAULT now(), - source LowCardinality(String), - brand String, - article String, - name String, - price Decimal(12,2), - quantity UInt32, - supplier String, - rating Nullable(UInt8), -- 0..100 where available - deadline_days UInt16, - is_analog UInt8 + `b` String, -- brand + `a` String, -- article + `price` Float64, + `quantity` Int32, + `delivery` Int32, -- deadline days + `provider` String, -- supplier + `rating` Nullable(UInt8), -- 0..100 where available + `name` String, + `created_at` DateTime DEFAULT now(), + `is_analog` UInt8, + `run_id` UUID, ) -ENGINE = MergeTree -ORDER BY (article, source, supplier, price, deadline_days, fetched_at) -TTL fetched_at + INTERVAL 4 MONTH +ENGINE = ReplacingMergeTree +ORDER BY (b, + a, + price, + quantity, + delivery, + provider, + rating, + name, + toDate(created_at)) +TTL created_at + toIntervalMonth(12) SETTINGS index_granularity = 8192; +``` Notes run_id groups a single job execution; fetched_at drives TTL. diff --git a/parser/scripts/__init__.py b/parser/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/parser/scripts/check_proxies_for_stparts.py b/parser/scripts/check_proxies_for_stparts.py new file mode 100644 index 0000000..676654f --- /dev/null +++ b/parser/scripts/check_proxies_for_stparts.py @@ -0,0 +1,137 @@ + +""" +A diagnostic script to check the status of all proxies from the database against stparts.ru. + +This script iterates through all proxies marked as available in the database and makes a +test request to stparts.ru through each one. It then categorizes each proxy as 'working', +'blocked', or 'failed' based on the content of the HTML response. + +It is used to determine if the parsing failures are due to stparts.ru blocking the +proxy pool. 
If a high percentage of proxies are blocked, it may be necessary to +acquire new proxies or switch to a different parsing method like Playwright. + +Usage: + python parser/scripts/check_proxies_for_stparts.py +""" +import asyncio + +import sys +from pathlib import Path +import httpx +from tqdm.asyncio import tqdm + +# Add the project root to the Python path +project_root = Path(__file__).resolve().parent.parent.parent +sys.path.insert(0, str(project_root)) + + +from loguru import logger + +from parser.http import Proxy, get_proxies_from_db + +# Configure Loguru for clear output +logger.remove() +logger.add(sys.stderr, level="INFO") + +# --- Configuration --- +# URL to test against. A simple search query is a good choice. +TEST_URL = "https://stparts.ru/search" +TEST_PARAMS = {"pcode": "210202R920"} # A common article code + +# Concurrency limit to avoid overwhelming the server or getting banned. +CONCURRENCY_LIMIT = 20 + +# --- Success/Failure Markers --- +# Text expected in a successful response (i.e., a real search results page) +SUCCESS_MARKER = "searchResultsTable" +# Text expected in a response when the proxy is blocked +BLOCKED_MARKER = "Access Denied" + + +async def check_proxy(proxy: Proxy, stats: dict) -> None: + """ + Checks a single proxy by making a request to the test URL. + + Args: + proxy: The proxy to check. + stats: A dictionary to update with the results (working/blocked). 
+ """ + proxy_id = "" + try: + proxy_id = f"{proxy['ip']}:{proxy['port']}" + proxy_url = f"http://{proxy['username']}:{proxy['password']}@{proxy['ip']}:{proxy['port']}" + + async with httpx.AsyncClient(proxy=proxy_url, follow_redirects=True) as client: + response = await client.get(TEST_URL, params=TEST_PARAMS, timeout=20.0) + response.raise_for_status() + html = response.text + + if BLOCKED_MARKER in html: + stats["blocked"] += 1 + elif SUCCESS_MARKER in html: + stats["working"] += 1 + else: + stats["unknown"] += 1 + + except (httpx.RequestError, httpx.HTTPStatusError) as e: + if stats["failed"] < 5: # Log the first 5 network errors for diagnosis + logger.error(f"[!] Proxy {proxy_id} failed with network error: {e.__class__.__name__}.") + stats["failed"] += 1 + except Exception as e: + if stats["failed"] < 5: # Log the first 5 unexpected errors for diagnosis + logger.exception(f"[!] An unexpected error occurred with proxy {proxy_id}: {e}") + stats["failed"] += 1 + + +async def main(limit: int = None): + """ + Main function to fetch proxies and run the checks concurrently. + """ + logger.info("--- Starting Proxy Check for stparts.ru ---") + logger.info("Fetching proxies from the database...") + + proxies = await get_proxies_from_db() + if not proxies: + logger.error("No proxies found in the database. 
Exiting.") + return + + total_proxies = len(proxies) + logger.info(f"Found {total_proxies} proxies to check.") + logger.info(f"Sample proxy record from DB: {proxies[0]}") + + if limit: + proxies = proxies[:limit] + logger.info(f"Limiting to {limit} proxies for this run.") + + stats = {"working": 0, "blocked": 0, "failed": 0, "unknown": 0} + semaphore = asyncio.Semaphore(CONCURRENCY_LIMIT) + + tasks = [] + for proxy in proxies: + + async def task_wrapper(p: Proxy): + async with semaphore: + await check_proxy(p, stats) + + tasks.append(task_wrapper(proxy)) + + await tqdm.gather(*tasks, desc="Checking Proxies") + + logger.info("--- Proxy Check Complete ---") + logger.info("Summary:") + logger.info(f" Total Proxies Checked: {total_proxies}") + logger.success(f" Working Proxies: {stats['working']}") + logger.warning(f" Blocked Proxies: {stats['blocked']}") + logger.error(f" Failed Proxies (errors/timeouts): {stats['failed']}") + if stats["unknown"] > 0: + logger.error(f" Unknown Responses: {stats['unknown']}") + logger.info("----------------------------") + + if stats["working"] == 0: + logger.critical("All checked proxies are blocked or failed. It is highly likely we need to switch to a different parsing approach like Playwright.") + else: + logger.info("Some proxies are still working. The existing setup can be used, but the proxy list may need cleaning.") + + +if __name__ == "__main__": + asyncio.run(main(limit=10)) diff --git a/parser/scripts/check_stparts_direct.py b/parser/scripts/check_stparts_direct.py new file mode 100644 index 0000000..9ee72ba --- /dev/null +++ b/parser/scripts/check_stparts_direct.py @@ -0,0 +1,80 @@ +""" +A simple script to perform a direct connectivity test to stparts.ru without a proxy. + +This script serves as a control experiment. It makes a single request to a test URL +on stparts.ru using the application's standard browser headers but no proxy. 
+It checks if the response is a valid results page or a block page and saves the +full HTML response to a local file (`direct_response.html`) for inspection. + +This is useful for verifying baseline connectivity and confirming what a successful, +unblocked HTML response should look like. + +Usage: + python parser/scripts/check_stparts_direct.py +""" +import asyncio + +import sys +from pathlib import Path +import httpx + +# Add the project root to the Python path +project_root = Path(__file__).resolve().parent.parent.parent +sys.path.insert(0, str(project_root)) + +from loguru import logger +from parser.http import ProxySession + +# Configure Loguru for clear output +logger.remove() +logger.add(sys.stderr, level="INFO") + +# --- Configuration --- +TEST_URL = "https://stparts.ru/search" +TEST_PARAMS = {"pcode": "210202R920"} # A common article code + +SCRIPT_DIR = Path(__file__).resolve().parent +OUTPUT_FILE = "" + +# --- Success/Failure Markers --- +SUCCESS_MARKER = "searchResultsTable" +BLOCKED_MARKER = "Access Denied" + + +async def main(): + """ + Main function to make a direct request to stparts.ru without a proxy. 
+ """ + logger.info("--- Starting Direct Check for stparts.ru ---") + logger.info(f"Requesting URL: {TEST_URL} with params: {TEST_PARAMS}") + + try: + async with httpx.AsyncClient(follow_redirects=True, headers=ProxySession.BASE_HEADERS) as client: + response = await client.get(TEST_URL, params=TEST_PARAMS, timeout=30.0) + + logger.info(f"Request completed with status code: {response.status_code}") + + html = response.text + + if BLOCKED_MARKER in html: + logger.error("Result: The direct request was BLOCKED.") + OUTPUT_FILE = SCRIPT_DIR / "direct_response_blocked.html" + elif SUCCESS_MARKER in html: + logger.success("Result: The direct request was SUCCESSFUL.") + OUTPUT_FILE = SCRIPT_DIR / "direct_response_success.html" + else: + logger.warning("Result: The response did not contain a clear success or blocked marker.") + + # Save the response for inspection + with open(OUTPUT_FILE, "w", encoding="utf-8") as f: + f.write(html) + logger.info(f"Full HTML response saved to: {OUTPUT_FILE}") + + except httpx.RequestError as e: + logger.exception(f"A network error occurred during the direct request: {e}") + except Exception as e: + logger.exception(f"An unexpected error occurred: {e}") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/parser/scripts/check_stparts_with_playwright.py b/parser/scripts/check_stparts_with_playwright.py new file mode 100644 index 0000000..267f4dd --- /dev/null +++ b/parser/scripts/check_stparts_with_playwright.py @@ -0,0 +1,166 @@ +""" +Minimal Playwright probe that uses your get_proxies_from_db() to pick a proxy, +launch Chromium through that proxy, visit the test URL, detect gate vs success, +and persist storage_state per proxy for reuse. + +Adjust TEST_URL, SUCCESS_MARKER and BLOCKED_MARKER as needed. 
+""" +import random +from pathlib import Path +import asyncio +import json +import time +import sys + +# Add the project root to the Python path +project_root = Path(__file__).resolve().parent.parent.parent +sys.path.insert(0, str(project_root)) + +from loguru import logger + +from playwright.sync_api import sync_playwright, TimeoutError as PWTimeoutError + +# import your proxy helper the same way as your other script +from parser.http import get_proxies_from_db, ProxyType # type: ignore + +# --- Configuration --- +TEST_URL = "https://stparts.ru/search?pcode=210202R920" +SUCCESS_MARKER = "searchResultsTable" # found in good page HTML +BLOCKED_MARKERS = ("Access Denied", "Доступ запрещен") # found in the edge block page +AUTOCHECK_SNIPPET = "autocheck.dyn" # the autocheck/hcaptcha link snippet +HEADLESS = False # set True to run headless (no manual captcha solve) +TIMEOUT_MS = 60_000 # page load timeout +WAIT_BEFORE_CLOSING_SEC = 3 # seconds to wait before closing browser, 0 to disable +STORAGE_DIR = Path("./parser/scripts/playwright_storage") +STORAGE_DIR.mkdir(exist_ok=True) + +def format_proxy_for_playwright(proxy: dict) -> dict | None: + """ + Given a proxy dict from your DB, return a Playwright proxy dict. + Expects keys: ip, port, username, password (username/password optional). 
+ """ + if not proxy: + return None + server = f"http://{proxy['ip']}:{proxy['port']}" + result = {"server": server} + if proxy.get("username") and proxy.get("password"): + result["username"] = proxy["username"] + result["password"] = proxy["password"] + return result + +def storage_path_for_proxy(proxy: dict) -> Path: + safe = f"{proxy['ip']}-{proxy['port']}" + return STORAGE_DIR / f"storage_{safe}.json" + +def probe_one_proxy(proxy: dict | None, use_proxy: bool = True) -> None: + if use_proxy and proxy: + logger.info(f"Probing proxy {proxy['ip']}:{proxy['port']} (Type: {proxy.get('proxy_type', 'Unknown')})") + pw_proxy = format_proxy_for_playwright(proxy) + storage_file = storage_path_for_proxy(proxy) + else: + logger.info("Probing with direct connection (no proxy).") + pw_proxy = None + storage_file = STORAGE_DIR / "storage_direct.json" + + with sync_playwright() as p: + launch_args = { + "headless": HEADLESS, + "args": [ + "--no-sandbox", + "--disable-gpu", + "--disable-dev-shm-usage", + ], + } + # provide proxy at launch so TLS/connection goes through proxy + if pw_proxy: + launch_args["proxy"] = pw_proxy + + browser = p.firefox.launch(**launch_args) + + # If we have an existing storage_state, load it; otherwise create a fresh context. 
+ context_args = { + "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36", + "locale": "ru-RU", + "timezone_id": "Europe/Moscow", + } + + if storage_file.exists(): + logger.info(f"Loading existing storage state from {storage_file}") + context = browser.new_context(storage_state=str(storage_file), **context_args) + else: + context = browser.new_context(**context_args) + + page = context.new_page() + + try: + logger.info("Navigating to test URL...") + page.goto(TEST_URL, timeout=TIMEOUT_MS) + html = page.content() + # quick checks + if any(marker in html for marker in BLOCKED_MARKERS): + logger.warning("Page contains Access Denied marker (edge blocking).") + if AUTOCHECK_SNIPPET in html: + logger.warning("autocheck/hCaptcha snippet detected.") + # Save HTML for debugging + debug_file = STORAGE_DIR / f"blocked_{proxy['ip']}_{proxy['port'] if proxy else 'direct'}.html" + debug_file.write_text(html, encoding="utf-8") + logger.info(f"Blocked HTML saved to {debug_file}") + elif SUCCESS_MARKER in html: + logger.info("Success marker found — page looks like a real search result.") + # persist storage state so subsequent runs can reuse cookies + context.storage_state(path=str(storage_file)) + logger.info(f"Saved storage_state to {storage_file}") + # optionally extract something simple: + title = page.title() + logger.info(f"Page title: {title}") + else: + logger.info("Neither success nor access-denied marker found. 
Saving HTML for inspection.") + debug_file = STORAGE_DIR / f"unknown_{proxy['ip']}_{proxy['port'] if proxy else 'direct'}.html" + debug_file.write_text(html, encoding="utf-8") + logger.info(f"Unknown HTML saved to {debug_file}") + + except PWTimeoutError: + logger.error("Playwright navigation timed out.") + except Exception as exc: + logger.exception("Unexpected error during probe: %s", exc) + finally: + if WAIT_BEFORE_CLOSING_SEC > 0: + logger.info(f"Waiting for {WAIT_BEFORE_CLOSING_SEC} seconds before closing...") + time.sleep(WAIT_BEFORE_CLOSING_SEC) + try: + page.close() + except Exception: + pass + context.close() + browser.close() + +def main(limit: int = 1, use_proxy: bool = True, proxy_type: ProxyType | None = None): + if use_proxy: + if proxy_type: + logger.info(f"Fetching proxies of type: {proxy_type.value}") + proxies = asyncio.run(get_proxies_from_db(proxy_type=proxy_type)) + else: + logger.info("Fetching all available proxies (no specific type filter).") + proxies = asyncio.run(get_proxies_from_db()) + + if not proxies: + logger.error(f"No proxies returned. 
Exiting.") + return + + proxies_to_probe = proxies[:limit] + if proxies_to_probe: + logger.info(f"Probing a random proxy from the {'specified type' if proxy_type else 'entire pool'}.") + for proxy in proxies_to_probe: + logger.info(f"Probing proxy {proxy['ip']}:{proxy['port']} (Type: {proxy.get('proxy_type', 'Unknown')})") + probe_one_proxy(proxy=proxy, use_proxy=True) + else: + logger.warning(f"No proxies available to probe from the {'specified type' if proxy_type else 'entire pool'}.") + else: + probe_one_proxy(proxy=None, use_proxy=False) + +if __name__ == "__main__": + # To test a direct connection, run: main(use_proxy=False) + # To test all available proxies (including those with NULL proxy_type), run: main(limit=1, use_proxy=True) + # To test a specific proxy type, run: main(limit=1, use_proxy=True, proxy_type=ProxyType.DCV4_DEDICATED) + main(limit=5, use_proxy=True) diff --git a/parser/utils.py b/parser/utils.py new file mode 100644 index 0000000..26a3794 --- /dev/null +++ b/parser/utils.py @@ -0,0 +1 @@ +"""Provides small helper functions and utilities for the parser app.""" diff --git a/pyproject.toml b/pyproject.toml index 752201b..b980339 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,8 @@ dependencies = [ "selectolax>=0.4.0", "xlsxwriter>=3.2.9", "asyncpg>=0.30.0", + "tqdm>=4.67.1", + "playwright>=1.55.0", ] [tool.uv] diff --git a/requirements.txt b/requirements.txt index 005b680..ac7bf08 100644 --- a/requirements.txt +++ b/requirements.txt @@ -45,6 +45,7 @@ colorama==0.4.6 # via # click # loguru + # tqdm cron-descriptor==1.4.5 # via django-celery-beat django==5.2 @@ -162,6 +163,8 @@ sniffio==1.3.1 # via anyio sqlparse==0.5.3 # via django +tqdm==4.67.1 + # via admin2 (pyproject.toml) typing-extensions==4.15.0 # via # anyio diff --git a/tests/parser/initial_page.html b/tests/parser/initial_page.html new file mode 100644 index 0000000..70fd255 --- /dev/null +++ b/tests/parser/initial_page.html @@ -0,0 +1,6310 @@ + + + + + + + + + + + + + + 
+ + + + + + + + + 0PN1113H52 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+
+ +
+
+ + STparts + +
+
+
+
+
Москва, Центральный офис
+
+
+ +
8 (495) 790-53-53
+
+ +
+
Пн-Пт 9:00 - 18:00, Cб,Вс - выходной
+
+ + +
+
+ + + + + + + + +
+ + + +
+
+
+
+ +
+
+
+ + + + + +
+ + + + + + +
+ ВНИМАНИЕ! Цены действительны при условии самовывоза со склада в г. Москве и ежемесячном обороте от 2 000 000 рублей. Для получения актуальных цен необходимо пройти регистрацию. +
+ + + + + + +
+
+ +
+ + + + + +
+
+
+
+
+ +
+
+ +
+
+
+ + +

+ Hyundai-KIA + 0PN1113H52 +
+ Прокладка топливной системы +

+ + +
+
+ + + + + + + +
+ 0 оценок +
+
+
+
+
+ + + + + + +
+
+
+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+ + + + + +
+ + + +
+
+
+ +
+
+ Все +
+ +
+ + Бренды + () + + +
+
+
+
+ + Популярные +
+
+ + +
+ + + + +
+ + +
+
+
+
+
+
+
+
+
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+
+ +
+
+ +
+
+ +
+ +
+ дн + + +
+
+ +
+
+
+ +
+
+ от +
+ +
+ шт + + +
+
+
+ +
+ +
+
+
+
+ + +
+ +
+ + +
+
+
+
+
+ +
+ + Склады + () + + +
+
+
+ + +
+ + + +
+ + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+
+
+ + +
+
+ Поисковая выдача сокращена, были скрыты неинтересные для вас предложения
+ + Показать все варианты + +
+
+
+ +

Фильтр по бренду

+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Фото + + Бренд + + + + + Код детали + + + + + Описание + + + + + Наличие + + + + + Склад + + + + + Ожидаемый срок + + + + + Цена + + + Заказ
+ Запрашиваемый артикул +
+
+
+ + + + + + + + 0 * * * 2 + + КОЛЬЦО ФОРСУНКИ МЕТАЛЛИЧЕСКОЕ + + + * + + + UAE1893 + + + +
+
+ 35 - 36 дней +
+
+ +
+
Заказ до
+
15:45
+
+
+
+
+
+ +
+
+ 38,07 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + КОЛЬЦО ФОРСУНКИ МЕТАЛЛИЧЕСКОЕ + + + * + + + UAE1910 + + + +
+
+ 35 - 36 дней +
+
+ +
+
Заказ до
+
15:45
+
+
+
+
+
+ +
+
+ 38,07 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + КОЛЬЦО ФОРСУНКИ МЕТАЛЛИЧЕСКОЕ + + + * + + + UAE1909 + + +
+
+ 35 - 36 дней +
+
+ +
+
Заказ до
+
15:45
+
+
+
+
+
+ +
+
+ 43,51 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + КОЛЬЦО ФОРСУНКИ МЕТАЛЛИЧЕСКОЕ + + + * + + + UAE497 + + + +
+
+ 22 - 28 дней +
+
+
+
+ +
+
+ 44,00 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Прокладка топливной системы + + + * + + + UAE435 + + + +
+
+ 41 - 43 дня +
+
+
+
+ +
+
+ 73,00 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + ПРОКЛАДКА ТОПЛИВНОЙ СИСТЕМЫ + + + * + + + POS219 + + +
+
+ 1 - 2 дня +
+
+ +
+
Заказ до
+
14:15
+
+
+
+
+
+ +
+
+ 74,80 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Прокладка топливной системы + + + * + + + AG137 + + +
+
+ 3 - 4 дня +
+
+
+
+ +
+
+ 90,42 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + ПРОКЛАДКА ТОПЛИВНОЙ СИСТЕМЫ + + + * + + + POS1820 + + +
+
+ 1 - 2 дня +
+
+ +
+
Заказ до
+
15:45
+
+
+
+
+
+ +
+
+ 99,00 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Прокладка форсунки (нижняя) Bongo 3, HYUNDAI/KIA/MOBIS + + + * + + + PP1016 + + +
+
+ 1 - 3 дня +
+
+
+
+ +
+
+ 107,80 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + ПРОКЛАДКА ФОРСУНКИ НИЖНЯЯ + + + * + + + AG113 + + + +
+
+ 1 - 3 дня +
+
+
+
+ +
+
+ 114,00 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Шайба форсунки 4,1*11,1*1,3 HYUNDAI/KIA 0PN11-13H52 + + + * + + + POS1760 + + +
+
+ 1 - 2 дня +
+
+
+
+ +
+
+ 126,00 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + ПРОКЛАДКА ТОПЛИВНОЙ СИСТЕМЫ + + + * + + + PP239 + + + +
+
+ 1 - 3 дня +
+
+
+
+ +
+
+ 136,00 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + КольцоФорсунки + + + * + + + POS1178 + + +
+
+ 1 - 2 дня +
+
+
+
+ +
+
+ 142,80 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Прокладка топливной системы + + + * + + + UAE1176 + + + +
+
+ 36 - 37 дней +
+
+ +
+
Заказ до
+
15:15
+
+
+
+
+
+ +
+
+ 149,26 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
Возможные аналоги (требуется проверка применимости)
+
+
+ + + + +
+ + AMR +
+ +
+
+ A * * * 0 +
+
+ +
+
+ Шайба топливной форсунки + + + * + + + OD727 + + + +
+
+ 1 - 3 дня +
+
+
+
+ +
+
+ 79,20 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + +
+ + AMR +
+ +
+
+ A * * * 0 +
+
+ +
+
+ Шайба топливной форсунки + + + * + + + OD1067 + + + +
+
+ 1 - 3 дня +
+
+
+
+ +
+
+ 79,20 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + +
+ + AMR +
+ +
+
+ A * * * 0 +
+
+ +
+
+ Шайба топливной форсунки + + + * + + + OD1484 + + + +
+
+ 3 - 5 дней +
+
+
+
+ +
+
+ 79,20 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + +
+ + AMR +
+ +
+
+ A * * * 0 +
+
+ +
+
+ Шайба топливной форсунки + + + * + + + AG492 + + +
+
+ 1 - 2 дня +
+
+
+
+ +
+
+ 88,00 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + +
+ + AMR +
+ +
+
+ A * * * 0 +
+
+ +
+
+ Шайба топливной форсунки + + + * + + + AG719 + + +
+
+ 5 - 7 дней +
+
+
+
+ +
+
+ 108,00 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + +
+ + AMR +
+ +
+
+ A * * * 0 +
+
+ +
+
+ Шайба топливная + + + * + + + POS1628 + + +
+
+ 3 - 6 дней +
+
+
+
+ +
+
+ 112,20 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+ + + + + + + + + + + + + + +
+ Поисковая выдача сокращена, были скрыты неинтересные для вас предложения
+ + Показать все варианты + +
+

Информация по подбору аналогов, замен артикулов производителем является справочной, наименование деталей, а также фото деталей могут содержать ошибки - эта информация требует уточнения в официальных каталогах производителей и не является причиной для возврата.

+

*** Скрыто для незарегистрированных посетителей.

+
+
+
+ +
+
+
+ +
+
+
+ + \ No newline at end of file diff --git a/tests/parser/page_with_disableFiltering.html b/tests/parser/page_with_disableFiltering.html new file mode 100644 index 0000000..19bf3aa --- /dev/null +++ b/tests/parser/page_with_disableFiltering.html @@ -0,0 +1,12157 @@ + + + + + + + + + + + + + + + + + + + + + + + Hyundai-KIA 0PN1113H52 Прокладка топливной системы + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+
+ +
+
+ + STparts + +
+
+
+
+
Москва, Центральный офис
+
+
+ +
8 (495) 790-53-53
+
+ +
+
Пн-Пт 9:00 - 18:00, Cб,Вс - выходной
+
+ + +
+
+ + + + + + + + +
+ + + +
+
+
+
+ +
+
+
+ + + + + +
+ + + + + + +
+ ВНИМАНИЕ! Цены действительны при условии самовывоза со склада в г. Москве и ежемесячном обороте от 2 000 000 рублей. Для получения актуальных цен необходимо пройти регистрацию. +
+ + + + + + +
+
+ +
+ + + + + +
+
+
+
+
+ +
+
+ +
+
+
+ + +

+ Hyundai-KIA + 0PN1113H52 +
+ Прокладка топливной системы +

+ + +
+
+ + + + + + + +
+ 0 оценок +
+
+
+
+
+ + + + + + +
+
+
+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+ + + + + +
+ + + +
+
+
+ +
+
+ Все +
+ +
+ + Бренды + () + + +
+
+
+
+ + Популярные +
+
+ + +
+ + + + +
+ + +
+
+
+
+
+
+
+
+
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+
+ +
+
+ +
+
+ +
+ +
+ дн + + +
+
+ +
+
+
+ +
+
+ от +
+ +
+ шт + + +
+
+
+ +
+ +
+
+
+
+ + +
+ +
+ + +
+
+
+
+
+ +
+ + Склады + () + + +
+
+
+ + +
+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+
+
+ + +
+
+
+ +

Фильтр по бренду

+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Фото + + Бренд + + + + + Код детали + + + + + Описание + + + + + Наличие + + + + + Склад + + + + + Ожидаемый срок + + + + + Цена + + + Заказ
+ Запрашиваемый артикул +
+
+
+ + + + + + + + 0 * * * 2 + + КОЛЬЦО ФОРСУНКИ МЕТАЛЛИЧЕСКОЕ + + + * + + + UAE1893 + + + +
+
+ 35 - 36 дней +
+
+ +
+
Заказ до
+
15:45
+
+
+
+
+
+ +
+
+ 38,07 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + КОЛЬЦО ФОРСУНКИ МЕТАЛЛИЧЕСКОЕ + + + * + + + UAE1910 + + + +
+
+ 35 - 36 дней +
+
+ +
+
Заказ до
+
15:45
+
+
+
+
+
+ +
+
+ 38,07 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + КОЛЬЦО ФОРСУНКИ МЕТАЛЛИЧЕСКОЕ + + + * + + + UAE1909 + + +
+
+ 35 - 36 дней +
+
+ +
+
Заказ до
+
15:45
+
+
+
+
+
+ +
+
+ 43,51 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + КОЛЬЦО ФОРСУНКИ МЕТАЛЛИЧЕСКОЕ + + + * + + + UAE497 + + + +
+
+ 22 - 28 дней +
+
+
+
+ +
+
+ 44,00 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Прокладка топливной системы + + + * + + + UAE435 + + + +
+
+ 41 - 43 дня +
+
+
+
+ +
+
+ 73,00 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + ПРОКЛАДКА ТОПЛИВНОЙ СИСТЕМЫ + + + * + + + POS219 + + +
+
+ 1 - 2 дня +
+
+ +
+
Заказ до
+
14:15
+
+
+
+
+
+ +
+
+ 74,80 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Прокладка топливной системы + + + * + + + AG137 + + +
+
+ 3 - 4 дня +
+
+
+
+ +
+
+ 90,42 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + ПРОКЛАДКА ТОПЛИВНОЙ СИСТЕМЫ + + + * + + + POS1820 + + +
+
+ 1 - 2 дня +
+
+ +
+
Заказ до
+
15:45
+
+
+
+
+
+ +
+
+ 99,00 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Прокладка топливной системы + + + * + + + AG690 + + + +
+
+ 12 - 13 дней +
+
+ +
+
Заказ до
+
14:45
+
+
+
+
+
+ +
+
+ 99,33 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Прокладка уплотнительная PREGIO,WIDE BONGO,FRONTIER 1.3, 2.5TON,FRONTIER 1TON + + + * + + + AG925 + + +
+
+ 2 - 4 дня +
+
+
+
+ +
+
+ 102,82 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + "Прокладка топливной системы" + + + * + + + AG634 + + + +
+
+ 8 - 9 дней +
+
+ +
+
Заказ до
+
15:45
+
+
+
+
+
+ +
+
+ 105,60 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Прокладка форсунки (нижняя) Bongo 3, HYUNDAI/KIA/MOBIS + + + * + + + PP1016 + + +
+
+ 1 - 3 дня +
+
+
+
+ +
+
+ 107,80 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + ПРОКЛАДКА ФОРСУНКИ НИЖНЯЯ + + + * + + + AG113 + + + +
+
+ 1 - 3 дня +
+
+
+
+ +
+
+ 114,00 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Прокладка топливной системы + + + * + + + AG097 + + +
+
+ 10 - 11 дней +
+
+ +
+
Заказ до
+
15:45
+
+
+
+
+
+ +
+
+ 118,00 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Шайба форсунки 4,1*11,1*1,3 HYUNDAI/KIA 0PN11-13H52 + + + * + + + POS1760 + + +
+
+ 1 - 2 дня +
+
+
+
+ +
+
+ 126,00 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + ПРОКЛАДКА ТОПЛИВНОЙ СИСТЕМЫ + + + * + + + PP239 + + + +
+
+ 1 - 3 дня +
+
+
+
+ +
+
+ 136,00 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + ПРОКЛАДКА ТОПЛИВНОЙ СИСТЕМЫ + + + * + + + PP1480 + + + +
+
+ 2 - 4 дня +
+
+
+
+ +
+
+ 136,00 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Прокладка топливной системы + + + * + + + AG1129 + + + +
+
+ 2 - 4 дня +
+
+
+
+ +
+
+ 138,00 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + КольцоФорсунки + + + * + + + POS1178 + + +
+
+ 1 - 2 дня +
+
+
+
+ +
+
+ 142,80 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Шайба металлическая + + + * + + + AG965 + + +
+
+ 1 - 2 дня +
+
+
+
+ +
+
+ 144,00 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Прокладка топливной системы + + + * + + + UAE1094 + + + +
+
+ 20 - 21 день +
+
+ +
+
Заказ до
+
15:15
+
+
+
+
+
+ +
+
+ 147,64 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Прокладка топливной системы + + + * + + + UAE1176 + + + +
+
+ 36 - 37 дней +
+
+ +
+
Заказ до
+
15:15
+
+
+
+
+
+ +
+
+ 149,26 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Шайба форсунки Hyundai-Kia 0PN1113H52 + + + * + + + AG1054 + + + +
+
+ 5 - 7 дней +
+
+
+
+ +
+
+ 149,80 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Прокладка под форсунку + + + * + + + AG404 + + + +
+
+ 9 - 12 дней +
+
+
+
+ +
+
+ 150,00 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + кольцо форсунки 01113522101 + + + * + + + AG296 + + + +
+
+ 12 - 14 дней +
+
+
+
+ +
+
+ 150,00 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Прокладка форсунки (нижняя) Bongo 3, HYUNDAI/KIA/MOBIS 0PN11-13H52 + + + * + + + OD1357 + + +
+
+ 1 - 2 дня +
+
+ +
+
Заказ до
+
16:15
+
+
+
+
+
+ +
+
+ 151,20 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Шайба металлическая + + + * + + + AG1329 + + +
+
+ 1 - 3 дня +
+
+
+
+ +
+
+ 153,68 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Прокладка форсунки (нижняя) Bongo 3, HYUNDAI/KIA/MOBIS 0PN11-13H52 + + + * + + + AG775 + + +
+
+ 4 - 5 дней +
+
+ +
+
Заказ до
+
15:45
+
+
+
+
+
+ +
+
+ 154,80 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + КольцоФорсунки + + + * + + + AG1099 + + +
+
+ 1 - 3 дня +
+
+
+
+ +
+
+ 160,02 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Прокладка топливной системы HYUNDAI/KIA 0PN11-13H52 + + + * + + + AG387 + + +
+
+ 1 - 3 дня +
+
+
+
+ +
+
+ 162,74 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Прокладка под форсунку + + + * + + + AG454 + + + +
+
+ 9 - 12 дней +
+
+
+
+ +
+
+ 163,80 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Кольцо форсунки топливной TDI 95- + + + * + + + AG515 + + +
+
+ 1 - 3 дня +
+
+
+
+ +
+
+ 178,31 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Шайба форсунки нижняя + + + * + + + POS1207 + + +
+
+ 1 - 2 дня +
+
+
+
+ +
+
+ 258,30 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Шайба форсунки нижняя + + + * + + + AG927 + + +
+
+ 1 - 2 дня +
+
+
+
+ +
+
+ 277,20 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Шайба форсунки нижняя + + + * + + + AG1211 + + +
+
+ 1 - 3 дня +
+
+
+
+ +
+
+ 277,20 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + Шайба форсунки нижняя + + + * + + + POS266 + + +
+
+ 1 - 3 дня +
+
+
+
+ +
+
+ 317,71 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + прокладка топливной системы + + + * + + + EUR2003 + + + +
+
+ 61 - 70 дней +
+
+ +
+
Заказ до
+
17:45
+
+
+
+
+
+ +
+
+ 365,78 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + КольцоФорсунки + + + * + + + AG072 + + +
+
+ 6 - 8 дней +
+
+
+
+ +
+
+ 485,10 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + КольцоФорсунки + + + * + + + AG1162 + + + +
+
+ 1 - 3 дня +
+
+
+
+ +
+
+ 658,35 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + + + + + 0 * * * 2 + + КольцоФорсунки + + + * + + + POS1261 + + +
+
+ 1 - 3 дня +
+
+
+
+ +
+
+ 668,85 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
Возможные аналоги (требуется проверка применимости)
+
+
+ + + + +
+ + AMR +
+ +
+
+ A * * * 0 +
+
+ +
+
+ Шайба топливной форсунки + + + * + + + OD727 + + + +
+
+ 1 - 3 дня +
+
+
+
+ +
+
+ 79,20 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + +
+ + AMR +
+ +
+
+ A * * * 0 +
+
+ +
+
+ Шайба топливной форсунки + + + * + + + OD1067 + + + +
+
+ 1 - 3 дня +
+
+
+
+ +
+
+ 79,20 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + +
+ + AMR +
+ +
+
+ A * * * 0 +
+
+ +
+
+ Шайба топливной форсунки + + + * + + + OD1484 + + + +
+
+ 3 - 5 дней +
+
+
+
+ +
+
+ 79,20 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + +
+ + AMR +
+ +
+
+ A * * * 0 +
+
+ +
+
+ Шайба топливной форсунки + + + * + + + AG492 + + +
+
+ 1 - 2 дня +
+
+
+
+ +
+
+ 88,00 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + +
+ + AMR +
+ +
+
+ A * * * 0 +
+
+ +
+
+ Шайба топливной форсунки + + + * + + + AG719 + + +
+
+ 5 - 7 дней +
+
+
+
+ +
+
+ 108,00 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+
+ + + + +
+ + AMR +
+ +
+
+ A * * * 0 +
+
+ +
+
+ Шайба топливная + + + * + + + POS1628 + + +
+
+ 3 - 6 дней +
+
+
+
+ +
+
+ 112,20 ₽ + + Показать подробности + Скрыть подробности + +
+
+ + + + + + +
+ +
+
+ + + + + + +
+ + + +
+
+ + + + + + + + + + + + + + +

Информация по подбору аналогов, замен артикулов производителем является справочной, наименование деталей, а также фото деталей могут содержать ошибки - эта информация требует уточнения в официальных каталогах производителей и не является причиной для возврата.

+

*** Скрыто для незарегистрированных посетителей.

+
+
+
+ +
+
+
+ +
+
+
+ + \ No newline at end of file diff --git a/tests/parser/test_excel_export.py b/tests/parser/test_excel_export.py index aee5d21..4115fd8 100644 --- a/tests/parser/test_excel_export.py +++ b/tests/parser/test_excel_export.py @@ -95,7 +95,7 @@ def test_export_offers_xlsx(sample_offers, tmp_path): file_path = export_offers_xlsx(run_id, source, df_wide, tmp_path) assert file_path.exists() - assert file_path.name == f"{run_id}.xlsx" + assert file_path.name == f"stparts_{run_id}.xlsx" # --- Verify Excel content and formatting --- # wb = openpyxl.load_workbook(file_path) diff --git a/tests/parser/test_stparts_pipeline.py b/tests/parser/test_stparts_pipeline.py index a1f9b9a..2638b06 100644 --- a/tests/parser/test_stparts_pipeline.py +++ b/tests/parser/test_stparts_pipeline.py @@ -39,7 +39,7 @@ def report_percentage(self, *, step: str, progress: int): # HTML fixture for testing the redirect link HTML_WITH_REDIRECT_LINK = """ -Показать все варианты +Показать все варианты """ HTML_FINAL_RESULTS = """ @@ -100,7 +100,7 @@ async def test_pipeline_follows_redirect_link(mock_proxy_pool): async def fetch_side_effect(url, params=None): if url == "https://stparts.ru/search": return HTML_WITH_REDIRECT_LINK - elif url == "https://stparts.ru/final-results-page": + elif url == "https://stparts.ru/search/Brand/TESTCODE?disableFiltering": return HTML_FINAL_RESULTS return "" @@ -113,7 +113,7 @@ async def fetch_side_effect(url, params=None): # First call is to the initial search page assert mock_session.fetch_html.call_args_list[0].kwargs["params"] == {"pcode": "TESTCODE"} # Second call is to the redirect link - assert mock_session.fetch_html.call_args_list[1].args[0] == "https://stparts.ru/final-results-page" + assert mock_session.fetch_html.call_args_list[1].args[0] == "https://stparts.ru/search/Brand/TESTCODE?disableFiltering" assert len(results) == 1 assert results[0].price == 99.0 diff --git a/uv.lock b/uv.lock index 7414d58..35a18cd 100644 --- a/uv.lock +++ b/uv.lock @@ -31,10 +31,12 
@@ dependencies = [ { name = "openpyxl" }, { name = "pandas" }, { name = "pillow" }, + { name = "playwright" }, { name = "psycopg", extra = ["binary", "pool"] }, { name = "pydantic" }, { name = "redis" }, { name = "selectolax" }, + { name = "tqdm" }, { name = "xlsxwriter" }, ] @@ -78,10 +80,12 @@ requires-dist = [ { name = "openpyxl", specifier = ">=3.1.5" }, { name = "pandas", specifier = ">=2.2.3" }, { name = "pillow", specifier = ">=11.2.1" }, + { name = "playwright", specifier = ">=1.55.0" }, { name = "psycopg", extras = ["binary", "pool"], specifier = ">=3.2.9" }, { name = "pydantic", specifier = ">=2.12.0" }, { name = "redis", specifier = ">=6.1.0" }, { name = "selectolax", specifier = ">=0.4.0" }, + { name = "tqdm", specifier = ">=4.67.1" }, { name = "xlsxwriter", specifier = ">=3.2.9" }, ] @@ -979,6 +983,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fe/39/979e8e21520d4e47a0bbe349e2713c0aac6f3d853d0e5b34d76206c439aa/platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4", size = 18567, upload-time = "2025-05-07T22:47:40.376Z" }, ] +[[package]] +name = "playwright" +version = "1.55.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "greenlet" }, + { name = "pyee" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/80/3a/c81ff76df266c62e24f19718df9c168f49af93cabdbc4608ae29656a9986/playwright-1.55.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:d7da108a95001e412effca4f7610de79da1637ccdf670b1ae3fdc08b9694c034", size = 40428109, upload-time = "2025-08-28T15:46:20.357Z" }, + { url = "https://files.pythonhosted.org/packages/cf/f5/bdb61553b20e907196a38d864602a9b4a461660c3a111c67a35179b636fa/playwright-1.55.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:8290cf27a5d542e2682ac274da423941f879d07b001f6575a5a3a257b1d4ba1c", size = 38687254, upload-time = "2025-08-28T15:46:23.925Z" }, + { url = 
"https://files.pythonhosted.org/packages/4a/64/48b2837ef396487807e5ab53c76465747e34c7143fac4a084ef349c293a8/playwright-1.55.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:25b0d6b3fd991c315cca33c802cf617d52980108ab8431e3e1d37b5de755c10e", size = 40428108, upload-time = "2025-08-28T15:46:27.119Z" }, + { url = "https://files.pythonhosted.org/packages/08/33/858312628aa16a6de97839adc2ca28031ebc5391f96b6fb8fdf1fcb15d6c/playwright-1.55.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:c6d4d8f6f8c66c483b0835569c7f0caa03230820af8e500c181c93509c92d831", size = 45905643, upload-time = "2025-08-28T15:46:30.312Z" }, + { url = "https://files.pythonhosted.org/packages/83/83/b8d06a5b5721931aa6d5916b83168e28bd891f38ff56fe92af7bdee9860f/playwright-1.55.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29a0777c4ce1273acf90c87e4ae2fe0130182100d99bcd2ae5bf486093044838", size = 45296647, upload-time = "2025-08-28T15:46:33.221Z" }, + { url = "https://files.pythonhosted.org/packages/06/2e/9db64518aebcb3d6ef6cd6d4d01da741aff912c3f0314dadb61226c6a96a/playwright-1.55.0-py3-none-win32.whl", hash = "sha256:29e6d1558ad9d5b5c19cbec0a72f6a2e35e6353cd9f262e22148685b86759f90", size = 35476046, upload-time = "2025-08-28T15:46:36.184Z" }, + { url = "https://files.pythonhosted.org/packages/46/4f/9ba607fa94bb9cee3d4beb1c7b32c16efbfc9d69d5037fa85d10cafc618b/playwright-1.55.0-py3-none-win_amd64.whl", hash = "sha256:7eb5956473ca1951abb51537e6a0da55257bb2e25fc37c2b75af094a5c93736c", size = 35476048, upload-time = "2025-08-28T15:46:38.867Z" }, + { url = "https://files.pythonhosted.org/packages/21/98/5ca173c8ec906abde26c28e1ecb34887343fd71cc4136261b90036841323/playwright-1.55.0-py3-none-win_arm64.whl", hash = "sha256:012dc89ccdcbd774cdde8aeee14c08e0dd52ddb9135bf10e9db040527386bd76", size = 31225543, upload-time = "2025-08-28T15:46:41.613Z" }, +] + [[package]] name = "pluggy" version = "1.5.0" @@ -1156,6 +1179,18 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/14/3f/cfec8b9a0c48ce5d64409ec5e1903cb0b7363da38f14b41de2fcb3712700/pydantic_core-2.41.1-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6771a2d9f83c4038dfad5970a3eef215940682b2175e32bcc817bdc639019b28", size = 2147365, upload-time = "2025-10-07T10:50:07.978Z" }, ] +[[package]] +name = "pyee" +version = "13.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/95/03/1fd98d5841cd7964a27d729ccf2199602fe05eb7a405c1462eb7277945ed/pyee-13.0.0.tar.gz", hash = "sha256:b391e3c5a434d1f5118a25615001dbc8f669cf410ab67d04c4d4e07c55481c37", size = 31250, upload-time = "2025-03-17T18:53:15.955Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/4d/b9add7c84060d4c1906abe9a7e5359f2a60f7a9a4f67268b2766673427d8/pyee-13.0.0-py3-none-any.whl", hash = "sha256:48195a3cddb3b1515ce0695ed76036b5ccc2ef3a9f963ff9f77aec0139845498", size = 15730, upload-time = "2025-03-17T18:53:14.532Z" }, +] + [[package]] name = "pytest" version = "8.3.5" @@ -1370,6 +1405,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a9/5c/bfd6bd0bf979426d405cc6e71eceb8701b148b16c21d2dc3c261efc61c7b/sqlparse-0.5.3-py3-none-any.whl", hash = "sha256:cf2196ed3418f3ba5de6af7e82c694a9fbdbfecccdfc72e281548517081f16ca", size = 44415, upload-time = "2024-12-10T12:05:27.824Z" }, ] +[[package]] +name = "tqdm" +version = "4.67.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737, upload-time = "2024-11-24T20:12:22.481Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0"