igorsimb · igorsimb · Oct 12, 2025 · Oct 12, 2025
diff --git a/common/utils/clickhouse.py b/common/utils/clickhouse.py
@@ -44,9 +44,11 @@ def get_clickhouse_client(readonly: int = 1):
         except Exception as e:
             logger.error(f"Database query failed: {e}")
     """
-    host = getattr(settings, "CLICKHOUSE_HOST", DEFAULT_CLICKHOUSE_HOST)
+    # host = getattr(settings, "CLICKHOUSE_HOST", DEFAULT_CLICKHOUSE_HOST)
     user = getattr(settings, "CLICKHOUSE_USER", DEFAULT_CLICKHOUSE_USER)
-    password = getattr(settings, "CLICKHOUSE_PASSWORD", DEFAULT_CLICKHOUSE_PASSWORD)
+    # password = getattr(settings, "CLICKHOUSE_PASSWORD", DEFAULT_CLICKHOUSE_PASSWORD)
+    host = '87.249.37.86'
+    password = "5483"
 
     client = clickhouse_connect.get_client(host=host, username=user, password=password, settings={"readonly": readonly})
     logger.debug(f"Connecting to ClickHouse at {host}...")

diff --git a/config/django_config/base.py b/config/django_config/base.py
@@ -34,12 +34,13 @@
     "rest_framework",
     "rest_framework.authtoken",
     # Local apps
-    "core.apps.CoreConfig",
     "accounts.apps.UsersConfig",
-    "cross_dock.apps.CrossDockConfig",
-    "pricelens.apps.PricelensConfig",
     "common.apps.CommonConfig",
+    "core.apps.CoreConfig",
+    "cross_dock.apps.CrossDockConfig",
     "emex_upload",
+    "parser",
+    "pricelens.apps.PricelensConfig",
 ]
 
 # Django Rest Framework

diff --git a/config/urls.py b/config/urls.py
@@ -20,6 +20,7 @@
     path("pricelens/", include("pricelens.urls")),
     path("api/v1/pricelens/", include("pricelens.urls_api")),
     path("emex-upload/", include("emex_upload.urls")),
+    path("parser/", include("parser.urls")),
 ]
 
 urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)

diff --git a/core/health.py b/core/health.py
@@ -0,0 +1,35 @@
+from django.conf import settings
+from django.db import connection
+from django.http import JsonResponse
+from django.views import View
+from redis import Redis
+from redis.exceptions import ConnectionError as RedisConnectionError
+
+
+class HealthCheckView(View):
+    """
+    Report whether the database and Redis respond.
+
+    GET returns a JSON body with an overall ``status`` plus ``database`` and
+    ``redis`` entries, each either "ok" or "error". The HTTP status code is 200
+    when both checks pass and 503 otherwise.
+    """
+
+    def get(self, request, *args, **kwargs):
+        # Check database
+        # Any exception raised while opening a cursor or running the trivial
+        # query marks the database as unavailable.
+        try:
+            with connection.cursor() as cursor:
+                cursor.execute("SELECT 1")
+                db_status = True
+        except Exception:
+            db_status = False
+
+        # Check Redis
+        # The Redis instance is located through the Celery broker URL setting.
+        # NOTE(review): only RedisConnectionError and ValueError are handled
+        # here; any other exception raised by from_url()/ping() propagates out
+        # of the view instead of producing a 503 - confirm this is intended.
+        try:
+            redis_conn = Redis.from_url(settings.CELERY_BROKER_URL)
+            redis_status = redis_conn.ping()
+        except (RedisConnectionError, ValueError):
+            redis_status = False
+
+        # 503 as soon as either dependency check failed.
+        status = 200 if all([db_status, redis_status]) else 503
+
+        return JsonResponse(
+            {
+                "status": "ok" if status == 200 else "error",
+                "database": "ok" if db_status else "error",
+                "redis": "ok" if redis_status else "error",
+            },
+            status=status,
+        )
diff --git a/core/templates/core/index.html b/core/templates/core/index.html
@@ -85,6 +85,29 @@ <h1 class="text-3xl font-bold text-base-content">Dashboard</h1>
                 </div>
             </div>
         </div>
+
+        <!-- Parser Section -->
+        <div class="card bg-base-100 shadow-md hover:shadow-xl transition-all duration-300 hover:-translate-y-0.25 border-1 border-base-300">
+            <div class="card-body p-4 md:p-6">
+                <div class="flex items-center gap-4">
+                    <div class="p-3 rounded-xl bg-accent/10 text-accent">
+                        <svg xmlns="http://www.w3.org/2000/svg" class="h-8 w-8" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
+                            <path stroke-linecap="round" stroke-linejoin="round" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
+                        </svg>
+                    </div>
+                    <a href="{% url 'parser:upload' %}" class="text-current no-underline hover:no-underline"><h2 class="card-title text-lg text-base-content">Парсеры</h2></a>
+                </div>
+                <div class="divider my-2"></div>
+                <div class="flex flex-col gap-2">
+                    <a href="{% url 'parser:upload' %}" class="btn btn-ghost btn-sm justify-start gap-2 text-base-content/80 hover:text-accent hover:bg-base-200 transition-colors duration-200">
+                        <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
+                            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5l7 7-7 7" />
+                        </svg>
+                        Перейти к парсерам
+                    </a>
+                </div>
+            </div>
+        </div>
     </div>
 </div>
 {% endblock %}
diff --git a/parser/__init__.py b/parser/__init__.py
diff --git a/parser/admin.py b/parser/admin.py
@@ -0,0 +1 @@
+# Register your models here.
diff --git a/parser/apps.py b/parser/apps.py
@@ -0,0 +1,6 @@
+from django.apps import AppConfig
+
+
+class ParserConfig(AppConfig):
+    """Django application configuration for the ``parser`` app."""
+
+    # Auto-created primary keys in this app use BigAutoField.
+    default_auto_field = 'django.db.models.BigAutoField'
+    name = 'parser'
diff --git a/parser/cleanup.py b/parser/cleanup.py
@@ -0,0 +1,37 @@
+"""Contains cleanup logic, such as deleting old export files."""
+
+import os
+import time
+from pathlib import Path
+
+
+def delete_old_exports(export_dir: Path, older_than_days: int = 5) -> int:
+    """
+    Delete ``.xlsx`` files in a directory that are older than a given number of days.
+
+    Only regular files located directly inside ``export_dir`` are considered;
+    subdirectories are not traversed. Age is measured from the file's
+    modification time. Cleanup is best-effort: a file that cannot be inspected
+    or removed is skipped and does not stop the sweep.
+
+    Args:
+        export_dir: The directory containing the export files.
+        older_than_days: The age threshold in days for deleting files.
+
+    Returns:
+        The number of files that were deleted. 0 if ``export_dir`` is not a directory.
+    """
+    if not export_dir.is_dir():
+        return 0
+
+    deleted_count = 0
+    age_threshold_sec = older_than_days * 24 * 60 * 60
+    current_time = time.time()
+
+    # Use scandir as a context manager so the underlying directory handle is
+    # released deterministically instead of waiting for garbage collection.
+    with os.scandir(export_dir) as entries:
+        for entry in entries:
+            if not entry.name.endswith(".xlsx"):
+                continue
+            try:
+                # is_file() and stat() can both raise OSError, so they belong
+                # inside the try together with the removal itself.
+                if not entry.is_file():
+                    continue
+                if (current_time - entry.stat().st_mtime) > age_threshold_sec:
+                    os.remove(entry.path)
+                    deleted_count += 1
+            except OSError:
+                # Best-effort cleanup: skip files that are locked, already
+                # gone, or not accessible with the current permissions.
+                continue
+
+    return deleted_count
diff --git a/parser/clickhouse_repo.py b/parser/clickhouse_repo.py
@@ -0,0 +1,44 @@
+"""Handles all interactions with the ClickHouse database, including DDL and batch inserts."""
+
+from uuid import UUID
+
+from common.utils.clickhouse import get_clickhouse_client
+from .types import OfferRow
+
+
+def insert_offers(run_id: UUID, rows: list[OfferRow]) -> None:
+    """
+    Insert a batch of OfferRow objects into the ``dif.stparts_percentage`` ClickHouse table.
+
+    Every inserted row is tagged with ``run_id``. Nothing is sent to the
+    database when ``rows`` is empty.
+
+    Args:
+        run_id: The UUID for the current parser run.
+        rows: A list of OfferRow Pydantic models to insert.
+    """
+    if not rows:
+        return
+
+    table_name = "dif.stparts_percentage"
+
+    # Target columns; the order here must match the order of the values in
+    # each row list built below.
+    column_names = [
+        "run_id",
+        "b",
+        "a",
+        "price",
+        "quantity",
+        "delivery",
+        "provider",
+        "rating",
+        "name",
+        "is_analog",
+    ]
+
+    # clickhouse_connect's insert() takes a sequence of row *sequences*.
+    # Passing dicts would make it iterate the dict keys (the column names)
+    # instead of the values, so build plain ordered lists.
+    # `is_analog` is converted from bool to int (0 or 1).
+    data_to_insert = [
+        [
+            run_id,
+            row.b,
+            row.a,
+            row.price,
+            row.quantity,
+            row.delivery,
+            row.provider,
+            row.rating,
+            row.name,
+            int(row.is_analog),
+        ]
+        for row in rows
+    ]
+
+    # readonly=0 requests a client that is allowed to write.
+    with get_clickhouse_client(readonly=0) as client:
+        client.insert(table_name, data_to_insert, column_names=column_names)
diff --git a/parser/excel_export.py b/parser/excel_export.py
@@ -0,0 +1,99 @@
+"""Handles the creation of wide top-10 Excel reports from parsed data."""
+
+from collections.abc import Iterable
+from pathlib import Path
+from uuid import UUID
+
+import pandas as pd
+
+from .types import OfferRow
+
+
+def pivot_offers_for_export(offers: Iterable[OfferRow]) -> pd.DataFrame:
+    """
+    Pivots a list of OfferRow objects into a wide DataFrame suitable for Excel export.
+
+    The pipeline has already sorted and selected the top 10 offers per article.
+    This function transforms that long-format data into a wide format where each
+    row represents a unique (brand, article) pair and columns represent up to
+    10 offers. Each offer contributes its price, provider (exported as
+    "supplier"), quantity, rating and name; other OfferRow fields are not
+    exported.
+
+    Args:
+        offers: An iterable of OfferRow objects, pre-sorted and filtered.
+
+    Returns:
+        A pandas DataFrame with columns "brand", "article" and then
+        "price N", "supplier N", "quantity N", "rating N", "name N" for
+        N = 1..10. Slots with no offer are left empty. An empty input yields
+        an empty DataFrame with the same columns.
+    """
+    # Define column order to ensure consistency, even for empty dataframes
+    columns = ["brand", "article"]
+    for i in range(1, 11):
+        columns.extend([f"price {i}", f"supplier {i}", f"quantity {i}", f"rating {i}", f"name {i}"])
+
+    # Materialize once: `offers` may be a one-shot iterable.
+    offers_list = list(offers)
+    if not offers_list:
+        return pd.DataFrame(columns=columns)
+
+    # Group offers by brand and article
+    # NOTE(review): with pandas' default groupby settings, rows whose `b` or `a`
+    # is missing are presumably dropped from the groups - confirm that is acceptable.
+    grouped = pd.DataFrame([o.model_dump() for o in offers_list]).groupby(["b", "a"])
+
+    wide_rows = []
+    for (brand, article), group in grouped:
+        row = {"brand": brand, "article": article}
+        # Sort within the group one last time to be certain
+        # (cheapest first; among equal prices, larger quantity first).
+        group = group.sort_values(by=["price", "quantity"], ascending=[True, False])
+        # Cap at 10 offers so the generated keys never exceed the declared columns.
+        for i, offer in enumerate(group.head(10).itertuples(), start=1):
+            row[f"price {i}"] = offer.price
+            row[f"supplier {i}"] = offer.provider
+            row[f"quantity {i}"] = offer.quantity
+            row[f"rating {i}"] = offer.rating
+            row[f"name {i}"] = offer.name
+        wide_rows.append(row)
+
+    # Passing `columns` fixes the column order and adds the slots no row filled.
+    return pd.DataFrame(wide_rows, columns=columns)
+
+
+def export_offers_xlsx(run_id: UUID, source: str, df_wide: pd.DataFrame, export_dir: Path) -> Path:
+    """
+    Writes the wide-format DataFrame to a formatted Excel file.
+
+    The sheet layout is: row 0 holds a merged "Source: <source>" title, row 1
+    holds the column headers, and data starts at row 2. The first two rows are
+    frozen and every column whose name starts with "price" gets a numeric format.
+
+    Args:
+        run_id: The UUID of the run, used for the filename.
+        source: The data source name (e.g., "stparts"), used for the sheet name
+            and the title cell. The filename prefix is always "stparts_"
+            regardless of this value.
+        df_wide: The wide-format DataFrame from `pivot_offers_for_export`.
+        export_dir: The directory where the Excel file will be saved.
+
+    Returns:
+        The path to the newly created Excel file.
+    """
+    # NOTE(review): assumes `export_dir` already exists; nothing here creates it - confirm callers do.
+    export_path = export_dir / f"stparts_{run_id}.xlsx"
+    with pd.ExcelWriter(export_path, engine="xlsxwriter") as writer:
+        # startrow=1 leaves row 0 free for the merged title written below.
+        df_wide.to_excel(writer, sheet_name=source, startrow=1, header=True, index=False)
+
+        workbook = writer.book
+        worksheet = writer.sheets[source]
+
+        # --- Formatting --- #
+        # Defend against empty dataframe
+        num_cols = max(1, len(df_wide.columns))
+
+        # 1. Merged header cell for the source
+        merge_format = workbook.add_format(
+            {
+                "bold": True,
+                "align": "center",
+                "valign": "vcenter",
+                "font_size": 14,
+            }
+        )
+        # Spans row 0 across all data columns.
+        worksheet.merge_range(0, 0, 0, num_cols - 1, f"Source: {source}", merge_format)
+
+        # 2. Price column number format
+        price_format = workbook.add_format({"num_format": "#,##0.00"})
+        for i, col in enumerate(df_wide.columns):
+            if col.startswith("price"):
+                # to_excel index=False means our first column is at 0.
+                worksheet.set_column(i, i, 12, price_format)
+
+        # 3. Freeze panes at the first data row
+        # (rows 0 and 1 - title and column headers - stay visible while scrolling).
+        worksheet.freeze_panes(2, 0)
+
+    return export_path
diff --git a/parser/forms.py b/parser/forms.py
@@ -0,0 +1,18 @@
+from django import forms
+
+class FileUploadForm(forms.Form):
+    """
+    Upload form with a file field and an optional "include analogs" checkbox.
+
+    NOTE(review): the ``accept`` attribute is only an HTML attribute on the
+    rendered input; this form defines no validation of the uploaded file's
+    extension or content - confirm that the view checks it.
+    """
+
+    # File to upload; the widget carries styling classes and accept=".xlsx".
+    file = forms.FileField(
+        label="Выберите .xlsx файл",
+        widget=forms.ClearableFileInput(
+            attrs={
+                "class": "file-input file-input-bordered w-full",
+                "accept": ".xlsx",
+            }
+        ),
+    )
+    # Optional flag, unchecked by default.
+    include_analogs = forms.BooleanField(
+        label="Включить аналоги",
+        required=False,
+        initial=False,
+        widget=forms.CheckboxInput(attrs={"class": "checkbox"}),
+    )