Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions common/utils/clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,11 @@ def get_clickhouse_client(readonly: int = 1):
except Exception as e:
logger.error(f"Database query failed: {e}")
"""
# Connection parameters come from Django settings, falling back to the
# module-level defaults. Host and password must not be hard-coded here:
# literals committed to the repository leak credentials and pin every
# environment (dev, CI, production) to one server.
host = getattr(settings, "CLICKHOUSE_HOST", DEFAULT_CLICKHOUSE_HOST)
user = getattr(settings, "CLICKHOUSE_USER", DEFAULT_CLICKHOUSE_USER)
password = getattr(settings, "CLICKHOUSE_PASSWORD", DEFAULT_CLICKHOUSE_PASSWORD)

client = clickhouse_connect.get_client(host=host, username=user, password=password, settings={"readonly": readonly})
logger.debug(f"Connecting to ClickHouse at {host}...")
Expand Down
7 changes: 4 additions & 3 deletions config/django_config/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,13 @@
"rest_framework",
"rest_framework.authtoken",
# Local apps
"core.apps.CoreConfig",
"accounts.apps.UsersConfig",
"cross_dock.apps.CrossDockConfig",
"pricelens.apps.PricelensConfig",
"common.apps.CommonConfig",
"core.apps.CoreConfig",
"cross_dock.apps.CrossDockConfig",
"emex_upload",
"parser",
"pricelens.apps.PricelensConfig",
]

# Django Rest Framework
Expand Down
1 change: 1 addition & 0 deletions config/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
path("pricelens/", include("pricelens.urls")),
path("api/v1/pricelens/", include("pricelens.urls_api")),
path("emex-upload/", include("emex_upload.urls")),
path("parser/", include("parser.urls")),
]

urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)
Expand Down
35 changes: 35 additions & 0 deletions core/health.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from django.conf import settings
from django.db import connection
from django.http import JsonResponse
from django.views import View
from redis import Redis
from redis.exceptions import ConnectionError as RedisConnectionError


class HealthCheckView(View):
    """Health probe reporting whether the database and Redis are reachable."""

    def get(self, request, *args, **kwargs):
        """Run both checks and return their results as JSON.

        Returns:
            A ``JsonResponse`` with keys ``status``, ``database`` and
            ``redis`` (each ``"ok"`` or ``"error"``). The HTTP status is 200
            when both checks pass and 503 otherwise.
        """
        # Local import: RedisError is the base class of the redis-py
        # exceptions (connection, timeout, auth, ...), so catching it turns
        # any Redis failure into a 503 instead of an unhandled 500.
        from redis.exceptions import RedisError

        # Check database
        try:
            with connection.cursor() as cursor:
                cursor.execute("SELECT 1")
            db_status = True
        except Exception:
            # Any failure to run the trivial query counts as "database down".
            db_status = False

        # Check Redis
        redis_status = False
        try:
            redis_conn = Redis.from_url(settings.CELERY_BROKER_URL)
            try:
                redis_status = bool(redis_conn.ping())
            finally:
                # A fresh client (and pool) is built on every request;
                # drop its sockets so probes do not accumulate connections.
                redis_conn.connection_pool.disconnect()
        except (RedisConnectionError, RedisError, ValueError):
            # ValueError covers a broker URL that is not a valid Redis URL.
            redis_status = False

        status = 200 if (db_status and redis_status) else 503

        return JsonResponse(
            {
                "status": "ok" if status == 200 else "error",
                "database": "ok" if db_status else "error",
                "redis": "ok" if redis_status else "error",
            },
            status=status,
        )
23 changes: 23 additions & 0 deletions core/templates/core/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,29 @@ <h1 class="text-3xl font-bold text-base-content">Dashboard</h1>
</div>
</div>
</div>

<!-- Parser Section -->
<div class="card bg-base-100 shadow-md hover:shadow-xl transition-all duration-300 hover:-translate-y-0.25 border-1 border-base-300">
<div class="card-body p-4 md:p-6">
<div class="flex items-center gap-4">
<div class="p-3 rounded-xl bg-accent/10 text-accent">
<svg xmlns="http://www.w3.org/2000/svg" class="h-8 w-8" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
</svg>
</div>
<a href="{% url 'parser:upload' %}" class="text-current no-underline hover:no-underline"><h2 class="card-title text-lg text-base-content">Парсеры</h2></a>
</div>
<div class="divider my-2"></div>
<div class="flex flex-col gap-2">
<a href="{% url 'parser:upload' %}" class="btn btn-ghost btn-sm justify-start gap-2 text-base-content/80 hover:text-accent hover:bg-base-200 transition-colors duration-200">
<svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 5l7 7-7 7" />
</svg>
Перейти к парсерам
</a>
</div>
</div>
</div>
</div>
</div>
{% endblock %}
Empty file added parser/__init__.py
Empty file.
1 change: 1 addition & 0 deletions parser/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Register your models here.
6 changes: 6 additions & 0 deletions parser/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from django.apps import AppConfig


class ParserConfig(AppConfig):
    """Django application configuration for the ``parser`` app."""

    name = "parser"
    # Primary-key field type used for models that do not declare one.
    default_auto_field = "django.db.models.BigAutoField"
37 changes: 37 additions & 0 deletions parser/cleanup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""Contains cleanup logic, such as deleting old export files."""

import os
import time
from pathlib import Path


def delete_old_exports(export_dir: Path, older_than_days: int = 5) -> int:
    """
    Delete ``.xlsx`` files in a directory that are older than a given age.

    Only regular files directly inside ``export_dir`` whose name ends with
    ``.xlsx`` are considered; age is measured from the file's modification
    time. Deletion is best-effort: a file that cannot be inspected or removed
    is skipped and the sweep continues.

    Args:
        export_dir: The directory containing the export files.
        older_than_days: The age threshold in days for deleting files.

    Returns:
        The number of files that were deleted (0 if ``export_dir`` is not a
        directory).
    """
    if not export_dir.is_dir():
        return 0

    # Files last modified before this timestamp are considered expired.
    cutoff = time.time() - older_than_days * 24 * 60 * 60
    deleted_count = 0

    # The context manager closes the directory handle even if the loop exits
    # early because of an unexpected exception.
    with os.scandir(export_dir) as entries:
        for entry in entries:
            if not entry.name.endswith(".xlsx"):
                continue
            try:
                # is_file()/stat() can raise OSError too, so they live inside
                # the try: one bad entry must not abort the whole sweep.
                if entry.is_file() and entry.stat().st_mtime < cutoff:
                    os.remove(entry.path)
                    deleted_count += 1
            except OSError:
                # Best effort: skip entries that are locked, already gone, or
                # not accessible with the current permissions.
                continue

    return deleted_count
44 changes: 44 additions & 0 deletions parser/clickhouse_repo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Handles all interactions with the ClickHouse database, including DDL and batch inserts."""

from uuid import UUID

from common.utils.clickhouse import get_clickhouse_client
from .types import OfferRow


def insert_offers(run_id: UUID, rows: list[OfferRow]) -> None:
    """
    Insert a batch of offers into the ``dif.stparts_percentage`` table.

    Does nothing when ``rows`` is empty. Every inserted row is tagged with
    ``run_id``; ``is_analog`` is stored as an integer (0 or 1).

    Args:
        run_id: The UUID for the current parser run.
        rows: The offers to insert.
    """
    if not rows:
        return

    table_name = "dif.stparts_percentage"

    # Column order here must match the order of values built for each row.
    column_names = [
        "run_id",
        "b",
        "a",
        "price",
        "quantity",
        "delivery",
        "provider",
        "rating",
        "name",
        "is_analog",
    ]

    # clickhouse-connect expects a sequence of row *sequences*. Dict rows
    # would be iterated by key, so the values are emitted positionally.
    data_to_insert = [
        [
            run_id,
            row.b,
            row.a,
            row.price,
            row.quantity,
            row.delivery,
            row.provider,
            row.rating,
            row.name,
            int(row.is_analog),
        ]
        for row in rows
    ]

    # readonly=0 requests a client that is allowed to write.
    with get_clickhouse_client(readonly=0) as client:
        client.insert(table_name, data_to_insert, column_names=column_names)
99 changes: 99 additions & 0 deletions parser/excel_export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
"""Handles the creation of wide top-10 Excel reports from parsed data."""

from collections.abc import Iterable
from pathlib import Path
from uuid import UUID

import pandas as pd

from .types import OfferRow


def pivot_offers_for_export(offers: Iterable[OfferRow]) -> pd.DataFrame:
    """
    Reshape long-format offers into one wide row per (brand, article).

    Offers are grouped by their ``b`` (brand) and ``a`` (article) fields.
    Within each group they are ordered by ascending price, then descending
    quantity, and the first ten are spread across numbered column sets
    (``price N``, ``supplier N``, ``quantity N``, ``rating N``, ``name N``).

    Args:
        offers: An iterable of OfferRow objects.

    Returns:
        A pandas DataFrame with columns ``brand``, ``article`` and the ten
        numbered column sets; empty (but with all columns) when no offers
        are given.
    """
    # (output column prefix, offer field) pairs, in per-rank output order.
    field_map = (
        ("price", "price"),
        ("supplier", "provider"),
        ("quantity", "quantity"),
        ("rating", "rating"),
        ("name", "name"),
    )

    # Fixed column layout so the result is consistent even with no data.
    columns = ["brand", "article"] + [
        f"{label} {rank}" for rank in range(1, 11) for label, _ in field_map
    ]

    records = [offer.model_dump() for offer in offers]
    if not records:
        return pd.DataFrame(columns=columns)

    wide_rows = []
    for (brand, article), chunk in pd.DataFrame(records).groupby(["b", "a"]):
        # Cheapest first; on equal price the larger quantity wins.
        ranked = chunk.sort_values(by=["price", "quantity"], ascending=[True, False]).head(10)

        wide_row = {"brand": brand, "article": article}
        for rank, offer in enumerate(ranked.itertuples(), start=1):
            for label, attr in field_map:
                wide_row[f"{label} {rank}"] = getattr(offer, attr)
        wide_rows.append(wide_row)

    return pd.DataFrame(wide_rows, columns=columns)


def export_offers_xlsx(run_id: UUID, source: str, df_wide: pd.DataFrame, export_dir: Path) -> Path:
    """
    Write the wide-format DataFrame to a formatted Excel file.

    The workbook has a single sheet named after ``source``. Row 0 holds a
    bold, centred ``Source: <source>`` title spanning all data columns,
    row 1 holds the column headers, and data starts on row 2. Columns whose
    name starts with ``price`` get a two-decimal number format, and the two
    header rows are frozen.

    Args:
        run_id: The UUID of the run, used for the filename.
        source: The data source name (e.g., "stparts"), used for the sheet name.
        df_wide: The wide-format DataFrame from `pivot_offers_for_export`.
        export_dir: The directory where the Excel file will be saved; it is
            created if it does not exist yet.

    Returns:
        The path to the newly created Excel file.
    """
    # Make sure the target directory exists before the writer saves into it.
    export_dir.mkdir(parents=True, exist_ok=True)
    export_path = export_dir / f"stparts_{run_id}.xlsx"
    title = f"Source: {source}"

    with pd.ExcelWriter(export_path, engine="xlsxwriter") as writer:
        # startrow=1 leaves row 0 free for the title written below.
        df_wide.to_excel(writer, sheet_name=source, startrow=1, header=True, index=False)

        workbook = writer.book
        worksheet = writer.sheets[source]

        # --- Formatting --- #
        # 1. Title cell for the source, spanning every data column.
        title_format = workbook.add_format(
            {
                "bold": True,
                "align": "center",
                "valign": "vcenter",
                "font_size": 14,
            }
        )
        last_col = len(df_wide.columns) - 1
        if last_col >= 1:
            worksheet.merge_range(0, 0, 0, last_col, title, title_format)
        else:
            # XlsxWriter refuses to merge a single cell (it only warns and
            # writes nothing), so a frame with zero or one column gets the
            # title as a plain formatted cell instead.
            worksheet.write(0, 0, title, title_format)

        # 2. Price column number format
        price_format = workbook.add_format({"num_format": "#,##0.00"})
        for col_idx, col_name in enumerate(df_wide.columns):
            # index=False in to_excel means the first DataFrame column is
            # worksheet column 0. str() guards against non-string labels.
            if str(col_name).startswith("price"):
                worksheet.set_column(col_idx, col_idx, 12, price_format)

        # 3. Freeze panes at the first data row
        worksheet.freeze_panes(2, 0)

    return export_path
18 changes: 18 additions & 0 deletions parser/forms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from django import forms

class FileUploadForm(forms.Form):
    """Upload form: a required file plus an optional "include analogs" flag.

    The ``accept`` attribute on the file input is only a hint in the rendered
    HTML; this form itself declares no extension validation for the upload.
    """

    # Required upload field, rendered as a styled file input.
    file = forms.FileField(
        widget=forms.ClearableFileInput(
            attrs={
                "class": "file-input file-input-bordered w-full",
                "accept": ".xlsx",
            },
        ),
        label="Выберите .xlsx файл",
    )

    # Optional checkbox, unchecked by default.
    include_analogs = forms.BooleanField(
        widget=forms.CheckboxInput(attrs={"class": "checkbox"}),
        initial=False,
        required=False,
        label="Включить аналоги",
    )
Loading