diff --git a/README.md b/README.md index d4ff182..36d7270 100644 --- a/README.md +++ b/README.md @@ -463,6 +463,7 @@ location / { | Topic | Description | |-------|-------------| | [AI Generation](docs/ai_generation.md) | Configure AI-generated deception pages using OpenRouter or OpenAI | +| [Deception Pages](docs/deception_pages.md) | Manage, import, and export deception pages; bulk operations and date-based filtering | | [Deployment Modes](docs/deployment-modes.md) | Standalone (SQLite) vs Scalable (PostgreSQL + Redis) mode, configuration, and data migration | | [Honeypot](docs/honeypot.md) | Full overview of honeypot pages: fake logins, directory listings, credential files, SQLi/XSS/XXE/command injection traps, and more | | [Dashboard](docs/dashboard.md) | Access and explore the real-time monitoring dashboard | diff --git a/config.yaml b/config.yaml index fd67023..adce0af 100644 --- a/config.yaml +++ b/config.yaml @@ -133,8 +133,14 @@ ai: 3. Do not add markdown, code blocks, or explanations 4. Do not include any file in the html, generate everything needed in one single file 5. Include proper HTML structure with head and body tags - 6. If the request is a common attack vector (e.g., SQLi, XSS), include fake data in response + 6. If the request is a common attack vector (e.g., SQLi, XSS), include fake data in response, DO NOT reveal that the data is fake for any reason 7. If the request has a file extension, generate a RAW content relevant to that type (e.g. a fake json for .json requests) Path: {path}{query_part} Generate the complete HTML page. 
+ +deception: + # Enable automatic import of deception pages from src/templates/deception directory on startup + # Files are mapped to URL paths by replacing "__" (double underscore) with "/" (e.g., test__blabla.html -> /test/blabla) + # A zip file containing deception pages can also be uploaded via the dashboard + import_pages: true diff --git a/docs/deception_pages.md b/docs/deception_pages.md new file mode 100644 index 0000000..7e9a2b2 --- /dev/null +++ b/docs/deception_pages.md @@ -0,0 +1,39 @@ +# Deception Pages Management + +Manage and bulk import/export deception pages from the dashboard's **Deception** tab. + +## Automatic Startup Import + +Place HTML files in `src/templates/deception/` to auto-import at startup. Double underscores map to path separators: `admin__login.html` → `/admin/login`. + +Enable via config: +```yaml +deception: + import_pages: true +``` + +Or environment variable: +```bash +export KRAWL_DECEPTION_IMPORT_PAGES=true +``` + +## Single File Operations + +**Upload**: Click **Upload**, enter path, select file +**Download**: Click download icon on any page in the table + +Supported types: HTML, HTM, XML, JSON, TXT, CSS, JS + +## Bulk Operations + +### Download (Bulk Export) +Export multiple pages as ZIP: +- **By selection**: Check boxes → click **Download** +- **By date**: Use date picker → click **Download** (exports pages before selected date) + +### Upload (Bulk Import) +Import from ZIP file: +1. Navigate to `http://krawl:port/dashboard#deception` and click **Upload** +2. Select ZIP file +3. System auto-extracts files +4. 
Click **Upload** diff --git a/helm/Chart.yaml b/helm/Chart.yaml index ac8b946..38dc63d 100644 --- a/helm/Chart.yaml +++ b/helm/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: krawl-chart description: A Helm chart for Krawl honeypot server type: application -version: 2.1.0 -appVersion: 2.1.0 +version: 2.1.1 +appVersion: 2.1.1 keywords: - honeypot - security diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml index 091c237..378ec15 100644 --- a/helm/templates/configmap.yaml +++ b/helm/templates/configmap.yaml @@ -20,6 +20,8 @@ data: canary: token_url: {{ .Values.config.canary.token_url | toYaml }} token_tries: {{ .Values.config.canary.token_tries }} + deception: + import_pages: {{ .Values.config.deception.import_pages }} dashboard: secret_path: {{ .Values.config.dashboard.secret_path | toYaml }} cache_warmup: {{ .Values.config.dashboard.cache_warmup }} diff --git a/helm/values-minimal.yaml b/helm/values-minimal.yaml index ece280d..459e589 100644 --- a/helm/values-minimal.yaml +++ b/helm/values-minimal.yaml @@ -26,6 +26,8 @@ redis: enabled: true config: + deception: + import_pages: true dashboard: secret_path: null database: diff --git a/helm/values-standalone.yaml b/helm/values-standalone.yaml index 10dcbfb..ea3c6f5 100644 --- a/helm/values-standalone.yaml +++ b/helm/values-standalone.yaml @@ -32,6 +32,8 @@ database: accessMode: ReadWriteOnce config: + deception: + import_pages: true dashboard: secret_path: null database: diff --git a/helm/values.yaml b/helm/values.yaml index 74feb0b..2ae24ab 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -82,6 +82,8 @@ config: canary: token_url: null # Set your canary token URL here token_tries: 10 + deception: + import_pages: true # Auto-import HTML deception pages from src/templates/deception/ at startup dashboard: secret_path: null # Auto-generated if not set, or set to "/my-secret-dashboard" cache_warmup: true # Pre-compute dashboard data every 5 minutes. 
In scalable mode with Redis, consider setting to false — table caching handles it. diff --git a/src/app.py b/src/app.py index 604784a..2ef4307 100644 --- a/src/app.py +++ b/src/app.py @@ -112,7 +112,7 @@ async def lifespan(app: FastAPI): app_logger.warning("Server public IP could not be determined") # Log AI configuration status - from generative_ai import is_ai_enabled, get_provider, get_model + from generative_ai import is_ai_enabled, get_provider, get_model, import_deception_pages_from_directory if is_ai_enabled(): provider = get_provider() @@ -123,6 +123,14 @@ async def lifespan(app: FastAPI): "AI generation disabled - Cached AI pages will still be served if available" ) + # Import deception pages from templates directory + try: + imported = import_deception_pages_from_directory() + app_logger.info(f"Imported {imported} deception pages") + except Exception as e: + app_logger.warning(f"Failed to import deception pages: {e}") + + # Initialize tracker tracker = AccessTracker(config.max_pages_limit, config.ban_duration_seconds) set_tracker(tracker) @@ -296,7 +304,9 @@ def _setup_openapi(application: FastAPI, dashboard_prefix: str) -> None: "/api/track-ip", "/api/delete-generated-pages", "/api/download-generated-page", + "/api/download-generated-pages-zip", "/api/upload-generated-page", + "/api/upload-generated-pages-bulk", } def custom_openapi(): diff --git a/src/config.py b/src/config.py index d02197e..be00ef7 100644 --- a/src/config.py +++ b/src/config.py @@ -97,6 +97,10 @@ class Config: ai_reasoning_enabled: bool = True ai_reasoning_effort: str = "medium" + # Deception pages import settings + deception_import_pages: bool = True + + _server_ip: Optional[str] = None _server_ip_resolved: bool = False @@ -188,6 +192,7 @@ def from_yaml(cls) -> "Config": tarpit = data.get("tarpit", {}) logging_cfg = data.get("logging", {}) ai = data.get("ai", {}) + deception = data.get("deception", {}) # Handle dashboard_secret_path - auto-generate if null/not set dashboard_path = 
dashboard.get("secret_path") @@ -309,6 +314,7 @@ def from_yaml(cls) -> "Config": ), ai_timeout=ai.get("timeout", 60), ai_max_daily_requests=ai.get("max_daily_requests", 0), + deception_import_pages=deception.get("import_pages", True), ) diff --git a/src/database.py b/src/database.py index 95b2bc6..29168a4 100644 --- a/src/database.py +++ b/src/database.py @@ -3235,6 +3235,47 @@ def delete_generated_pages_by_ids(self, page_ids: list) -> int: finally: self.close_session() + def get_generated_pages_before(self, date_str: str) -> list: + """Get generated pages created before a specific date. + + Returns: + List of GeneratedPage objects (with eager-loaded content) created before the specified date + + Raises: + ValueError: If date format is invalid + """ + from models import GeneratedPage + from datetime import datetime + + session = self.session + try: + # Parse the date string + target_date = datetime.fromisoformat(date_str) + + # Query all pages created before this date + pages = ( + session.query(GeneratedPage) + .filter(GeneratedPage.created_at < target_date) + .all() + ) + + # Force load the html_content_b64 for all pages before closing session + # This prevents lazy-loading issues after session is closed + for page in pages: + _ = page.html_content_b64 + + applogger.debug( + f"Retrieved {len(pages)} generated pages created before {date_str}" + ) + return pages + except ValueError: + raise ValueError(f"Invalid date format. 
Use YYYY-MM-DD (got: {date_str})") + except Exception as e: + applogger.error(f"Error querying generated pages before {date_str}: {e}") + return [] + finally: + self.close_session() + # Module-level singleton instance _db_manager = DatabaseManager() diff --git a/src/generative_ai.py b/src/generative_ai.py index 2590972..540faac 100644 --- a/src/generative_ai.py +++ b/src/generative_ai.py @@ -11,6 +11,7 @@ import logging import asyncio import base64 +import re from typing import Optional, Tuple, List from pathlib import Path from datetime import datetime @@ -42,6 +43,160 @@ async def close_aiohttp_session() -> None: _aiohttp_session = None +def _is_valid_deception_filename(filename: str) -> bool: + """Validate filename to prevent path traversal and other attacks. + + Checks performed: + 1. Not empty/None and is string + 2. Length <= 255 characters + 3. No path traversal patterns (.., /, \\) + 4. No null bytes (raw or URL-encoded) + 5. No URL-encoded path traversal (%2e%2e, %2f) + 6. No dangerous shell/special characters + 7. Only alphanumeric, underscore, hyphen, dot + 8. Not a reserved system name + + Args: + filename: Filename to validate + + Returns: + True if filename is safe to import, False otherwise + """ + # 1. Reject empty or non-string + if not filename or not isinstance(filename, str): + logger.debug(f"Filename validation failed: empty or non-string") + return False + + # 2. Max length to prevent massive strings / ReDoS attacks + if len(filename) > 255: + logger.warning(f"Filename too long ({len(filename)} chars): {filename}") + return False + + # 3. Reject path traversal attempts (before decoding) + if ".." in filename or "/" in filename or "\\" in filename: + logger.warning(f"Filename contains path traversal: {filename}") + return False + + # 4. Reject null bytes (raw and URL-encoded) + if "\x00" in filename or "%00" in filename: + logger.warning(f"Filename contains null byte: {filename}") + return False + + # 5. 
Reject URL-encoded path traversal patterns + if "%2e%2e" in filename.lower() or "%2f" in filename.lower(): + logger.warning(f"Filename contains URL-encoded path traversal: {filename}") + return False + + # 6. Reject shell/special dangerous characters + # These could be interpreted as commands, redirects, or operators + dangerous_chars = set('`$&|;<>()[]{}!*?#@"\'\\%\x00') + if any(c in filename for c in dangerous_chars): + logger.warning(f"Filename contains dangerous characters: {filename}") + return False + + # 7. Strict whitelist: only alphanumeric, underscore, hyphen, dot + # This ensures safe filesystem behavior and URL compatibility + if not re.match(r"^[a-zA-Z0-9_.\-]+$", filename): + logger.warning(f"Filename contains non-whitelisted characters: {filename}") + return False + + # 8. Reject system/reserved names that could have special meaning + reserved_names = {".", "..", "~", "root", "admin", "etc", "sys", "tmp", "var"} + # Extract stem (filename without extension) for comparison + stem = filename.rsplit(".", 1)[0].lower() if "." in filename else filename.lower() + if stem in reserved_names: + logger.warning(f"Filename uses reserved name: {filename}") + return False + + return True + + + +def import_deception_pages_from_directory() -> int: + """Import HTML pages from src/templates/deception directory into the database. + + Files are mapped to paths by replacing double underscores with slashes: + - admin__panel__login.html → /admin/panel/login + - test__blabla.html → /test/blabla + - wordpress__wp__admin__users.html → /wordpress/wp/admin/users + + Only imports if deception.import_pages is enabled in config. + Skips files that already exist in the database (lightweight check). 
+ + Returns: + Number of pages successfully imported + """ + from config import get_config + + config = get_config() + + # Check if import is enabled + if not hasattr(config, 'deception_import_pages') or not config.deception_import_pages: + return 0 + + deception_dir = Path(__file__).parent / "templates" / "deception" + + if not deception_dir.exists(): + return 0 + + imported_count = 0 + + try: + # Find all HTML files directly in the directory (not recursive - flat structure only) + html_files = list(deception_dir.glob("*.html")) + total_files = len(html_files) + + for html_file in html_files: + try: + # Get filename without extension + filename = html_file.stem # e.g., "admin__panel__login" + + # Validate filename for security (path traversal, injection, etc.) + if not _is_valid_deception_filename(html_file.name): + logger.debug(f"Filename validation failed, skipping: {html_file.name}") + continue + + # Convert double underscores to slashes for URL path + # admin__panel__login → admin/panel/login + url_path = "/" + filename.replace("__", "/") + + if not url_path or url_path == "/": + logger.debug(f"Could not generate valid URL path for {html_file.name}, skipping") + continue + + # Check if this path already exists in the database + if has_generated_page_in_db(url_path): + logger.debug(f"Page already exists in DB for path {url_path}, skipping") + continue + + # Read the HTML file + try: + with open(html_file, 'r', encoding='utf-8') as f: + html_content = f.read() + except UnicodeDecodeError: + # Try with different encoding + try: + with open(html_file, 'r', encoding='latin-1') as f: + html_content = f.read() + except Exception as err: + logger.debug(f"Could not read {html_file}: {err}") + continue + + # Save to database + if save_generated_page_to_db(url_path, html_content): + imported_count += 1 + + except Exception as err: + logger.debug(f"Error processing deception page {html_file}: {err}") + + logger.info(f"Imported {imported_count}/{total_files} deception 
pages") + return imported_count + + except Exception as err: + logger.error(f"Unexpected error during deception page import: {err}") + return 0 + + def is_ai_enabled() -> bool: """Check if AI generation is enabled via config or environment variable.""" from config import get_config diff --git a/src/routes/api.py b/src/routes/api.py index d2512a5..410952f 100644 --- a/src/routes/api.py +++ b/src/routes/api.py @@ -10,6 +10,9 @@ import hmac import secrets import time +import io +import zipfile +import base64 from fastapi import APIRouter, Request, Response, Query, Cookie from fastapi.responses import JSONResponse @@ -741,11 +744,97 @@ async def download_generated_page( db.close_session() +@router.post("/api/download-generated-pages-zip") +async def download_generated_pages_zip( + request: Request, + paths: str = Query(None), + before_date: str = Query(None), +): + """Download multiple generated deception pages as a ZIP file.""" + if not verify_auth(request): + return JSONResponse(content={"error": "Unauthorized"}, status_code=401) + + from models import GeneratedPage + + db = get_db() + try: + session = db.session + pages_to_download = [] + + if paths: + # Parse paths (comma-separated) + path_list = [p.strip() for p in paths.split(',') if p.strip()] + if not path_list: + return JSONResponse(content={"error": "No paths provided"}, status_code=400) + + get_app_logger().debug(f"[DECEPTION] Download requested for paths: {path_list}") + + # Query pages by paths + for path in path_list: + page = session.query(GeneratedPage).filter(GeneratedPage.path == path).first() + if page: + pages_to_download.append(page) + else: + get_app_logger().debug(f"[DECEPTION] Path not found: {path}") + + elif before_date: + # Query pages by date + try: + pages_to_download = db.get_generated_pages_before(before_date) + get_app_logger().debug(f"[DECEPTION] Download by date {before_date}: found {len(pages_to_download)} pages") + except ValueError as e: + return JSONResponse(content={"error": 
str(e)}, status_code=400) + else: + return JSONResponse( + content={"error": "Please specify either paths or before_date"}, + status_code=400 + ) + + if not pages_to_download: + return JSONResponse(content={"error": "No pages found to download"}, status_code=404) + + # Create ZIP file in memory + zip_buffer = io.BytesIO() + with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file: + for page in pages_to_download: + try: + html_content = base64.b64decode(page.html_content_b64).decode("utf-8") + # Build a safe filename from the path + safe_name = page.path.strip("/").replace("/", "_") or "index" + if not safe_name.endswith(".html"): + safe_name += ".html" + + # Add file to ZIP + zip_file.writestr(safe_name, html_content) + except Exception as e: + get_app_logger().warning(f"[DECEPTION] Error adding page {page.path} to ZIP: {e}") + continue + + zip_buffer.seek(0) + return Response( + content=zip_buffer.getvalue(), + media_type="application/zip", + headers={ + "Content-Disposition": 'attachment; filename="deception_pages.zip"', + }, + ) + except Exception as e: + get_app_logger().error(f"[DECEPTION] ZIP download error: {e}") + return JSONResponse(content={"error": "Internal server error"}, status_code=500) + finally: + db.close_session() + + + class UploadPageRequest(BaseModel): path: str content: str +class UploadBulkPagesRequest(BaseModel): + pages: dict # { path: content, ... 
} + + @router.post("/api/upload-generated-page") async def upload_generated_page(request: Request, body: UploadPageRequest): """Upload a custom page to serve as a deception page.""" @@ -806,3 +895,81 @@ async def upload_generated_page(request: Request, body: UploadPageRequest): return JSONResponse(content={"error": "Internal server error"}, status_code=500) finally: db.close_session() + + +@router.post("/api/upload-generated-pages-bulk") +async def upload_generated_pages_bulk(request: Request, body: UploadBulkPagesRequest): + """Upload multiple deception pages from a ZIP file.""" + if not verify_auth(request): + return JSONResponse(content={"error": "Unauthorized"}, status_code=401) + + import base64 + from datetime import datetime + from models import GeneratedPage + + if not body.pages or not isinstance(body.pages, dict): + return JSONResponse( + content={"error": "No pages provided"}, status_code=400 + ) + + db = get_db() + try: + session = db.session + uploaded_count = 0 + errors = [] + + for path, content in body.pages.items(): + try: + path = path.strip() + if not path or not content: + continue + + # Ensure path starts with / + if not path.startswith("/"): + path = "/" + path + + # Validate file extension + allowed_exts = (".html", ".htm", ".xml", ".json", ".txt", ".css", ".js") + if not any(path.endswith(ext) for ext in allowed_exts): + # No extension — treat as html + pass + + html_b64 = base64.b64encode(content.encode("utf-8")).decode("utf-8") + + existing = ( + session.query(GeneratedPage).filter(GeneratedPage.path == path).first() + ) + if existing: + existing.html_content_b64 = html_b64 + existing.last_accessed = datetime.now() + else: + page = GeneratedPage( + path=path, + html_content_b64=html_b64, + created_at=datetime.now(), + last_accessed=datetime.now(), + access_count=0, + ) + session.add(page) + + uploaded_count += 1 + except Exception as e: + errors.append((path, str(e))) + continue + + session.commit() + get_app_logger().info(f"[DECEPTION] 
Bulk uploaded {uploaded_count} pages from ZIP") + return JSONResponse( + content={ + "ok": True, + "uploaded": uploaded_count, + "errors": errors, + } + ) + + except Exception as e: + session.rollback() + get_app_logger().error(f"[DECEPTION] Bulk upload error: {e}") + return JSONResponse(content={"error": "Internal server error"}, status_code=500) + finally: + db.close_session() diff --git a/src/templates/jinja2/dashboard/partials/deception_panel.html b/src/templates/jinja2/dashboard/partials/deception_panel.html index 8096977..fef7842 100644 --- a/src/templates/jinja2/dashboard/partials/deception_panel.html +++ b/src/templates/jinja2/dashboard/partials/deception_panel.html @@ -5,13 +5,9 @@