diff --git a/docs/cx/BENCHKIT_GAP_ANALYSIS.md b/docs/cx/BENCHKIT_GAP_ANALYSIS.md index 333078e..cdb0a51 100644 --- a/docs/cx/BENCHKIT_GAP_ANALYSIS.md +++ b/docs/cx/BENCHKIT_GAP_ANALYSIS.md @@ -58,7 +58,7 @@ Continuous estimation has now moved beyond a mere entry point: a common estimati However, estimation is still not yet broadly deployed across multiple applications, and AI-driven optimization integration remains mostly at the integration-point stage. As of the current repository survey, BenchKit has six benchmark applications with `build.sh`/`run.sh`, but only `qws` has an `estimate.sh`. -The result portal also already has a meaningful test base (`result_server/tests`: 30 `test_*.py` modules), and the repository now has a repo-local Python dependency manifest, a standard portal test entrypoint under `result_server/tests`, and a lightweight GitHub Actions verification path for portal-oriented changes. +The result portal also already has a meaningful test base (`result_server/tests`: 32 `test_*.py` modules), and the repository now has a repo-local Python dependency manifest, a standard portal test entrypoint under `result_server/tests`, and a lightweight GitHub Actions verification path for portal-oriented changes. The main GitLab pipeline still intentionally skips heavy benchmark execution when a direct or manually triggered GitLab pipeline sees changes limited to `result_server/**/*` or portal display metadata such as `config/system_info.csv`. Protected-branch synchronization itself uses `ci.skip`, so the dedicated lightweight GitHub Actions path should continue to be kept in sync as portal-side files evolve. ## 2.1 現時点で明示しておく設計負債 / Explicit Design Debts to Keep Visible @@ -296,7 +296,7 @@ Once the estimation specification is clarified, many other design decisions beco 今回のコードベース調査では、性能推定に次ぐ実務上の詰まりどころとして、`result_server` の検証導線が見えた。 -- `result_server/tests` には 30 個の `test_*.py` モジュールがあり、portal 側はすでに「検証すべき対象」になっている +- `result_server/tests` には 32 個の `test_*.py` モジュールがあり、portal 側はすでに「検証すべき対象」になっている - repo-local な依存関係定義として `requirements-result-server.txt` があり、`result_server/tests/run_result_server_tests.py` が標準 test entrypoint として使える - portal-oriented 変更向けの lightweight GitHub Actions として `.github/workflows/result-server-tests.yml` が用意されている - `.gitlab-ci.yml` は直接または手動起動されたGitLab pipelineで `result_server/**/*` や `config/system_info.csv` 変更時に重い benchmark pipeline を skip する。保護ブランチ同期自体は `ci.skip` を使うため、GitHub Actions 側の path filter を portal 周辺の実ファイルに追従させ続ける必要がある diff --git a/docs/deploy/hardening-guide.md b/docs/deploy/hardening-guide.md new file mode 100644 index 0000000..59b3b31 --- /dev/null +++ b/docs/deploy/hardening-guide.md @@ -0,0 +1,47 @@ +# Result Portal Hardening Guide + +This checklist covers production-facing `result_server` deployments. + +## Request Limits + +The portal enforces an application-level request body limit: + +```text +RESULT_SERVER_MAX_UPLOAD_MB=512 +``` + +Large estimation input archives are also checked per member: + +```text +RESULT_SERVER_MAX_ARCHIVE_MEMBER_MB=1024 +``` + +Set these values to match the largest expected PA Data or estimation input +archive. Keep the reverse proxy body limit at or below the Flask limit so that +oversized uploads are rejected before they consume worker memory. + +## Rate Limits + +API ingest/query routes and admin write routes use Redis-backed fixed-window +rate limits. Production deployments must keep Redis monitored and available; +when Redis is required but unavailable, protected operations fail closed with a +503 response. + +Default limits: + +- API ingest: 120 requests per runner per minute +- API query: 60 requests per runner per minute +- Admin write actions: 20 requests per admin user per minute + +## Reverse Proxy + +Run the Flask app behind a reverse proxy that terminates TLS and forwards only +loopback traffic to the app. Keep `/admin/` and `/auth/` protected by portal +authentication; `robots.txt` only reduces crawler noise and is not an access +control mechanism. + +## Gunicorn + +Use the repository `gunicorn.conf.py` as the baseline process manager +configuration. It binds to `127.0.0.1:8800` by default, sets worker timeouts, +and enables `max_requests` recycling to reduce long-running worker risk. diff --git a/docs/guides/developer-reference.md b/docs/guides/developer-reference.md index d93bd03..e2c30ea 100644 --- a/docs/guides/developer-reference.md +++ b/docs/guides/developer-reference.md @@ -220,6 +220,7 @@ For production portal deployments: - The legacy `RESULT_SERVER_KEY` variable is still accepted as runner `default` for compatibility, but should be rotated to `RESULT_SERVER_KEYS`. - See `docs/deploy/key-management.md` for generation and rotation guidance. - `REDIS_URL` must point to a monitored Redis instance; production authentication refuses login when Redis is unavailable. +- API ingest and query endpoints use Redis-backed rate limits by default; set `RESULT_SERVER_MAX_UPLOAD_MB` and `RESULT_SERVER_MAX_ARCHIVE_MEMBER_MB` when deployment-specific upload limits are needed. - `app_dev.py` is localhost-only, uses ephemeral development secrets when none are provided, and enables the Werkzeug debugger only with `RESULT_SERVER_DEV_DEBUG=1`. ### Result Quality Visibility diff --git a/gunicorn.conf.py b/gunicorn.conf.py new file mode 100644 index 0000000..7fd049b --- /dev/null +++ b/gunicorn.conf.py @@ -0,0 +1,23 @@ +"""Reference Gunicorn configuration for result_server deployments.""" + +import multiprocessing +import os + + +bind = os.environ.get("RESULT_SERVER_BIND", "127.0.0.1:8800") +workers = int( + os.environ.get("RESULT_SERVER_WORKERS", str(multiprocessing.cpu_count() * 2 + 1)) +) +worker_class = "sync" +timeout = int(os.environ.get("RESULT_SERVER_TIMEOUT", "60")) +graceful_timeout = int(os.environ.get("RESULT_SERVER_GRACEFUL_TIMEOUT", "30")) +keepalive = int(os.environ.get("RESULT_SERVER_KEEPALIVE", "5")) +max_requests = int(os.environ.get("RESULT_SERVER_MAX_REQUESTS", "1000")) +max_requests_jitter = int(os.environ.get("RESULT_SERVER_MAX_REQUESTS_JITTER", "50")) +limit_request_line = int(os.environ.get("RESULT_SERVER_LIMIT_REQUEST_LINE", "8190")) +limit_request_field_size = int( + os.environ.get("RESULT_SERVER_LIMIT_REQUEST_FIELD_SIZE", "8190") +) +accesslog = "-" +errorlog = "-" +loglevel = os.environ.get("RESULT_SERVER_LOG_LEVEL", "info") diff --git a/result_server/app.py b/result_server/app.py index 26e980e..f0d80d4 100644 --- a/result_server/app.py +++ b/result_server/app.py @@ -2,7 +2,7 @@ import sys from datetime import timedelta -from flask import Flask, render_template +from flask import Flask, jsonify, render_template from flask_session import Session from routes.api import api_bp @@ -14,6 +14,8 @@ from utils.preflight import validate_production_config +DEFAULT_MAX_UPLOAD_MB = 512 +DEFAULT_MAX_ARCHIVE_MEMBER_MB = 1024 INGEST_KEYS = parse_ingest_keys() PREFLIGHT_ERRORS = validate_production_config(os.environ, INGEST_KEYS) @@ -75,6 +77,23 @@ def _configure_result_directories(app, base_dir): app.config.update(dir_map) +def _configure_upload_limits(app): + """Configure request and archive size limits for ingest endpoints.""" + max_upload_mb = int(os.environ.get("RESULT_SERVER_MAX_UPLOAD_MB", DEFAULT_MAX_UPLOAD_MB)) + max_member_mb = int( + os.environ.get("RESULT_SERVER_MAX_ARCHIVE_MEMBER_MB", DEFAULT_MAX_ARCHIVE_MEMBER_MB) + ) + app.config["MAX_CONTENT_LENGTH"] = max_upload_mb * 1024 * 1024 + app.config["MAX_ARCHIVE_MEMBER_SIZE"] = max_member_mb * 1024 * 1024 + + @app.errorhandler(413) + def payload_too_large(_error): + return jsonify( + error="Payload too large", + limit_mb=app.config["MAX_CONTENT_LENGTH"] // 1024 // 1024, + ), 413 + + def _register_portal_blueprints(app, prefix): """Register all portal blueprints using the given URL prefix.""" from routes.admin import admin_bp @@ -107,6 +126,7 @@ def create_app(prefix="", base_dir=None): _configure_user_store(app) _configure_totp_issuer(app, prefix) _configure_result_directories(app, base_dir) + _configure_upload_limits(app) init_csrf(app, exempt_blueprints=(api_bp,)) register_home_routes(app, prefix=prefix) diff --git a/result_server/app_dev.py b/result_server/app_dev.py index b50b04f..a9482c0 100644 --- a/result_server/app_dev.py +++ b/result_server/app_dev.py @@ -23,6 +23,8 @@ from datetime import datetime, timedelta LOOPBACK_HOSTS = {"127.0.0.1", "localhost", "::1"} +DEFAULT_MAX_UPLOAD_MB = 512 +DEFAULT_MAX_ARCHIVE_MEMBER_MB = 1024 def setup_dev_environment(base_dir): @@ -155,7 +157,7 @@ def create_dev_app(base_dir): sys.modules["redis"] = types.ModuleType("redis") sys.modules["utils.totp_manager"] = _create_stub_totp_manager() - from flask import Flask, render_template + from flask import Flask, jsonify, render_template from flask_session import Session from routes.home import register_home_routes @@ -173,9 +175,26 @@ def create_dev_app(base_dir): SESSION_PERMANENT=False, AUTH_REQUIRES_REDIS=False, INGEST_KEYS=parse_ingest_keys(), + MAX_CONTENT_LENGTH=( + int(os.environ.get("RESULT_SERVER_MAX_UPLOAD_MB", DEFAULT_MAX_UPLOAD_MB)) + * 1024 + * 1024 + ), + MAX_ARCHIVE_MEMBER_SIZE=( + int(os.environ.get("RESULT_SERVER_MAX_ARCHIVE_MEMBER_MB", DEFAULT_MAX_ARCHIVE_MEMBER_MB)) + * 1024 + * 1024 + ), ) Session(app) + @app.errorhandler(413) + def payload_too_large(_error): + return jsonify( + error="Payload too large", + limit_mb=app.config["MAX_CONTENT_LENGTH"] // 1024 // 1024, + ), 413 + # Register a default local admin user. stub_store = _StubUserStore() stub_store.create_user("admin@localhost", "DEVDEVDEVDEVDEVDEV", ["dev", "admin"]) diff --git a/result_server/routes/admin.py b/result_server/routes/admin.py index 273d787..d17b47e 100644 --- a/result_server/routes/admin.py +++ b/result_server/routes/admin.py @@ -15,6 +15,7 @@ ) from utils.user_store import get_user_store +from utils.rate_limit import rate_limited admin_bp = Blueprint("admin", __name__, url_prefix="/admin") @@ -33,6 +34,11 @@ def _render_users_page(invitation_url=None): return render_template("admin_users.html", users=all_users, invitation_url=invitation_url) +def _admin_rate_key(_request): + """Return the session-scoped admin rate-limit key.""" + return f"admin:{session.get('user_email', 'anon')}" + + def admin_required(f): """Allow access only to authenticated users with the admin affiliation.""" @@ -57,6 +63,7 @@ def users(): @admin_bp.route("/users/add", methods=["POST"]) @admin_required +@rate_limited(max_per_minute=20, key_fn=_admin_rate_key, scope="admin_write") def add_user(): """Create a user invitation and show the generated invitation URL.""" store = get_user_store() @@ -81,6 +88,7 @@ def add_user(): @admin_bp.route("/users//delete", methods=["POST"]) @admin_required +@rate_limited(max_per_minute=20, key_fn=_admin_rate_key, scope="admin_write") def delete_user(email): """Delete a user unless the current admin targets their own account.""" if email == session.get("user_email"): @@ -94,6 +102,7 @@ def delete_user(email): @admin_bp.route("/users//affiliations", methods=["POST"]) @admin_required +@rate_limited(max_per_minute=20, key_fn=_admin_rate_key, scope="admin_write") def update_affiliations(email): """Update the affiliations stored for a user.""" store = get_user_store() @@ -106,6 +115,7 @@ def update_affiliations(email): @admin_bp.route("/users//reinvite", methods=["POST"]) @admin_required +@rate_limited(max_per_minute=20, key_fn=_admin_rate_key, scope="admin_write") def reinvite_user(email): """Generate a new invitation link after clearing the current TOTP secret.""" store = get_user_store() diff --git a/result_server/routes/api.py b/result_server/routes/api.py index bbe33c6..9d5fe29 100644 --- a/result_server/routes/api.py +++ b/result_server/routes/api.py @@ -12,9 +12,11 @@ from datetime import datetime from utils.auth import verify_ingest_key +from utils.rate_limit import rate_limited api_bp = Blueprint("api", __name__) _TIMESTAMP_RE = re.compile(r"^\d{8}_\d{6}$") +DEFAULT_MAX_ARCHIVE_MEMBER_SIZE = 1024 * 1024 * 1024 # ========================================== @@ -37,6 +39,12 @@ def require_api_key(): return runner_id +def _api_rate_key(req): + """Return the runner-scoped API rate-limit key for a request.""" + runner_id = verify_ingest_key(req.headers.get("X-API-Key", "")) or "unknown" + return f"runner:{runner_id}" + + def save_json_file(data, prefix, out_dir, given_uuid=None): """Persist a JSON payload using atomic file replacement.""" if given_uuid is not None and not is_valid_uuid(given_uuid): @@ -173,8 +181,14 @@ def _find_result_file_by_uuid(received_dir, uuid_value): def _safe_extract_tar_bytes(file_storage, target_dir): """Extract uploaded tar bytes with path and member-type checks.""" os.makedirs(target_dir, exist_ok=True) + max_member_size = current_app.config.get( + "MAX_ARCHIVE_MEMBER_SIZE", + DEFAULT_MAX_ARCHIVE_MEMBER_SIZE, + ) with tarfile.open(fileobj=file_storage.stream, mode="r:*") as tar: for member in tar.getmembers(): + if member.size > max_member_size: + abort(400, description="Archive member too large") normalized = os.path.normpath(member.name) drive, _ = os.path.splitdrive(normalized) if ( @@ -233,6 +247,7 @@ def _replace_directory_after_success(source_dir, target_dir): # ========================================== @api_bp.route("/api/ingest/result", methods=["POST"]) +@rate_limited(max_per_minute=120, key_fn=_api_rate_key, scope="api_ingest") def ingest_result(): """Receive and persist a collected result JSON payload.""" require_api_key() @@ -245,6 +260,7 @@ def ingest_result(): @api_bp.route("/api/ingest/estimate", methods=["POST"]) +@rate_limited(max_per_minute=120, key_fn=_api_rate_key, scope="api_ingest") def ingest_estimate(): """Receive and persist an estimated-result JSON payload.""" require_api_key() @@ -262,6 +278,7 @@ def ingest_estimate(): @api_bp.route("/api/ingest/padata", methods=["POST"]) +@rate_limited(max_per_minute=120, key_fn=_api_rate_key, scope="api_ingest") def ingest_padata(): """Receive and store a PA Data archive.""" require_api_key() @@ -296,7 +313,7 @@ def ingest_padata(): tmp_path = save_path + ".tmp" with open(tmp_path, "wb") as f: - f.write(uploaded_file.read()) + shutil.copyfileobj(uploaded_file.stream, f, length=1024 * 1024) f.flush() os.fsync(f.fileno()) os.rename(tmp_path, save_path) @@ -312,6 +329,7 @@ def ingest_padata(): @api_bp.route("/api/ingest/estimation-inputs", methods=["POST"]) +@rate_limited(max_per_minute=120, key_fn=_api_rate_key, scope="api_ingest") def ingest_estimation_inputs(): """Estimation input archive (tgz) upload and expansion.""" require_api_key() @@ -356,6 +374,7 @@ def ingest_estimation_inputs(): # ========================================== @api_bp.route("/api/query/result", methods=["GET"]) +@rate_limited(max_per_minute=60, key_fn=_api_rate_key, scope="api_query") def query_result(): """Search results by uuid or by system/code/exp and return one result. @@ -433,6 +452,7 @@ def query_result(): @api_bp.route("/api/query/estimation-inputs", methods=["GET"]) +@rate_limited(max_per_minute=60, key_fn=_api_rate_key, scope="api_query") def query_estimation_inputs(): """Return estimation input artifacts for a result UUID as a tar.gz archive.""" require_api_key() @@ -472,6 +492,7 @@ def query_estimation_inputs(): @api_bp.route("/api/query/estimate", methods=["GET"]) +@rate_limited(max_per_minute=60, key_fn=_api_rate_key, scope="api_query") def query_estimate(): """Return one estimate JSON document identified by UUID.""" require_api_key() diff --git a/result_server/tests/test_rate_limit.py b/result_server/tests/test_rate_limit.py new file mode 100644 index 0000000..f94b6f3 --- /dev/null +++ b/result_server/tests/test_rate_limit.py @@ -0,0 +1,196 @@ +"""Tests for Redis-backed endpoint rate limits.""" + +import json +import os +import shutil +import sys +import tempfile + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from test_support import build_api_route_app, build_portal_route_app, install_portal_test_stubs + +install_portal_test_stubs() + +API_KEY = "test-api-key-12345678901234567890" +SECOND_API_KEY = "second-api-key-123456789012345678" + + +class FakeRedis: + def __init__(self, *, fail=False): + self.fail = fail + self.counts = {} + self.expirations = {} + + def incr(self, key): + if self.fail: + raise ConnectionError("redis unavailable") + self.counts[key] = self.counts.get(key, 0) + 1 + return self.counts[key] + + def expire(self, key, seconds): + if self.fail: + raise ConnectionError("redis unavailable") + self.expirations[key] = seconds + + +class _Store: + def __init__(self): + self._users = { + "admin@test.com": { + "email": "admin@test.com", + "totp_secret": "SECRET", + "affiliations": ["admin"], + }, + "user@test.com": { + "email": "user@test.com", + "totp_secret": "SECRET2", + "affiliations": ["dev"], + }, + } + + def get_affiliations(self, email): + user = self._users.get(email) + return user["affiliations"] if user else [] + + def list_users(self): + return list(self._users.values()) + + def has_totp_secret(self, email): + user = self._users.get(email) + return bool(user and user.get("totp_secret")) + + def delete_user(self, email): + return self._users.pop(email, None) is not None + + def user_exists(self, email): + return email in self._users + + def update_affiliations(self, email, affiliations): + self._users[email]["affiliations"] = affiliations + return True + + def clear_totp_secret(self, email): + self._users[email]["totp_secret"] = "" + return True + + def create_invitation(self, email, affiliations): + return "token-1" + + +def _api_app(): + received = tempfile.mkdtemp() + received_padata = tempfile.mkdtemp() + received_estimation_inputs = tempfile.mkdtemp() + estimated = tempfile.mkdtemp() + app = build_api_route_app( + received_dir=received, + received_padata_dir=received_padata, + received_estimation_inputs_dir=received_estimation_inputs, + estimated_dir=estimated, + ) + app.config["INGEST_KEYS"] = { + API_KEY: "test-runner", + SECOND_API_KEY: "second-runner", + } + app.config["REDIS_CONN"] = FakeRedis() + app.config["REDIS_PREFIX"] = "test:" + app.config["RATE_LIMITS"] = {"api_ingest": 1, "api_query": 1} + return app, (received, received_padata, received_estimation_inputs, estimated) + + +def _portal_app(): + received = tempfile.mkdtemp() + estimated = tempfile.mkdtemp() + app = build_portal_route_app( + templates_dir=os.path.join(os.path.dirname(__file__), "..", "templates"), + received_dir=received, + estimated_dir=estimated, + user_store=_Store(), + ) + app.config["REDIS_CONN"] = FakeRedis() + app.config["REDIS_PREFIX"] = "test:" + app.config["RATE_LIMITS"] = {"admin_write": 1} + return app, (received, estimated) + + +def _cleanup(paths): + for path in paths: + shutil.rmtree(path) + + +def test_api_ingest_rate_limit_returns_429(): + app, temp_dirs = _api_app() + try: + with app.test_client() as client: + first = client.post( + "/api/ingest/result", + data=json.dumps({"code": "first"}), + headers={"X-API-Key": API_KEY, "Content-Type": "application/json"}, + ) + second = client.post( + "/api/ingest/result", + data=json.dumps({"code": "second"}), + headers={"X-API-Key": API_KEY, "Content-Type": "application/json"}, + ) + + assert first.status_code == 200 + assert second.status_code == 429 + finally: + _cleanup(temp_dirs) + + +def test_api_rate_limit_is_runner_scoped(): + app, temp_dirs = _api_app() + try: + with app.test_client() as client: + first = client.post( + "/api/ingest/result", + data=json.dumps({"code": "first"}), + headers={"X-API-Key": API_KEY, "Content-Type": "application/json"}, + ) + second_runner = client.post( + "/api/ingest/result", + data=json.dumps({"code": "second"}), + headers={"X-API-Key": SECOND_API_KEY, "Content-Type": "application/json"}, + ) + + assert first.status_code == 200 + assert second_runner.status_code == 200 + finally: + _cleanup(temp_dirs) + + +def test_rate_limit_redis_failure_fails_closed_when_required(): + app, temp_dirs = _api_app() + app.config["REDIS_CONN"] = FakeRedis(fail=True) + app.config["AUTH_REQUIRES_REDIS"] = True + try: + with app.test_client() as client: + resp = client.post( + "/api/ingest/result", + data=json.dumps({"code": "first"}), + headers={"X-API-Key": API_KEY, "Content-Type": "application/json"}, + ) + + assert resp.status_code == 503 + finally: + _cleanup(temp_dirs) + + +def test_admin_write_rate_limit_returns_429(): + app, temp_dirs = _portal_app() + try: + with app.test_client() as client: + with client.session_transaction() as sess: + sess["authenticated"] = True + sess["user_email"] = "admin@test.com" + sess["user_affiliations"] = ["admin"] + + first = client.post("/admin/users/user@test.com/delete") + second = client.post("/admin/users/user@test.com/delete") + + assert first.status_code == 302 + assert second.status_code == 429 + finally: + _cleanup(temp_dirs) diff --git a/result_server/tests/test_upload_limits.py b/result_server/tests/test_upload_limits.py new file mode 100644 index 0000000..6171067 --- /dev/null +++ b/result_server/tests/test_upload_limits.py @@ -0,0 +1,89 @@ +"""Tests for result_server upload size limits.""" + +import io +import json +import os +import shutil +import sys +import tarfile +import tempfile + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from test_support import build_api_route_app, install_portal_test_stubs + +install_portal_test_stubs() + +API_KEY = "test-api-key-12345678901234567890" + + +def _api_app(): + received = tempfile.mkdtemp() + received_padata = tempfile.mkdtemp() + received_estimation_inputs = tempfile.mkdtemp() + estimated = tempfile.mkdtemp() + app = build_api_route_app( + received_dir=received, + received_padata_dir=received_padata, + received_estimation_inputs_dir=received_estimation_inputs, + estimated_dir=estimated, + ) + app.config["INGEST_KEYS"] = {API_KEY: "test-runner"} + return app, (received, received_padata, received_estimation_inputs, estimated) + + +def _cleanup(paths): + for path in paths: + shutil.rmtree(path) + + +def test_padata_upload_over_max_content_length_returns_413(): + app, temp_dirs = _api_app() + app.config["MAX_CONTENT_LENGTH"] = 128 + try: + with app.test_client() as client: + resp = client.post( + "/api/ingest/padata", + data={ + "id": "12345678-1234-1234-1234-123456789abc", + "timestamp": "20250101_120000", + "file": (io.BytesIO(b"x" * 512), "large.tgz"), + }, + headers={"X-API-Key": API_KEY}, + content_type="multipart/form-data", + ) + + assert resp.status_code == 413 + finally: + _cleanup(temp_dirs) + + +def test_estimation_inputs_rejects_archive_member_over_limit(): + app, temp_dirs = _api_app() + received = temp_dirs[0] + app.config["MAX_ARCHIVE_MEMBER_SIZE"] = 3 + uuid_value = "12345678-1234-1234-1234-123456789abc" + result_filename = f"result_20250101_000000_{uuid_value}.json" + with open(os.path.join(received, result_filename), "w", encoding="utf-8") as f: + json.dump({"code": "qws", "_server_uuid": uuid_value}, f) + + archive_bytes = io.BytesIO() + with tarfile.open(fileobj=archive_bytes, mode="w:gz") as tar: + payload = b"too large" + info = tarfile.TarInfo(name="input.json") + info.size = len(payload) + tar.addfile(info, io.BytesIO(payload)) + archive_bytes.seek(0) + + try: + with app.test_client() as client: + resp = client.post( + "/api/ingest/estimation-inputs", + data={"id": uuid_value, "file": (archive_bytes, "inputs.tgz")}, + headers={"X-API-Key": API_KEY}, + content_type="multipart/form-data", + ) + + assert resp.status_code == 400 + finally: + _cleanup(temp_dirs) diff --git a/result_server/utils/rate_limit.py b/result_server/utils/rate_limit.py new file mode 100644 index 0000000..757d6e8 --- /dev/null +++ b/result_server/utils/rate_limit.py @@ -0,0 +1,61 @@ +"""Redis-backed endpoint rate limiting helpers.""" + +from __future__ import annotations + +from collections.abc import Callable +from functools import wraps + +from flask import abort, current_app, request +from werkzeug.exceptions import HTTPException + + +RateKeyFunc = Callable[[object], str] + + +def _configured_limit(scope: str, default: int) -> int: + """Return a scope-specific rate limit, allowing tests/deployments to override.""" + overrides = current_app.config.get("RATE_LIMITS", {}) + return int(overrides.get(scope, default)) + + +def rate_limited(*, max_per_minute: int, key_fn: RateKeyFunc | None = None, scope: str): + """Apply a simple Redis-backed fixed-window rate limit to a Flask view.""" + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + redis_conn = current_app.config.get("REDIS_CONN") + if redis_conn is None: + return func(*args, **kwargs) + + key_suffix = key_fn(request) if key_fn else (request.remote_addr or "unknown") + redis_prefix = current_app.config.get("REDIS_PREFIX", "") + redis_key = f"{redis_prefix}rate:{scope}:{key_suffix}" + limit = _configured_limit(scope, max_per_minute) + + try: + count = redis_conn.incr(redis_key) + if count == 1: + redis_conn.expire(redis_key, 60) + if count > limit: + current_app.logger.warning( + "rate_limit_exceeded", + extra={ + "rate_limit_scope": scope, + "rate_limit_key": key_suffix, + "rate_limit_count": count, + "rate_limit_threshold": limit, + }, + ) + abort(429, description="Too many requests") + except HTTPException: + raise + except Exception: + current_app.logger.exception("Rate limit check failed") + if current_app.config.get("AUTH_REQUIRES_REDIS"): + abort(503, description="Rate limiting service unavailable") + + return func(*args, **kwargs) + + return wrapper + + return decorator