From 6f04ce887c2311895b948d3c154492c384799c7a Mon Sep 17 00:00:00 2001 From: Yoshifumi Nakamura Date: Tue, 19 May 2026 14:45:13 +0900 Subject: [PATCH] Harden result server ingest and security metadata Signed-off-by: Yoshifumi Nakamura --- README.md | 1 + SECURITY.md | 50 ++++++++++++++ docs/ci.md | 5 ++ docs/cx/BENCHKIT_GAP_ANALYSIS.md | 4 +- docs/deploy/key-management.md | 50 ++++++++++++++ docs/guides/developer-reference.md | 4 +- result_server/app.py | 9 ++- result_server/app_dev.py | 2 + result_server/routes/api.py | 47 ++++++++++--- result_server/routes/security_metadata.py | 38 +++++++++++ result_server/tests/test_api_routes.py | 67 ++++++++++++++++--- result_server/tests/test_csrf.py | 2 +- result_server/tests/test_preflight.py | 41 ++++++++++++ result_server/tests/test_security_metadata.py | 46 +++++++++++++ result_server/utils/preflight.py | 59 ++++++++++++++++ 15 files changed, 403 insertions(+), 22 deletions(-) create mode 100644 SECURITY.md create mode 100644 docs/deploy/key-management.md create mode 100644 result_server/routes/security_metadata.py create mode 100644 result_server/tests/test_preflight.py create mode 100644 result_server/tests/test_security_metadata.py create mode 100644 result_server/utils/preflight.py diff --git a/README.md b/README.md index ca3ab2e..f5f87ec 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ Repository migration details are documented in [docs/repository-history.md](docs - Add a new system: [docs/guides/add-site.md](docs/guides/add-site.md) - Add estimation support: [docs/guides/add-estimation.md](docs/guides/add-estimation.md) - CI execution control: [docs/ci.md](docs/ci.md) +- Security policy: [SECURITY.md](SECURITY.md) - Profiler support guide: [docs/guides/profiler-support.md](docs/guides/profiler-support.md) - Profiler level reference: [docs/guides/profiler-level-reference.md](docs/guides/profiler-level-reference.md) diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..dd0b12a --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,50 @@ +# Security Policy + +BenchKit is published as open source, so security fixes and reporting paths +need to be clear for external users and researchers. + +## Supported Versions + +| Version or branch | Supported | +| --- | --- | +| `main` | Yes | +| `develop` | Yes, for upcoming fixes before release | +| Older untagged revisions | No | + +## Reporting a Vulnerability + +Please report suspected vulnerabilities privately instead of opening a public +issue. + +- GitHub private vulnerability reports: https://github.com/RIKEN-RCCS/benchkit/security/advisories/new + +Include the affected component, impact, reproduction steps, proof of concept +details if available, and any suggested fix. Do not include real secrets, +credentials, or personal data in the report. + +## Response Targets + +| Step | Target | +| --- | --- | +| Initial acknowledgement | Within 3 business days | +| Triage | Within 7 business days | +| Critical or High severity patch | Within 30 days | +| Medium severity patch | Within 90 days | +| Coordinated disclosure | After a fix is available, usually within 30 to 90 days | + +## Scope + +In scope: + +- `result_server` authentication, authorization, ingestion, and portal routes +- CI and runner integration that could expose credentials or corrupt results +- Deployment guidance that could lead to insecure production defaults + +Out of scope: + +- Social engineering +- Attacks requiring already-compromised infrastructure outside this repository +- Vulnerabilities in third-party dependencies that should be reported upstream +- Local development configurations intentionally bound to loopback interfaces + +We appreciate coordinated disclosure and will credit reporters when requested. diff --git a/docs/ci.md b/docs/ci.md index 5e4e5e2..d1faa3d 100644 --- a/docs/ci.md +++ b/docs/ci.md @@ -226,11 +226,16 @@ Protected-branch synchronization pushes to GitLab with `ci.skip`, so these skip Current skip-oriented patterns include: +- `.github/**/*` - `*.md` - `docs/**/*` - `result_server/**/*` - `config/system_info.csv` +> Synchronization note: this list mirrors the `paths:` entries in +> `.gitlab-ci.yml`. Update this document in the same PR when those rules +> change. + `system_info.csv` is the public portal catalog. Every system listed there must also be registered in `system.csv` and reference a queue defined in `queue.csv`. The reverse is intentionally not required: private or development-only systems may exist in `system.csv` / `queue.csv` without being exposed in `system_info.csv`. `system_info.csv` はportalでユーザーに見える公開catalogです。そこに載せたsystemは必ず `system.csv` に登録され、`queue.csv` に定義されたqueueを参照する必要があります。逆方向は必須ではありません。開発用・非公開用のsystemやqueueは、`system_info.csv` に公開せず `system.csv` / `queue.csv` にだけ存在してよいです。 diff --git a/docs/cx/BENCHKIT_GAP_ANALYSIS.md b/docs/cx/BENCHKIT_GAP_ANALYSIS.md index af7654e..333078e 100644 --- a/docs/cx/BENCHKIT_GAP_ANALYSIS.md +++ b/docs/cx/BENCHKIT_GAP_ANALYSIS.md @@ -58,7 +58,7 @@ Continuous estimation has now moved beyond a mere entry point: a common estimati However, estimation is still not yet broadly deployed across multiple applications, and AI-driven optimization integration remains mostly at the integration-point stage. As of the current repository survey, BenchKit has six benchmark applications with `build.sh`/`run.sh`, but only `qws` has an `estimate.sh`. -The result portal also already has a meaningful test base (`result_server/tests`: 27), and the repository now has a repo-local Python dependency manifest, a standard portal test entrypoint under `result_server/tests`, and a lightweight GitHub Actions verification path for portal-oriented changes. +The result portal also already has a meaningful test base (`result_server/tests`: 30 `test_*.py` modules), and the repository now has a repo-local Python dependency manifest, a standard portal test entrypoint under `result_server/tests`, and a lightweight GitHub Actions verification path for portal-oriented changes. The main GitLab pipeline still intentionally skips heavy benchmark execution when a direct or manually triggered GitLab pipeline sees changes limited to `result_server/**/*` or portal display metadata such as `config/system_info.csv`. Protected-branch synchronization itself uses `ci.skip`, so the dedicated lightweight GitHub Actions path should continue to be kept in sync as portal-side files evolve. ## 2.1 現時点で明示しておく設計負債 / Explicit Design Debts to Keep Visible @@ -296,7 +296,7 @@ Once the estimation specification is clarified, many other design decisions beco 今回のコードベース調査では、性能推定に次ぐ実務上の詰まりどころとして、`result_server` の検証導線が見えた。 -- `result_server/tests` には 27 本の pytest ベースのテストがあり、portal 側はすでに「検証すべき対象」になっている +- `result_server/tests` には 30 個の `test_*.py` モジュールがあり、portal 側はすでに「検証すべき対象」になっている - repo-local な依存関係定義として `requirements-result-server.txt` があり、`result_server/tests/run_result_server_tests.py` が標準 test entrypoint として使える - portal-oriented 変更向けの lightweight GitHub Actions として `.github/workflows/result-server-tests.yml` が用意されている - `.gitlab-ci.yml` は直接または手動起動されたGitLab pipelineで `result_server/**/*` や `config/system_info.csv` 変更時に重い benchmark pipeline を skip する。保護ブランチ同期自体は `ci.skip` を使うため、GitHub Actions 側の path filter を portal 周辺の実ファイルに追従させ続ける必要がある diff --git a/docs/deploy/key-management.md b/docs/deploy/key-management.md new file mode 100644 index 0000000..81b9616 --- /dev/null +++ b/docs/deploy/key-management.md @@ -0,0 +1,50 @@ +# Result Portal Key Management + +This guide covers the secrets used by `result_server/app.py`. + +## Required Secrets + +Production deployments must provide: + +- `FLASK_SECRET_KEY`: at least 32 characters, generated randomly. +- `RESULT_SERVER_KEYS`: one or more runner-scoped ingest keys. + +Use runner-scoped keys instead of the legacy `RESULT_SERVER_KEY`: + +```text +RESULT_SERVER_KEYS=runner-a:,runner-b: +``` + +Each key must be at least 32 characters and must not use known insecure +examples such as `dev-api-key`, `changeme`, or `secret`. The production app +refuses to start when these checks fail. + +## Generation + +Generate random values with a local secret generator, for example: + +```bash +openssl rand -hex 32 +``` + +Do not commit generated values. Store them in the deployment secret mechanism, +such as a systemd `EnvironmentFile`, a site secret manager, or an internal +vault service. + +## Rotation + +For a normal runner key rotation: + +1. Add the new key to `RESULT_SERVER_KEYS` while keeping the old key. +2. Deploy the portal configuration. +3. Update the runner to use the new key. +4. Confirm successful ingest events for the runner. +5. Remove the old key after the agreed overlap window. + +If a key may have leaked, remove it immediately, deploy the portal, update the +affected runner, and review ingest logs for suspicious activity. + +## Logging + +Logs may include runner ids and endpoint names. They must not include API key +values, TOTP codes, or Flask secret values. diff --git a/docs/guides/developer-reference.md b/docs/guides/developer-reference.md index 1277f89..d93bd03 100644 --- a/docs/guides/developer-reference.md +++ b/docs/guides/developer-reference.md @@ -215,8 +215,10 @@ For production portal deployments: - Set `FLASK_SECRET_KEY` to a strong secret and run `result_server/app.py`, not `app_dev.py`. - `app.py` binds to `127.0.0.1:8800` by default; set `RESULT_SERVER_HOST` and `RESULT_SERVER_PORT` explicitly when the deployment requires a different bind address. -- Set runner-scoped ingest keys with `RESULT_SERVER_KEYS=runner-a:key-a,runner-b:key-b`. +- Set runner-scoped ingest keys with `RESULT_SERVER_KEYS=runner-a:,runner-b:`. +- `FLASK_SECRET_KEY` and each ingest key must be at least 32 characters and must not use known insecure examples such as `dev-api-key`, `changeme`, or `secret`; production startup refuses these values. - The legacy `RESULT_SERVER_KEY` variable is still accepted as runner `default` for compatibility, but should be rotated to `RESULT_SERVER_KEYS`. +- See `docs/deploy/key-management.md` for generation and rotation guidance. - `REDIS_URL` must point to a monitored Redis instance; production authentication refuses login when Redis is unavailable. - `app_dev.py` is localhost-only, uses ephemeral development secrets when none are provided, and enables the Werkzeug debugger only with `RESULT_SERVER_DEV_DEBUG=1`. diff --git a/result_server/app.py b/result_server/app.py index c7ec38e..26e980e 100644 --- a/result_server/app.py +++ b/result_server/app.py @@ -11,12 +11,15 @@ from routes.results import results_bp from utils.auth import parse_ingest_keys from utils.csrf import init_csrf +from utils.preflight import validate_production_config INGEST_KEYS = parse_ingest_keys() +PREFLIGHT_ERRORS = validate_production_config(os.environ, INGEST_KEYS) -if not INGEST_KEYS: - print("ERROR: RESULT_SERVER_KEYS or RESULT_SERVER_KEY is not set.", file=sys.stderr) +if PREFLIGHT_ERRORS: + for error in PREFLIGHT_ERRORS: + print(f"ERROR: {error}", file=sys.stderr) sys.exit(1) @@ -76,7 +79,9 @@ def _register_portal_blueprints(app, prefix): """Register all portal blueprints using the given URL prefix.""" from routes.admin import admin_bp from routes.auth import auth_bp + from routes.security_metadata import register_security_metadata_routes + register_security_metadata_routes(app, prefix=prefix) app.register_blueprint(api_bp, url_prefix=prefix) app.register_blueprint(results_bp, url_prefix=f"{prefix}/results") app.register_blueprint(estimated_bp, url_prefix=f"{prefix}/estimated") diff --git a/result_server/app_dev.py b/result_server/app_dev.py index 46e8538..b50b04f 100644 --- a/result_server/app_dev.py +++ b/result_server/app_dev.py @@ -159,6 +159,7 @@ def create_dev_app(base_dir): from flask_session import Session from routes.home import register_home_routes + from routes.security_metadata import register_security_metadata_routes from utils.auth import parse_ingest_keys from utils.csrf import init_csrf from utils.system_info import get_all_systems_info, summarize_systems_info @@ -197,6 +198,7 @@ def create_dev_app(base_dir): # Home routes and loaders pull everything from current_app.config. register_home_routes(app) + register_security_metadata_routes(app) # Register all portal blueprints. from routes.api import api_bp diff --git a/result_server/routes/api.py b/result_server/routes/api.py index 61f7959..bbe33c6 100644 --- a/result_server/routes/api.py +++ b/result_server/routes/api.py @@ -14,6 +14,7 @@ from utils.auth import verify_ingest_key api_bp = Blueprint("api", __name__) +_TIMESTAMP_RE = re.compile(r"^\d{8}_\d{6}$") # ========================================== @@ -38,9 +39,12 @@ def require_api_key(): def save_json_file(data, prefix, out_dir, given_uuid=None): """Persist a JSON payload using atomic file replacement.""" + if given_uuid is not None and not is_valid_uuid(given_uuid): + abort(400, description="Invalid UUID") + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") unique_id = given_uuid or str(uuid.uuid4()) - filename = f"{prefix}_{timestamp}_{unique_id}.json" + filename = _safe_basename(f"{prefix}_{timestamp}_{unique_id}.json") path = os.path.join(out_dir, filename) tmp_path = path + ".tmp" @@ -85,13 +89,35 @@ def save_json_file(data, prefix, out_dir, given_uuid=None): def is_valid_uuid(value): """Return whether the given string is a valid UUID.""" + if not isinstance(value, str) or not value: + return False try: - uuid.UUID(value) - return True + parsed = uuid.UUID(value) + return str(parsed) == value.lower() except ValueError: return False +def is_valid_timestamp(value): + """Return whether the string matches the canonical timestamp format.""" + return isinstance(value, str) and bool(_TIMESTAMP_RE.fullmatch(value)) + + +def _safe_basename(name): + """Return a basename-only file component or abort with 400.""" + if not isinstance(name, str) or not name: + abort(400, description="Invalid file component") + if ( + name in {".", ".."} + or os.path.isabs(name) + or os.path.basename(name) != name + or "/" in name + or "\\" in name + ): + abort(400, description="Invalid file component") + return name + + def _load_json_by_uuid(directory, field_path, uuid_value): """Return the first JSON payload whose target field matches the UUID.""" json_files = sorted( @@ -222,12 +248,16 @@ def ingest_result(): def ingest_estimate(): """Receive and persist an estimated-result JSON payload.""" require_api_key() + raw_uuid = request.headers.get("X-UUID") + if raw_uuid is not None and not is_valid_uuid(raw_uuid): + abort(400, description="Invalid X-UUID header") + data = request.data return save_json_file( data=data, prefix="estimate", out_dir=current_app.config["ESTIMATED_DIR"], - given_uuid=request.headers.get("X-UUID"), + given_uuid=raw_uuid, ), 200 @@ -241,8 +271,8 @@ def ingest_padata(): abort(400, description="Invalid or missing UUID") timestamp = request.form.get("timestamp") - if not timestamp: - abort(400, description="Missing Timestamp") + if not is_valid_timestamp(timestamp): + abort(400, description="Invalid or missing Timestamp") uploaded_file = request.files.get("file") if not uploaded_file: @@ -256,12 +286,13 @@ def ingest_padata(): ] if matched_files: - old_file_path = os.path.join(received_dir, matched_files[0]) + old_file_path = os.path.join(received_dir, _safe_basename(matched_files[0])) backup_path = old_file_path + ".bak" shutil.move(old_file_path, backup_path) save_path = old_file_path else: - save_path = os.path.join(received_dir, f"padata_{timestamp}_{uuid_str}.tgz") + filename = _safe_basename(f"padata_{timestamp}_{uuid_str}.tgz") + save_path = os.path.join(received_dir, filename) tmp_path = save_path + ".tmp" with open(tmp_path, "wb") as f: diff --git a/result_server/routes/security_metadata.py b/result_server/routes/security_metadata.py new file mode 100644 index 0000000..654099a --- /dev/null +++ b/result_server/routes/security_metadata.py @@ -0,0 +1,38 @@ +"""Security metadata routes for vulnerability reporting and crawler hints.""" + +from flask import Response + + +SECURITY_TXT = """Contact: https://github.com/RIKEN-RCCS/benchkit/security/advisories/new +Expires: 2027-05-19T00:00:00Z +Preferred-Languages: ja, en +Canonical: https://fncx.r-ccs.riken.jp/.well-known/security.txt +Policy: https://github.com/RIKEN-RCCS/benchkit/blob/main/SECURITY.md +""" + +ROBOTS_TXT = """User-agent: * +Disallow: /admin/ +Disallow: /auth/ +Disallow: /dev/admin/ +Disallow: /dev/auth/ +""" + + +def register_security_metadata_routes(app, prefix=""): + """Register RFC 9116 security.txt and robots.txt routes.""" + endpoint_prefix = ( + "security_metadata" + if not prefix + else f"security_metadata_{prefix.strip('/').replace('/', '_')}" + ) + + @app.route( + f"{prefix}/.well-known/security.txt", + endpoint=f"{endpoint_prefix}_security_txt", + ) + def security_txt(): + return Response(SECURITY_TXT, mimetype="text/plain") + + @app.route(f"{prefix}/robots.txt", endpoint=f"{endpoint_prefix}_robots_txt") + def robots_txt(): + return Response(ROBOTS_TXT, mimetype="text/plain") diff --git a/result_server/tests/test_api_routes.py b/result_server/tests/test_api_routes.py index cb450c1..170b073 100644 --- a/result_server/tests/test_api_routes.py +++ b/result_server/tests/test_api_routes.py @@ -17,7 +17,7 @@ install_portal_test_stubs() -API_KEY = "test-api-key-12345" +API_KEY = "test-api-key-12345678901234567890" @pytest.fixture @@ -105,18 +105,18 @@ def test_valid_key_logs_runner_id(self, client, caplog): def test_multiple_ingest_keys_accept_individual_runner_keys(self, app): """RESULT_SERVER_KEYS-style config should accept each runner key.""" app.config["INGEST_KEYS"] = { - "runner-a-key": "runner-a", - "runner-b-key": "runner-b", + "runner-a-key-12345678901234567890": "runner-a", + "runner-b-key-12345678901234567890": "runner-b", } with app.test_client() as client: resp_a = client.post("/api/ingest/result", data=b'{"code":"a"}', - headers={"X-API-Key": "runner-a-key", + headers={"X-API-Key": "runner-a-key-12345678901234567890", "Content-Type": "application/json"}) resp_b = client.post("/api/ingest/result", data=b'{"code":"b"}', - headers={"X-API-Key": "runner-b-key", + headers={"X-API-Key": "runner-b-key-12345678901234567890", "Content-Type": "application/json"}) assert resp_a.status_code == 200 @@ -125,7 +125,7 @@ def test_multiple_ingest_keys_accept_individual_runner_keys(self, app): def test_legacy_result_server_key_env_is_still_accepted(self, tmp_dirs, monkeypatch): """RESULT_SERVER_KEY should remain valid as the default runner fallback.""" monkeypatch.delenv("RESULT_SERVER_KEYS", raising=False) - monkeypatch.setenv("RESULT_SERVER_KEY", "legacy-key") + monkeypatch.setenv("RESULT_SERVER_KEY", "legacy-key-12345678901234567890") received, received_padata, received_estimation_inputs, estimated = tmp_dirs app = build_api_route_app( received_dir=received, @@ -137,7 +137,7 @@ def test_legacy_result_server_key_env_is_still_accepted(self, tmp_dirs, monkeypa with app.test_client() as client: resp = client.post("/api/ingest/result", data=b'{"code":"legacy"}', - headers={"X-API-Key": "legacy-key", + headers={"X-API-Key": "legacy-key-12345678901234567890", "Content-Type": "application/json"}) assert resp.status_code == 200 @@ -185,6 +185,39 @@ def test_post_valid_json(self, client, tmp_dirs): assert saved["estimate_metadata"]["estimation_result_uuid"] == body["id"] assert "estimation_result_timestamp" in saved["estimate_metadata"] + def test_post_valid_json_with_uuid_header(self, client, tmp_dirs): + """A valid X-UUID header should be used as the persisted estimate id.""" + estimate_uuid = "12345678-1234-1234-1234-123456789abc" + resp = client.post("/api/ingest/estimate", + data=b'{"code":"est-test"}', + headers={"X-API-Key": API_KEY, + "X-UUID": estimate_uuid, + "Content-Type": "application/json"}) + + assert resp.status_code == 200 + body = resp.get_json() + assert body["id"] == estimate_uuid + assert estimate_uuid in body["json_file"] + + estimated = tmp_dirs[3] + files = os.listdir(estimated) + assert files == [body["json_file"]] + + @pytest.mark.parametrize("header_value", [ + "../../../etc/passwd_evil", + "..%2F..%2Fetc%2Fpasswd", + "not-a-uuid", + "", + ]) + def test_rejects_invalid_uuid_header(self, client, header_value): + """X-UUID must be a canonical UUID before it reaches file naming.""" + resp = client.post("/api/ingest/estimate", + data=b'{}', + headers={"X-API-Key": API_KEY, + "X-UUID": header_value, + "Content-Type": "application/json"}) + assert resp.status_code == 400 + def test_missing_api_key_returns_401(self, client): resp = client.post("/api/ingest/estimate", data=b'{}', @@ -241,6 +274,25 @@ def test_missing_file_returns_400(self, client): content_type="multipart/form-data") assert resp.status_code == 400 + @pytest.mark.parametrize("timestamp", [ + "../bad", + "20250101", + "2025/01/01_12:00:00", + "", + ]) + def test_rejects_invalid_timestamp(self, client, timestamp): + """PA Data timestamps must match YYYYMMDD_HHMMSS before file naming.""" + data = { + "id": "12345678-1234-1234-1234-123456789abc", + "timestamp": timestamp, + "file": (io.BytesIO(b"data"), "test.tgz"), + } + resp = client.post("/api/ingest/padata", + data=data, + headers={"X-API-Key": API_KEY}, + content_type="multipart/form-data") + assert resp.status_code == 400 + def test_missing_api_key_returns_401(self, client): resp = client.post("/api/ingest/padata", data={"id": "x", "timestamp": "t"}, @@ -524,4 +576,3 @@ def test_query_estimation_inputs_returns_archive(self, client, tmp_dirs): with tarfile.open(fileobj=archive_bytes, mode="r:gz") as tar: names = tar.getnames() assert "compute_solver_papi.tgz" in names - diff --git a/result_server/tests/test_csrf.py b/result_server/tests/test_csrf.py index 55520e0..d8f4968 100644 --- a/result_server/tests/test_csrf.py +++ b/result_server/tests/test_csrf.py @@ -13,7 +13,7 @@ install_portal_test_stubs() -API_KEY = "test-api-key-12345" +API_KEY = "test-api-key-12345678901234567890" class _Store: diff --git a/result_server/tests/test_preflight.py b/result_server/tests/test_preflight.py new file mode 100644 index 0000000..76694a6 --- /dev/null +++ b/result_server/tests/test_preflight.py @@ -0,0 +1,41 @@ +"""Tests for production preflight secret validation.""" + +import os +import sys + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from utils.preflight import validate_ingest_keys, validate_production_config + + +def test_rejects_short_ingest_keys(): + errors = validate_ingest_keys({"short-key": "runner-a"}) + + assert any("at least 32 characters" in error for error in errors) + + +def test_rejects_known_insecure_ingest_defaults(): + errors = validate_ingest_keys({"dev-api-key": "runner-a"}) + + assert any("known-insecure default" in error for error in errors) + + +def test_accepts_parallel_rotation_keys_for_same_runner(): + old_key = "old-runner-key-01234567890123456789" + new_key = "new-runner-key-01234567890123456789" + + assert validate_ingest_keys({old_key: "runner-a", new_key: "runner-a"}) == [] + + +def test_rejects_short_flask_secret_key(): + env = {"FLASK_SECRET_KEY": "short"} + errors = validate_production_config(env, {"runner-key-012345678901234567890": "runner-a"}) + + assert any("FLASK_SECRET_KEY must be at least 32 characters" in error for error in errors) + + +def test_accepts_strong_production_config(): + env = {"FLASK_SECRET_KEY": "flask-secret-012345678901234567890"} + ingest_keys = {"runner-key-012345678901234567890": "runner-a"} + + assert validate_production_config(env, ingest_keys) == [] diff --git a/result_server/tests/test_security_metadata.py b/result_server/tests/test_security_metadata.py new file mode 100644 index 0000000..770d74f --- /dev/null +++ b/result_server/tests/test_security_metadata.py @@ -0,0 +1,46 @@ +"""Tests for security metadata routes.""" + +import os +import sys + +from flask import Flask + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from routes.security_metadata import register_security_metadata_routes + + +def test_security_txt_route(): + app = Flask(__name__) + register_security_metadata_routes(app) + + resp = app.test_client().get("/.well-known/security.txt") + + assert resp.status_code == 200 + assert resp.mimetype == "text/plain" + text = resp.get_data(as_text=True) + assert "Contact: https://github.com/RIKEN-RCCS/benchkit/security/advisories/new" in text + assert "mailto:" not in text + assert "Expires: 2027-05-19T00:00:00Z" in text + assert "Policy: https://github.com/RIKEN-RCCS/benchkit/blob/main/SECURITY.md" in text + + +def test_robots_txt_route(): + app = Flask(__name__) + register_security_metadata_routes(app) + + resp = app.test_client().get("/robots.txt") + + assert resp.status_code == 200 + text = resp.get_data(as_text=True) + assert "Disallow: /admin/" in text + assert "Disallow: /auth/" in text + + +def test_security_metadata_routes_allow_prefix(): + app = Flask(__name__) + register_security_metadata_routes(app, prefix="/dev") + + resp = app.test_client().get("/dev/.well-known/security.txt") + + assert resp.status_code == 200 diff --git a/result_server/utils/preflight.py b/result_server/utils/preflight.py new file mode 100644 index 0000000..723f5e1 --- /dev/null +++ b/result_server/utils/preflight.py @@ -0,0 +1,59 @@ +"""Production preflight checks for result_server secrets.""" + +from __future__ import annotations + +from collections.abc import Mapping + + +MIN_SECRET_LENGTH = 32 + +FORBIDDEN_DEFAULTS = { + "FLASK_SECRET_KEY": frozenset({ + "", + "changeme", + "dev-secret-key", + "password", + "secret", + }), + "RESULT_SERVER_KEY": frozenset({"", "dev-api-key", "changeme", "key", "secret"}), +} + + +def _validate_secret(name: str, value: str | None) -> list[str]: + """Return validation errors for a secret-like configuration value.""" + errors: list[str] = [] + forbidden = FORBIDDEN_DEFAULTS.get(name, frozenset({""})) + normalized = value.strip().lower() if value else "" + if not value: + errors.append(f"{name} is not set") + elif normalized in forbidden: + errors.append(f"{name} is set to a known-insecure default") + elif len(value) < MIN_SECRET_LENGTH: + errors.append(f"{name} must be at least {MIN_SECRET_LENGTH} characters") + return errors + + +def validate_ingest_keys(ingest_keys: Mapping[str, str]) -> list[str]: + """Return validation errors for runner-scoped ingest keys.""" + errors: list[str] = [] + if not ingest_keys: + return ["RESULT_SERVER_KEYS or RESULT_SERVER_KEY is not set"] + + for key, runner_id in ingest_keys.items(): + errors.extend(_validate_secret("RESULT_SERVER_KEY", key)) + if not runner_id: + errors.append("RESULT_SERVER_KEYS contains an empty runner id") + + return errors + + +def validate_production_config( + env: Mapping[str, str], + ingest_keys: Mapping[str, str], +) -> list[str]: + """Return production startup errors for insecure result_server config.""" + errors = _validate_secret("FLASK_SECRET_KEY", env.get("FLASK_SECRET_KEY")) + errors.extend(validate_ingest_keys(ingest_keys)) + if env.get("FLASK_DEBUG") in {"1", "true", "True"}: + errors.append("FLASK_DEBUG must not be enabled for app.py") + return errors