diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 17b42bc..651bae1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,14 +24,15 @@ concurrency: permissions: contents: read - pull-requests: write - checks: write jobs: # Detect which packages have changed detect-changes: name: Detect Changed Packages runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: read outputs: cloudflare-auth: ${{ steps.filter.outputs.cloudflare-auth }} gcs-utilities: ${{ steps.filter.outputs.gcs-utilities }} @@ -41,6 +42,11 @@ jobs: steps.filter.outputs.gcs-utilities == 'true' || steps.filter.outputs.shared == 'true' }} steps: + - name: Harden the runner + uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 + with: + egress-policy: audit + - name: Checkout repository uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 @@ -65,11 +71,18 @@ jobs: needs: detect-changes if: needs.detect-changes.outputs.cloudflare-auth == 'true' || needs.detect-changes.outputs.shared == 'true' runs-on: ubuntu-latest + permissions: + contents: read strategy: fail-fast: false matrix: python-version: ['3.10', '3.11', '3.12', '3.13'] steps: + - name: Harden the runner + uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 + with: + egress-policy: audit + - name: Checkout repository uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 @@ -113,11 +126,18 @@ jobs: needs: detect-changes if: needs.detect-changes.outputs.gcs-utilities == 'true' || needs.detect-changes.outputs.shared == 'true' runs-on: ubuntu-latest + permissions: + contents: read strategy: fail-fast: false matrix: python-version: ['3.10', '3.11', '3.12', '3.13'] steps: + - name: Harden the runner + uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 + with: + egress-policy: audit + - name: Checkout repository uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 @@ -161,7 +181,14 @@ jobs: needs: detect-changes if: needs.detect-changes.outputs.any-package == 'true' runs-on: ubuntu-latest + permissions: + contents: read steps: + - name: Harden the runner + uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 + with: + egress-policy: audit + - name: Checkout repository uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 @@ -186,7 +213,14 @@ jobs: needs: [test-cloudflare-auth, test-gcs-utilities] if: always() && (needs.test-cloudflare-auth.result == 'success' || needs.test-gcs-utilities.result == 'success') runs-on: ubuntu-latest + permissions: + contents: read steps: + - name: Harden the runner + uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 + with: + egress-policy: audit + - name: Checkout repository uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 @@ -212,7 +246,14 @@ jobs: runs-on: ubuntu-latest needs: [detect-changes, test-cloudflare-auth, test-gcs-utilities, security, coverage] if: always() + permissions: + contents: read steps: + - name: Harden the runner + uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 + with: + egress-policy: audit + - name: Check all jobs env: NEEDS_JSON: ${{ toJSON(needs) }} diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml index 0ea96a6..07c56cd 100644 --- a/.github/workflows/codecov.yml +++ b/.github/workflows/codecov.yml @@ -23,7 +23,7 @@ jobs: name: Upload Coverage # Only run on successful CI completion if: ${{ github.event.workflow_run.conclusion == 'success' }} - uses: ByronWilliamsCPA/.github/.github/workflows/python-codecov.yml@main + uses: ByronWilliamsCPA/.github/.github/workflows/python-codecov.yml@e8fc83c98c2971ad1ece71573d28171463e30c16 # main with: artifact-name: 'coverage-reports' coverage-files: '*.xml' @@ -37,7 +37,14 @@ jobs: name: Report CI Failure runs-on: ubuntu-latest if: ${{ github.event.workflow_run.conclusion == 'failure' }} + permissions: + contents: read steps: + - name: Harden the runner + uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 + with: + egress-policy: audit + - name: Report status run: | echo "## Coverage Upload Skipped" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 2ab1b88..2fcb078 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -23,7 +23,7 @@ jobs: upload-coverage: name: Upload Coverage to Qlty if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }} - uses: ByronWilliamsCPA/.github/.github/workflows/python-qlty-coverage.yml@main + uses: ByronWilliamsCPA/.github/.github/workflows/python-qlty-coverage.yml@e8fc83c98c2971ad1ece71573d28171463e30c16 # main with: coverage-artifact-name: coverage-reports coverage-file-path: coverage.xml diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml index 737072b..aed6604 100644 --- a/.github/workflows/dependency-review.yml +++ b/.github/workflows/dependency-review.yml @@ -13,13 +13,20 @@ on: permissions: contents: read - pull-requests: write jobs: dependency-review: name: Dependency Review runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write steps: + - name: Harden the runner + uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 + with: + egress-policy: audit + - name: Checkout repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index bce78d9..a88804e 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -23,13 +23,15 @@ on: workflow_dispatch: permissions: - contents: write - pages: write - id-token: write + contents: read jobs: docs: - uses: ByronWilliamsCPA/.github/.github/workflows/python-docs.yml@main + permissions: + contents: write + pages: write + id-token: write + uses: ByronWilliamsCPA/.github/.github/workflows/python-docs.yml@e8fc83c98c2971ad1ece71573d28171463e30c16 # main with: python-version: '3.12' docs-directory: 'docs' diff --git a/.github/workflows/fips-compatibility.yml b/.github/workflows/fips-compatibility.yml index a7a6b2a..c17c94d 100644 --- a/.github/workflows/fips-compatibility.yml +++ b/.github/workflows/fips-compatibility.yml @@ -48,11 +48,13 @@ concurrency: permissions: contents: read - pull-requests: write jobs: fips-check: - uses: ByronWilliamsCPA/.github/.github/workflows/python-fips-compatibility.yml@main + permissions: + contents: read + pull-requests: write + uses: ByronWilliamsCPA/.github/.github/workflows/python-fips-compatibility.yml@e8fc83c98c2971ad1ece71573d28171463e30c16 # main with: strict-mode: ${{ github.event.inputs.strict_mode == 'true' }} include-tests: true diff --git a/.github/workflows/publish-artifact-registry.yml b/.github/workflows/publish-artifact-registry.yml index 9be0d53..2bd8982 100644 --- a/.github/workflows/publish-artifact-registry.yml +++ b/.github/workflows/publish-artifact-registry.yml @@ -48,7 +48,6 @@ concurrency: permissions: contents: read - id-token: write env: UV_VERSION: "0.5.x" @@ -58,7 +57,15 @@ jobs: publish: name: Build and Publish runs-on: ubuntu-latest + permissions: + contents: read + id-token: write steps: + - name: Harden the runner + uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 + with: + egress-policy: audit + - name: Checkout repository uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 with: diff --git a/.github/workflows/python-compatibility.yml b/.github/workflows/python-compatibility.yml index a8a3845..0784e15 100644 --- a/.github/workflows/python-compatibility.yml +++ b/.github/workflows/python-compatibility.yml @@ -34,7 +34,7 @@ permissions: jobs: compatibility: - uses: ByronWilliamsCPA/.github/.github/workflows/python-compatibility.yml@main + uses: ByronWilliamsCPA/.github/.github/workflows/python-compatibility.yml@e8fc83c98c2971ad1ece71573d28171463e30c16 # main with: python-versions: '["3.10", "3.11", "3.12", "3.13"]' operating-systems: '["ubuntu-latest"]' diff --git a/.github/workflows/qlty.yml b/.github/workflows/qlty.yml index 9efc974..127338c 100644 --- a/.github/workflows/qlty.yml +++ b/.github/workflows/qlty.yml @@ -15,7 +15,7 @@ concurrency: jobs: qlty: if: ${{ github.event.workflow_run.conclusion == 'success' }} - uses: ByronWilliamsCPA/.github/.github/workflows/python-qlty-coverage.yml@main + uses: ByronWilliamsCPA/.github/.github/workflows/python-qlty-coverage.yml@e8fc83c98c2971ad1ece71573d28171463e30c16 # main permissions: contents: read actions: read diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 25d4044..8c55a4c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -35,16 +35,21 @@ concurrency: cancel-in-progress: false permissions: - contents: write - issues: write - pull-requests: write + contents: read # Standalone release workflow jobs: test: name: Pre-Release Tests runs-on: ubuntu-latest + permissions: + contents: read steps: + - name: Harden the runner + uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 + with: + egress-policy: audit + - name: Checkout repository uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 with: @@ -68,7 +73,16 @@ jobs: name: Semantic Release needs: test runs-on: ubuntu-latest + permissions: + contents: write + issues: write + pull-requests: write steps: + - name: Harden the runner + uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 + with: + egress-policy: audit + - name: Checkout repository uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 with: diff --git a/.github/workflows/sbom.yml b/.github/workflows/sbom.yml index bd48ad6..c56a61a 100644 --- a/.github/workflows/sbom.yml +++ b/.github/workflows/sbom.yml @@ -30,12 +30,14 @@ on: permissions: contents: read - security-events: write jobs: sbom: name: SBOM & Security - uses: ByronWilliamsCPA/.github/.github/workflows/python-sbom.yml@main + permissions: + contents: read + security-events: write + uses: ByronWilliamsCPA/.github/.github/workflows/python-sbom.yml@e8fc83c98c2971ad1ece71573d28171463e30c16 # main with: python-version: '3.12' fail-on-vulnerabilities: true diff --git a/.github/workflows/slsa-provenance.yml b/.github/workflows/slsa-provenance.yml index 59c1422..cd4ad2f 100644 --- a/.github/workflows/slsa-provenance.yml +++ b/.github/workflows/slsa-provenance.yml @@ -22,10 +22,7 @@ on: type: string permissions: - contents: write - id-token: write - actions: read - attestations: write + contents: read jobs: # ========================================================================== @@ -35,6 +32,11 @@ jobs: name: Build Package runs-on: ubuntu-latest if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }} + permissions: + contents: read + id-token: write + actions: read + attestations: write outputs: hashes: ${{ steps.hashes.outputs.hashes }} version: ${{ steps.version.outputs.version }} @@ -98,7 +100,7 @@ jobs: slsa: name: SLSA Level 3 needs: [build] - uses: ByronWilliamsCPA/.github/.github/workflows/python-slsa.yml@main + uses: ByronWilliamsCPA/.github/.github/workflows/python-slsa.yml@e8fc83c98c2971ad1ece71573d28171463e30c16 # main with: base64-subjects: ${{ needs.build.outputs.hashes }} upload-assets: true diff --git a/CHANGELOG.md b/CHANGELOG.md index f429098..bdcc44e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,36 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Security + +- Hardened `CloudflareJWTValidator`: enforce an allowlist of asymmetric + JWT algorithms (`RS*`/`ES*`/`PS*`) at construction and against the + unverified token header, rejecting `none` and HS* to block + algorithm-confusion attacks. +- JWT decode now explicitly enables `verify_signature`, `verify_exp`, + `verify_nbf`, `verify_iat`, and a `require` set for `exp`/`iat`/ + `iss`/`sub`/`aud`. Added 30s leeway for clock skew. Removed the + `verify_exp=False` keyword from `validate_token` / + `validate_token_async`. +- Replaced blanket `except Exception` in JWT validation with specific + PyJWT exception handlers; `get_unverified_claims` now logs a warning + on every call. +- Hardened `GCSClient._sanitize_gcs_path`: reject control characters, + backslashes, and `.`/`..` segments. Reject null bytes in local paths. +- `GCSClient.upload_directory` re-sanitizes the joined GCS path so + unusual filenames cannot reintroduce traversal sequences. +- `GCSClient.delete_directory` rejects empty / whitespace-only prefixes + to prevent accidental bucket-wide wipes. +- Pinned all `ByronWilliamsCPA/.github` reusable-workflow references in + `.github/workflows/*.yml` to a commit SHA instead of `@main`. +- Reduced workflow-level `permissions:` to `contents: read` and granted + elevated rights only to the jobs that need them. +- Added `step-security/harden-runner` with `egress-policy: audit` to + every workflow job that runs inline steps. +- Added `SECURITY-NOTES.md` documenting the unbounded `>=` dependency + ranges and the three highest-risk dependencies (`pyjwt`, + `cryptography`, `google-cloud-storage`). + ### Documentation - Filled in remaining missing docstrings to reach 100% interrogate coverage diff --git a/SECURITY-NOTES.md b/SECURITY-NOTES.md new file mode 100644 index 0000000..caef191 --- /dev/null +++ b/SECURITY-NOTES.md @@ -0,0 +1,140 @@ +# Security Notes + + + +This document records security-relevant observations about the project that +do not fit into `SECURITY.md` (vulnerability reporting policy) but should be +visible to maintainers and downstream consumers. + +## Dependency Version Floors Are Unbounded + +Every production and dev dependency in `pyproject.toml` (root) and the +per-package `packages/*/pyproject.toml` files uses a bare `>=` floor with +**no upper bound**. Examples: + +```toml +"pyjwt>=2.8.0", +"cryptography>=41.0.0", +"google-cloud-storage>=2.10.0", +``` + +Because there is no `=` ranges are what downstream consumers see when they +install `byronwilliamscpa-cloudflare-auth` etc. into their own projects. + +**Recommendation:** add an upper bound (e.g. `>=2.8.0,<3.0.0`) to every +runtime dependency, especially the three listed below. Update the bound +deliberately after testing each new major release. + +## Three Highest-Risk Dependencies + +The risk ranking below considers (a) the blast radius if the dependency +ships a behaviour-changing release and (b) the security sensitivity of +the code path that uses it. All three are direct runtime dependencies of +production code paths. + +### 1. `pyjwt>=2.8.0` — JWT signing & validation + +- **Where used:** `packages/cloudflare-auth/src/cloudflare_auth/validators.py` + (`CloudflareJWTValidator.validate_token`) — the only authentication + gate in front of every protected route that uses `cloudflare-auth`. +- **Why high risk:** + - PyJWT has historically changed validation defaults across releases + (e.g. `options` keys, `require` semantics, audience matching, + handling of the `kid` header). A silent major upgrade could weaken + signature, expiry, or audience enforcement in ways that are not + obvious from a passing test suite if tests stub the signing key. + - The library accepts an `algorithms=` allowlist. If a future version + changes how that list is interpreted (e.g. treats an empty list + differently, or alters how `none` is rejected), it could enable + algorithm-confusion or signature-stripping attacks. + - Bugs in PyJWT itself — for example, CVE-2022-29217 (key-confusion + between symmetric and asymmetric algorithms) — have shipped in + point releases. We must be able to pin to a known-good range and + upgrade deliberately. +- **Suggested constraint:** `"pyjwt>=2.8.0,<3.0.0"`. + +### 2. `cryptography>=41.0.0` — Asymmetric key handling, TLS primitives + +- **Where used:** transitively by `PyJWKClient`/`pyjwt` to verify RSA + and EC signatures on Cloudflare Access tokens; also pulled in by + `httpx`/`google-cloud-storage` for TLS. Declared explicitly as a + direct dependency of `cloudflare-auth`. +- **Why high risk:** + - `cryptography` releases drop deprecated APIs and algorithms on a + regular cadence (e.g. removal of `Blowfish`/`CAST5`, narrowing of + `BACKEND` parameters). A major bump can break signature + verification or downgrade FIPS-mode compatibility — a hard + requirement called out in `CLAUDE.md`. + - It is the substrate behind nearly every crypto-sensitive operation + in the project. Any vulnerability here (e.g. CVE-2023-49083 in + `cryptography==41.0.x`) can directly compromise JWT validation + and TLS endpoint verification. + - The bundled OpenSSL is rebuilt with each minor release. A + surprise minor bump in transitive resolution can change the + OpenSSL FIPS provider's behaviour. +- **Suggested constraint:** `"cryptography>=42.0.0,<46.0.0"` (or + whatever current stable line your FIPS validation has been performed + against), and pin to the same constraint in every `packages/*` + `pyproject.toml` that depends on it. + +### 3. `google-cloud-storage>=2.10.0` — Data plane for all GCS operations + +- **Where used:** `packages/gcs-utilities/src/gcs_utilities/client.py` + (`GCSClient` — upload, download, list, delete, including + `delete_directory`). +- **Why high risk:** + - The 2.x line has already changed authentication default behaviour + around Application Default Credentials and `GOOGLE_APPLICATION_CREDENTIALS` + handling between minor releases. A major bump (3.x) could alter + how service-account JSON files are discovered or how retries are + performed, leading to either credential leaks or silent + cross-account access in misconfigured environments. + - `GCSClient` writes the decoded service-account key to a temporary + file with `0o600` permissions; any change in how + `google-cloud-storage` resolves credentials (e.g. preferring + workload identity in some contexts) could cause that file to be + bypassed without warning. + - Bucket listing, deletion, and signed-URL generation all live in + this SDK. A regression in pagination or filtering could surface + objects from neighbouring buckets, and a regression in + `Blob.exists()` semantics could break the + "exists-before-delete" guard in `GCSClient.delete_file`. +- **Suggested constraint:** `"google-cloud-storage>=2.10.0,<3.0.0"` + in `packages/gcs-utilities/pyproject.toml` (and the matching dev + dependency in the root `pyproject.toml`). + +## Honourable Mentions + +These are also unbounded and worth pinning, but rank below the top three: + +| Package | Risk | +|---|---| +| `pydantic>=2.0.0` | v1→v2 was famously breaking; another major bump could change settings/validator semantics. | +| `fastapi>=0.100.0` / `starlette>=0.27.0` | Auth middleware contract has shifted between minors; a major bump can change exception flow used by `cloudflare-auth` middleware. | +| `redis>=5.0.0` | Session store; major bumps have changed connection-pool defaults and TLS verification. | +| `httpx>=0.25.0` | Transport for the JWKS endpoint; HTTP/2 and proxy defaults have shifted between releases. | + +## Related Hardening Performed in This Branch + +- `CloudflareJWTValidator` now refuses unsafe algorithms (`none`, `HS*`) + at construction time and explicitly enforces `verify_signature`, + `verify_exp`, `verify_nbf`, `verify_iat`, and a `require` set for + `exp`, `iat`, `iss`, `sub`, `aud` on every `jwt.decode` call. +- `GCSClient._sanitize_gcs_path` now rejects control characters, + backslashes, and `.`/`..` segments. +- `GCSClient.delete_directory` rejects empty prefixes to prevent + accidental bucket-wide wipes. +- All `.github/workflows/*.yml` reusable-workflow references are pinned + to a commit SHA rather than `@main`, and `step-security/harden-runner` + with `egress-policy: audit` has been added to every job that runs + inline steps. diff --git a/packages/cloudflare-auth/src/cloudflare_auth/validators.py b/packages/cloudflare-auth/src/cloudflare_auth/validators.py index 4cb1cd4..d3638fd 100644 --- a/packages/cloudflare-auth/src/cloudflare_auth/validators.py +++ b/packages/cloudflare-auth/src/cloudflare_auth/validators.py @@ -34,12 +34,25 @@ import jwt from jwt import PyJWKClient +from jwt.exceptions import PyJWKClientError +from pydantic import ValidationError from cloudflare_auth.config import CloudflareSettings, get_cloudflare_settings from cloudflare_auth.models import CloudflareJWTClaims logger = logging.getLogger(__name__) +# Allowlist of safe asymmetric JWT algorithms. Cloudflare Access signs with +# RS256; symmetric algorithms (HS*) and "none" are rejected to prevent +# algorithm-confusion and signature-stripping attacks where an attacker +# crafts a token using the public key (treated as HMAC secret) or no signature. +_ALLOWED_JWT_ALGORITHMS: frozenset[str] = frozenset( + {"RS256", "RS384", "RS512", "ES256", "ES384", "ES512", "PS256", "PS384", "PS512"} +) + +# Maximum tolerated clock skew between issuer and verifier (seconds). +_JWT_LEEWAY_SECONDS: int = 30 + class CloudflareJWTValidator: """Validates JWT tokens from Cloudflare Access. @@ -69,9 +82,23 @@ def __init__(self, settings: CloudflareSettings | None = None) -> None: Args: settings: Optional CloudflareSettings instance (uses default if not provided) + + Raises: + ValueError: If configured ``jwt_algorithm`` is not in the + allowlist of safe asymmetric algorithms. """ self.settings = settings or get_cloudflare_settings() + # #CRITICAL: Security: reject unsafe algorithms at construction time. + # If misconfigured to "none" or an HS* algorithm, an attacker could + # forge tokens or bypass signature verification entirely. + if self.settings.jwt_algorithm not in _ALLOWED_JWT_ALGORITHMS: + msg = ( + f"Unsupported JWT algorithm: {self.settings.jwt_algorithm!r}. " + f"Allowed algorithms: {sorted(_ALLOWED_JWT_ALGORITHMS)}" + ) + raise ValueError(msg) + if not self.settings.cloudflare_team_domain: logger.warning( "Cloudflare team domain not configured. JWT validation will fail." @@ -89,23 +116,26 @@ def __init__(self, settings: CloudflareSettings | None = None) -> None: self._last_key_refresh: datetime | None = None - def validate_token( + def validate_token( # noqa: C901, PLR0912 -- per-PyJWT-exception handlers are flat and clearer than nesting self, token: str, - verify_exp: bool = True, ) -> CloudflareJWTClaims: """Validate a Cloudflare Access JWT token. This method performs comprehensive validation: 1. Signature verification using Cloudflare's public keys - 2. Expiration time validation - 3. Issuer validation - 4. Audience validation - 5. Required claims presence + 2. Algorithm enforcement (asymmetric only, allowlist) + 3. Expiration (``exp``) and not-before (``nbf``) checking + 4. Issued-at (``iat``) sanity checking + 5. Issuer validation + 6. Audience validation + 7. Required claims presence + + Signature verification, expiration, and required-claim checks + cannot be disabled by callers. Args: token: JWT token string from Cf-Access-Jwt-Assertion header - verify_exp: Whether to verify token expiration (default: True) Returns: CloudflareJWTClaims object with validated claims @@ -136,25 +166,46 @@ def validate_token( msg = "JWT validator not configured. Set CLOUDFLARE_TEAM_DOMAIN." raise RuntimeError(msg) + # #CRITICAL: Security: an unset audience tag means PyJWT skips + # audience verification entirely, which would let any token + # signed by this issuer pass -- not only tokens minted for this + # application. Refuse to validate at all in that case. + if not self.settings.cloudflare_audience_tag: + msg = ( + "JWT validator misconfigured: cloudflare_audience_tag must " + "be set so PyJWT can enforce the audience claim." + ) + raise RuntimeError(msg) + try: # Get the signing key from the JWT header signing_key = self.jwks_client.get_signing_key_from_jwt(token) - # Decode and validate the token + # Decode and validate the token. All security-relevant options + # are explicit so future PyJWT default changes cannot silently + # weaken validation. The ``algorithms`` argument is fixed to + # the constructor-validated value, so PyJWT will reject any + # token whose header advertises an algorithm outside the + # asymmetric allowlist. payload = jwt.decode( token, signing_key.key, algorithms=[self.settings.jwt_algorithm], audience=self.settings.cloudflare_audience_tag, issuer=self.settings.issuer, + leeway=_JWT_LEEWAY_SECONDS, options={ - "verify_exp": verify_exp, - "verify_aud": bool(self.settings.cloudflare_audience_tag), - "verify_iss": bool(self.settings.issuer), + "verify_signature": True, + "verify_exp": True, + "verify_nbf": True, + "verify_iat": True, + "verify_aud": True, + "verify_iss": True, + "require": ["exp", "iat", "iss", "sub", "aud"], }, ) - # Validate required claims + # Validate required claims (defence-in-depth on top of "require") self._validate_required_claims(payload) # Create and return claims object @@ -182,6 +233,16 @@ def validate_token( msg = "Token has expired" raise ValueError(msg) from e + except jwt.ImmatureSignatureError as e: + logger.warning("JWT token not yet valid: %s", str(e)) + msg = "Token is not yet valid (nbf)" + raise ValueError(msg) from e + + except jwt.MissingRequiredClaimError as e: + logger.warning("JWT missing required claim: %s", str(e)) + msg = f"Missing required claim: {e!s}" + raise ValueError(msg) from e + except jwt.InvalidAudienceError as e: logger.warning("Invalid JWT audience: %s", str(e)) msg = "Invalid token audience" @@ -192,6 +253,11 @@ def validate_token( msg = "Invalid token issuer" raise ValueError(msg) from e + except jwt.InvalidAlgorithmError as e: + logger.warning("Invalid JWT algorithm: %s", str(e)) + msg = "Invalid token algorithm" + raise ValueError(msg) from e + except jwt.InvalidSignatureError as e: logger.warning("Invalid JWT signature: %s", str(e)) msg = "Invalid token signature" @@ -202,9 +268,29 @@ def validate_token( msg = "Invalid token format" raise ValueError(msg) from e - except Exception as e: - logger.exception("Unexpected error validating JWT: %s", str(e)) - msg = f"Token validation failed: {e!s}" + except jwt.InvalidTokenError as e: + # Catch-all for any other PyJWT validation failure rather than + # a blanket ``except Exception`` (which would mask programmer + # errors and ruff BLE001). + logger.warning("JWT validation failed: %s", str(e)) + msg = "Token validation failed" + raise ValueError(msg) from e + + except PyJWKClientError as e: + # Raised by ``get_signing_key_from_jwt`` when the ``kid`` is + # missing, unknown to the JWKS endpoint, or the JWKS fetch + # fails. Treat as an authentication failure so the middleware + # returns 401 rather than 500. + logger.warning("JWKS lookup failed: %s", str(e)) + msg = "Could not resolve JWT signing key" + raise ValueError(msg) from e + + except ValidationError as e: + # ``CloudflareJWTClaims(**payload)`` rejected the decoded + # claims (e.g. malformed ``email``). Surface as an auth + # failure rather than a 500. + logger.warning("JWT claims failed schema validation: %s", str(e)) + msg = "Invalid token claims" raise ValueError(msg) from e def _validate_required_claims(self, payload: dict[str, Any]) -> None: @@ -227,7 +313,6 @@ def _validate_required_claims(self, payload: dict[str, Any]) -> None: async def validate_token_async( self, token: str, - verify_exp: bool = True, ) -> CloudflareJWTClaims: """Async version of validate_token. @@ -237,7 +322,6 @@ async def validate_token_async( Args: token: JWT token string - verify_exp: Whether to verify token expiration Returns: CloudflareJWTClaims object with validated claims @@ -247,7 +331,7 @@ async def validate_token_async( """ # JWT validation is CPU-bound, not I/O bound # But we provide async interface for consistency - return self.validate_token(token, verify_exp=verify_exp) + return self.validate_token(token) def refresh_keys(self) -> None: """Force refresh of cached public keys. @@ -284,27 +368,31 @@ def is_configured(self) -> bool: ) def get_unverified_claims(self, token: str) -> dict[str, Any]: - """Get claims from token without verification. + """Get claims from token without verification. DEBUG USE ONLY. + + WARNING: This method does NOT verify the token signature, + expiration, issuer, audience, or any other claim. The returned + data MUST NOT be used for authentication, authorization, or any + security-relevant decision. Use ``validate_token`` instead. - WARNING: This method does NOT verify the token signature. - Only use for debugging or non-security-critical inspection. + A warning is logged on every call so misuse is visible in + production logs. Args: token: JWT token string Returns: - Dictionary of unverified claims - - Example: - # For debugging only - claims = validator.get_unverified_claims(token) - print(f"Token issued for: {claims.get('email')}") + Dictionary of unverified claims, or empty dict on parse error. """ + logger.warning( + "get_unverified_claims() called - claims are NOT verified and " + "MUST NOT be trusted for any security decision." + ) try: return jwt.decode( token, options={"verify_signature": False}, ) - except Exception as e: - logger.exception("Failed to decode token: %s", str(e)) + except jwt.InvalidTokenError as e: + logger.warning("Failed to decode unverified token: %s", str(e)) return {} diff --git a/packages/cloudflare-auth/tests/test_validators.py b/packages/cloudflare-auth/tests/test_validators.py new file mode 100644 index 0000000..674e5df --- /dev/null +++ b/packages/cloudflare-auth/tests/test_validators.py @@ -0,0 +1,259 @@ +"""Tests for CloudflareJWTValidator security hardening. + +These tests cover behaviour added in the security-hardening pass: +- Constructor algorithm allowlist (rejects ``none``, HS*). +- Required-claim validation. +- ``get_unverified_claims`` warning + safe fallback. +- Configuration property (``is_configured``). +- Configured-but-unconfigured-domain warning path. +""" + +import logging +from unittest.mock import MagicMock, patch + +import pytest + +from cloudflare_auth.config import CloudflareSettings +from cloudflare_auth.validators import ( + _ALLOWED_JWT_ALGORITHMS, + CloudflareJWTValidator, +) + + +def _settings(**overrides) -> CloudflareSettings: + """Build a CloudflareSettings instance with a populated team domain.""" + defaults = { + "cloudflare_team_domain": "myteam", + "cloudflare_audience_tag": "aud-123", + } + defaults.update(overrides) + return CloudflareSettings(**defaults) + + +class TestAlgorithmAllowlist: + """Constructor must reject unsafe / non-asymmetric algorithms.""" + + @pytest.mark.parametrize( + "alg", + ["RS256", "RS384", "RS512", "ES256", "ES384", "ES512", "PS256"], + ) + def test_accepts_asymmetric_algorithms(self, alg): + """Each algorithm in the allowlist must be accepted at construction.""" + assert alg in _ALLOWED_JWT_ALGORITHMS + v = CloudflareJWTValidator(_settings(jwt_algorithm=alg)) + assert v.settings.jwt_algorithm == alg + + @pytest.mark.parametrize( + "alg", + ["none", "None", "NONE", "HS256", "HS384", "HS512", "", "rs256", "garbage"], + ) + def test_rejects_unsafe_or_unknown_algorithms(self, alg): + """``none``, HS*, lowercase, and unknown values must be rejected.""" + with pytest.raises(ValueError, match="Unsupported JWT algorithm"): + CloudflareJWTValidator(_settings(jwt_algorithm=alg)) + + +class TestConstructorConfigState: + """Validator captures configuration state and warns when incomplete.""" + + def test_missing_team_domain_warns_and_jwks_client_none(self, caplog): + """Without a team domain, JWKS client is None and a warning is logged.""" + caplog.set_level(logging.WARNING) + v = CloudflareJWTValidator( + CloudflareSettings( + cloudflare_team_domain="", + cloudflare_audience_tag="aud", + ) + ) + assert v.jwks_client is None + assert any( + "team domain not configured" in record.message.lower() + for record in caplog.records + ) + + def test_is_configured_true_when_complete(self): + """``is_configured`` is True only when all required settings are present.""" + v = CloudflareJWTValidator(_settings()) + assert v.is_configured is True + + def test_is_configured_false_without_audience(self): + """Missing audience tag means the validator is not fully configured.""" + v = CloudflareJWTValidator(_settings(cloudflare_audience_tag="")) + assert v.is_configured is False + + +class TestValidateTokenWithoutJWKS: + """``validate_token`` raises RuntimeError when configuration is incomplete.""" + + def test_validate_token_raises_runtime_error_when_unconfigured(self): + """Calling validate_token without a JWKS client must fail loudly.""" + v = CloudflareJWTValidator( + CloudflareSettings( + cloudflare_team_domain="", + cloudflare_audience_tag="aud", + ) + ) + with pytest.raises(RuntimeError, match="not configured"): + v.validate_token("token.value.here") + + def test_validate_token_raises_when_audience_missing(self): + """Missing audience must abort validation rather than skip aud check.""" + v = CloudflareJWTValidator( + _settings(cloudflare_audience_tag="") + ) + with pytest.raises(RuntimeError, match="cloudflare_audience_tag"): + v.validate_token("token.value.here") + + +class TestRequiredClaimsValidation: + """``_validate_required_claims`` enforces presence of standard claims.""" + + def test_complete_payload_passes(self, sample_jwt_payload): + v = CloudflareJWTValidator(_settings()) + # Should not raise + v._validate_required_claims(sample_jwt_payload) + + @pytest.mark.parametrize( + "missing", + ["email", "iss", "aud", "sub", "iat", "exp"], + ) + def test_missing_claim_raises(self, sample_jwt_payload, missing): + sample_jwt_payload.pop(missing) + v = CloudflareJWTValidator(_settings()) + with pytest.raises(ValueError, match="Missing required JWT claims"): + v._validate_required_claims(sample_jwt_payload) + + +class TestGetUnverifiedClaims: + """``get_unverified_claims`` returns claims, logs a warning, and never raises.""" + + def test_returns_claims_and_logs_warning_for_valid_token( + self, valid_jwt_payload, caplog + ): + """A parseable token returns its claims; a warning is always logged.""" + import jwt as pyjwt + + token = pyjwt.encode(valid_jwt_payload, "secret", algorithm="HS256") + v = CloudflareJWTValidator(_settings()) + + caplog.set_level(logging.WARNING) + claims = v.get_unverified_claims(token) + + assert claims["email"] == valid_jwt_payload["email"] + assert claims["iss"] == valid_jwt_payload["iss"] + assert any( + "get_unverified_claims" in r.message for r in caplog.records + ), "expected warning log on every call to get_unverified_claims" + + def test_returns_empty_dict_on_malformed_token(self, caplog): + """A malformed token must not propagate -- returns ``{}``.""" + v = CloudflareJWTValidator(_settings()) + caplog.set_level(logging.WARNING) + result = v.get_unverified_claims("not.a.real.token") + assert result == {} + + +class TestRefreshKeys: + """``refresh_keys`` replaces the JWKS client without crashing.""" + + def test_refresh_keys_creates_new_client(self): + v = CloudflareJWTValidator(_settings()) + original = v.jwks_client + v.refresh_keys() + assert v.jwks_client is not None + # New PyJWKClient instance, not the same object + assert v.jwks_client is not original + assert v._last_key_refresh is not None + + +class TestValidateTokenDecodeOptions: + """``validate_token`` passes the hardened option set to ``jwt.decode``.""" + + def test_decode_options_enforce_signature_exp_nbf_iat_and_require( + self, valid_jwt_payload + ): + """Verify the security-relevant options are spelled out on every call.""" + v = CloudflareJWTValidator(_settings()) + + fake_signing_key = MagicMock(key="fake-key") + v.jwks_client = MagicMock() + v.jwks_client.get_signing_key_from_jwt.return_value = fake_signing_key + + with patch( + "cloudflare_auth.validators.jwt.decode", + return_value=valid_jwt_payload, + ) as mock_decode: + v.validate_token("fake.jwt.token") + + # ``options`` is passed as a kwarg + call_kwargs = mock_decode.call_args.kwargs + opts = call_kwargs["options"] + assert opts["verify_signature"] is True + assert opts["verify_exp"] is True + assert opts["verify_nbf"] is True + assert opts["verify_iat"] is True + assert "exp" in opts["require"] + assert "iat" in opts["require"] + assert "iss" in opts["require"] + assert "sub" in opts["require"] + assert "aud" in opts["require"] + # ``algorithms`` is constrained to the configured (validated) value + assert call_kwargs["algorithms"] == [v.settings.jwt_algorithm] + # Some leeway is allowed for clock skew + assert call_kwargs["leeway"] > 0 + + +class TestValidateTokenErrorMapping: + """PyJWT-specific exceptions are mapped to ``ValueError`` with a message.""" + + @pytest.mark.parametrize( + ("exc_cls", "expected_substring"), + [ + ("ExpiredSignatureError", "expired"), + ("ImmatureSignatureError", "not yet valid"), + ("InvalidAudienceError", "audience"), + ("InvalidIssuerError", "issuer"), + ("InvalidAlgorithmError", "algorithm"), + ("InvalidSignatureError", "signature"), + ("DecodeError", "format"), + ], + ) + def test_pyjwt_exceptions_become_value_errors(self, exc_cls, expected_substring): + import jwt as pyjwt + + v = CloudflareJWTValidator(_settings()) + v.jwks_client = MagicMock() + v.jwks_client.get_signing_key_from_jwt.return_value = MagicMock(key="k") + + with patch( + "cloudflare_auth.validators.jwt.decode", + side_effect=getattr(pyjwt, exc_cls)("boom"), + ), pytest.raises(ValueError, match=expected_substring): + v.validate_token("any.jwt.token") + + def test_pyjwk_client_error_becomes_value_error(self): + """JWKS resolution failures should surface as 401-equivalent errors.""" + from jwt.exceptions import PyJWKClientError + + v = CloudflareJWTValidator(_settings()) + v.jwks_client = MagicMock() + v.jwks_client.get_signing_key_from_jwt.side_effect = PyJWKClientError( + "kid not found" + ) + + with pytest.raises(ValueError, match="signing key"): + v.validate_token("any.jwt.token") + + def test_pydantic_validation_error_becomes_value_error(self, valid_jwt_payload): + """A malformed claims payload should not produce a 500.""" + v = CloudflareJWTValidator(_settings()) + v.jwks_client = MagicMock() + v.jwks_client.get_signing_key_from_jwt.return_value = MagicMock(key="k") + + bad_payload = dict(valid_jwt_payload) + bad_payload["email"] = "not-an-email" + with patch( + "cloudflare_auth.validators.jwt.decode", + return_value=bad_payload, + ), pytest.raises(ValueError, match="Invalid token claims"): + v.validate_token("any.jwt.token") diff --git a/packages/gcs-utilities/src/gcs_utilities/client.py b/packages/gcs-utilities/src/gcs_utilities/client.py index 009a019..fdbc473 100644 --- a/packages/gcs-utilities/src/gcs_utilities/client.py +++ b/packages/gcs-utilities/src/gcs_utilities/client.py @@ -10,6 +10,7 @@ from pathlib import Path from typing import Any +from google.auth.exceptions import GoogleAuthError from google.cloud import storage from google.cloud.exceptions import GoogleCloudError, NotFound @@ -190,9 +191,12 @@ def _get_or_create_bucket(self, auto_create: bool = False) -> storage.Bucket: ) raise GCSNotFoundError(msg) return bucket - except Exception as e: - if isinstance(e, GCSNotFoundError): - raise + except GCSNotFoundError: + raise + except (GoogleCloudError, GoogleAuthError, OSError, ValueError) as e: + # GoogleAuthError covers DefaultCredentialsError / RefreshError + # which are not subclasses of GoogleCloudError but should still + # surface as GCSAuthError to callers. msg = f"Failed to access bucket '{self.bucket_name}': {e}" raise GCSAuthError(msg) from e @@ -221,6 +225,12 @@ def _validate_local_path(path: Path, must_exist: bool = False) -> Path: ValueError: If path is invalid or contains traversal attempts. FileNotFoundError: If must_exist is True and path doesn't exist. """ + # #CRITICAL: Security: reject null bytes in local paths (can be used + # to truncate paths in some OS calls and bypass downstream checks). + if "\x00" in str(path): + msg = "Local path contains null byte" + raise ValueError(msg) + try: # Resolve to absolute path to prevent traversal attacks resolved_path = path.resolve() @@ -238,17 +248,32 @@ def _validate_local_path(path: Path, must_exist: bool = False) -> Path: @staticmethod def _sanitize_gcs_path(gcs_path: str) -> str: - """Sanitize GCS path to prevent issues. + """Sanitize GCS object path/prefix and reject traversal attempts. Args: gcs_path: GCS blob path to sanitize. Returns: - Sanitized path. + Sanitized path with leading slashes stripped. Raises: - ValueError: If path is invalid. + ValueError: If path is empty, contains a ``..`` segment, + contains backslashes, null bytes, or other control + characters. """ + # Reject null bytes and ASCII control characters anywhere in the + # path -- these have no legitimate use in object names and have + # historically been used to bypass downstream validation. + if any(ord(c) < 0x20 or c == "\x7f" for c in gcs_path): + msg = "GCS path contains control characters" + raise ValueError(msg) + + # Reject backslashes: GCS uses forward slashes; backslashes can + # confuse Windows-aware downstream tooling and obscure ``..``. + if "\\" in gcs_path: + msg = "GCS path cannot contain backslashes" + raise ValueError(msg) + # Remove leading slashes (GCS paths shouldn't start with /) gcs_path = gcs_path.lstrip("/") @@ -257,9 +282,17 @@ def _sanitize_gcs_path(gcs_path: str) -> str: msg = "GCS path cannot be empty" raise ValueError(msg) - # Check for suspicious patterns + # Reject any ``.`` / ``..`` path segment AND any ``..`` substring + # anywhere in the path. This is deliberately stricter than the + # POSIX traversal rule: callers with legitimate names that + # happen to contain ``..`` (e.g. ``v1..1``) must rename. The + # segment check is kept first so the more specific error wins. + segments = gcs_path.split("/") + if any(seg in {"..", "."} for seg in segments): + msg = "GCS path cannot contain '.' or '..' segments" + raise ValueError(msg) if ".." in gcs_path: - msg = "GCS path cannot contain '..' segments" + msg = "GCS path cannot contain '..' sequences" raise ValueError(msg) return gcs_path @@ -364,8 +397,20 @@ def upload_directory( logger.debug(f"Skipping excluded file: {rel_path}") continue - # Construct GCS path - gcs_path = f"{gcs_prefix.rstrip('/')}/{rel_path}" + # Construct and re-sanitize the joined GCS path. ``rel_path`` + # comes from a filesystem walk so should be safe, but symlinks + # or unusual filenames could still introduce traversal + # sequences -- defence in depth. ``as_posix()`` ensures + # Windows ``\`` separators don't get rejected by the + # sanitizer's backslash check. + try: + gcs_path = self._sanitize_gcs_path( + f"{gcs_prefix.rstrip('/')}/{rel_path.as_posix()}" + ) + except ValueError as e: + logger.warning("Skipping file with unsafe path %s: %s", rel_path, e) + failed.append(str(rel_path)) + continue try: blob = bucket.blob(gcs_path) @@ -609,13 +654,28 @@ def delete_directory( """Delete all files with a given prefix (directory-like deletion). Args: - prefix: Prefix of files to delete. + prefix: Prefix of files to delete. Must be non-empty; empty + prefixes are rejected to prevent accidental full-bucket + wipes. bucket_name: Bucket name (uses default if not specified). Returns: Number of files deleted. + + Raises: + ValueError: If ``prefix`` is empty / whitespace-only. """ - prefix = self._sanitize_gcs_path(prefix) if prefix else "" + # #CRITICAL: Security: an empty prefix matches every object in + # the bucket. Reject it explicitly so a logic bug in the caller + # cannot trigger a bucket-wide deletion. + if not prefix or not prefix.strip(): + msg = ( + "delete_directory requires a non-empty prefix; " + "use explicit per-blob deletion to clear an entire bucket" + ) + raise ValueError(msg) + + prefix = self._sanitize_gcs_path(prefix) bucket = self._get_bucket(bucket_name) blobs = bucket.list_blobs(prefix=prefix)