diff --git a/.gitignore b/.gitignore index e9aa04a8b..f5de0754b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ target/ **/*.rs.bk +x509-cert/tests/limbo/limbo.json # CLion IDE .idea diff --git a/Cargo.lock b/Cargo.lock index 21dceea0d..536fa8c5b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -439,6 +439,15 @@ dependencies = [ "rand_core 0.10.1", ] +[[package]] +name = "ct-disasm" +version = "0.0.0" +dependencies = [ + "base16ct", + "base32ct", + "base64ct", +] + [[package]] name = "ctutils" version = "0.4.2" @@ -2123,6 +2132,8 @@ dependencies = [ "rand 0.10.1", "rsa", "rstest", + "serde", + "serde_json", "sha1", "sha2", "signature", diff --git a/Cargo.toml b/Cargo.toml index 76339a17c..5206b3aec 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,7 +27,8 @@ members = [ "x509-tsp", "x509-cert", "x509-cert/test-support", - "x509-ocsp" + "x509-ocsp", + "dev-tools/ct-disasm" ] [profile.dev] diff --git a/base32ct/src/encoding.rs b/base32ct/src/encoding.rs index 6b50cd3bb..e364e8f02 100644 --- a/base32ct/src/encoding.rs +++ b/base32ct/src/encoding.rs @@ -242,20 +242,33 @@ fn decoded_len(input_len: usize) -> usize { } /// Remove padding from the provided input. -fn remove_padding(mut input: &[u8]) -> Result<&[u8]> { - // TODO(tarcieri): properly validate padding +fn remove_padding(input: &[u8]) -> Result<&[u8]> { if input.len() % 8 != 0 { return Err(Error::InvalidEncoding); } - for _ in 0..6 { - match input.split_last() { - Some((b'=', rest)) => input = rest, - _ => break, - } + let n = input.len(); + if n == 0 { + return Ok(input); + } + + // Count trailing `=` over a fixed six-byte window without short-circuiting + // on the first non-`=` byte. `all_pad` stays 0xFF only while every byte + // walked so far (from the right) has been `=`; once a non-`=` is seen it + // flips to 0 and contributes nothing to `pad_count` for the remainder of + // the loop. 
This avoids leaking, via timing on malformed input, *which* + // of the trailing positions held the first non-`=` byte. + let tail = &input[n - 6..]; + let mut all_pad: u8 = 0xff; + let mut pad_count: u8 = 0; + for &b in tail.iter().rev() { + let is_eq = u8::from(b == b'='); + let mask = 0u8.wrapping_sub(is_eq); // 0xff if `=`, 0 otherwise + all_pad &= mask; + pad_count = pad_count.wrapping_add(all_pad & 1); } - Ok(input) + Ok(&input[..n - pad_count as usize]) } /// Get the length of Base32 produced by encoding the given amount of bytes. diff --git a/dev-tools/ct-disasm/Cargo.toml b/dev-tools/ct-disasm/Cargo.toml new file mode 100644 index 000000000..e90ab38cc --- /dev/null +++ b/dev-tools/ct-disasm/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "ct-disasm" +version = "0.0.0" +edition = "2024" +publish = false + +# This crate exists only to anchor the constant-time-regression disassembly +# check (`./check.sh`). It is NOT part of the public API surface and is not +# published. + +[lib] +# rlib so we don't need a panic_handler / std runtime; we only consume the +# emitted asm, never link a final image. +crate-type = ["lib"] + +[dependencies] +base16ct = { path = "../../base16ct", default-features = false } +base32ct = { path = "../../base32ct", default-features = false } +base64ct = { path = "../../base64ct", default-features = false } + +[profile.release] +# Match the profile downstream consumers actually ship. +opt-level = 3 +lto = "thin" +codegen-units = 1 +debug = 1 +overflow-checks = false diff --git a/dev-tools/ct-disasm/README.md b/dev-tools/ct-disasm/README.md new file mode 100644 index 000000000..e2166bc4c --- /dev/null +++ b/dev-tools/ct-disasm/README.md @@ -0,0 +1,119 @@ +# ct-disasm — constant-time disassembly check (research preview) + +A regression-detection tool for the constant-time hot paths in `base16ct`, +`base32ct`, and `base64ct`. 
Compiles a small wrapper crate that drives each +crate's public encode/decode API on a fixed-length buffer, dumps the +emitted assembly via `rustc --emit=asm`, and reports per-wrapper +conditional-branch counts. Designed to be diffed against a snapshotted +baseline as a CI gate. + +## Why this exists + +The CT-critical inner functions (`decode_nibble`, `decode_5bits`, +`decode_6bits`, `is_pad_ct`) are `#[inline(always)]` and not exported. +Checking them in isolation isn't useful — what matters is what they look +like *after* LLVM has inlined them into the public API path that real +callers use. This crate places `#[inline(never)]` `extern "C"` shims around +those public APIs, with input/output buffers sized as compile-time +constants, so length-dependent branches in the public API collapse and +the asm scanner sees the actual inlined CT machinery. + +A constant-time-analysis audit of these crates (Trail of Bits) confirmed +the source-level CT idioms are correct. This tool exists to catch +**compiler regressions** — a future LLVM that decides to lower one of the +arithmetic-mask idioms back into a branch. + +## Usage + +```sh +# Native target (auto-detects): +./check.sh + +# Explicit target (requires `rustup target add `): +./check.sh --target x86_64-unknown-linux-gnu + +# Machine-readable output for use as a baseline / regression diff: +./check.sh --baseline > baseline.txt +git diff baseline.txt # any change → manual review +``` + +No external tools beyond a Rust toolchain plus standard `awk` / `grep` / +`find` / `sed`. 
+ +## Current baseline (aarch64-apple-darwin, rustc 1.85) + +```text +ct_disasm_base16_lower_decode branches=0 lines=164 +ct_disasm_base16_lower_encode branches=0 lines=43 +ct_disasm_base16_upper_decode branches=0 lines=164 +ct_disasm_base16_upper_encode branches=0 lines=43 +ct_disasm_base16_mixed_decode branches=0 lines=225 +ct_disasm_base32_lower_decode branches=20 lines=426 +ct_disasm_base32_lower_encode branches=0 lines=252 +ct_disasm_base32_upper_decode branches=20 lines=426 +ct_disasm_base64_padded_decode branches=14 lines=369 +ct_disasm_base64_padded_encode branches=0 lines=30 +ct_disasm_base64_unpadded_decode branches=3 lines=516 +``` + +Reading this: + +* **`branches=0` is the strongest signal.** Five of `base16ct`'s wrappers + and two of `base64ct`/`base32ct`'s are fully branch-free at the chosen + fixed size. Any future regression that flips one of these to nonzero + is a CT-violation candidate that needs immediate review. +* **`branches > 0` does NOT mean a CT bug exists today.** The remaining + branches are length-dependent (chunk-loop iteration boundaries, + `if src_rem.len() >= N` tail handling) or panic-trampoline branches + (bounds checks LLVM didn't fold). The crate documentation explicitly + states timing depends on message *length*, not content. The number is + a fingerprint of how LLVM chose to lower the code. +* **The metric is whether the count, and the structure of branch targets, + changes between revisions.** A toolchain bump that drops 20 branches + to 18 is fine. A source change that turns 0 into 1 demands review. A + source change that turns 14 into 14 with different target labels also + demands review (LLVM may have rewritten the same control flow into + different but equivalent shape — or it may have introduced a new + data-dependent branch). + +## Layout + +* `src/lib.rs` — `#[no_mangle] extern "C"` shims around the public APIs. 
+ Buffer sizes are fixed at compile time and inputs/outputs are routed + through `core::hint::black_box` to anchor the optimizer. +* `check.sh` — bash script: builds, locates the emitted `.s`, scans each + wrapper body delimited by `.cfi_endproc`, counts conditional branches. +* `Cargo.toml` — workspace member; depends on the three CT crates as + in-tree path deps. + +## Known limitations + +1. **Length-dependent and panic-trampoline branches are counted.** These + are not CT bugs. To filter them out automatically the script would + need to walk the asm CFG and identify branch targets that are panic + blocks (`bl ___rust_alloc_error_handler`, `udf`, `brk`, etc.) or + non-cold loop-iteration blocks. Out of scope for this preview. +2. **Conditional moves (`cmov` / `csel`) are not flagged.** They're + branch-free at the pipeline level. If a hyper-strict posture is + wanted, extend `BRANCH_REGEX` in `check.sh`. +3. **Memory-access-pattern leaks aren't caught.** A function that + indexed a table by a secret byte would still register as + `branches=0` here. The audited crates avoid such patterns by + construction; this tool doesn't reverify that. +4. **Some `base64ct` variants aren't wrapped** (`Base64Url`, + `Base64Bcrypt`, `Base64Crypt`, `Base64ShaCrypt`, `Base64Pbkdf2`). + They share the `Alphabet::decode_6bits` / `encode_6bits` machinery + with `Base64`, so the existing wrappers transitively exercise the + same code. If a variant ever diverges, add a wrapper. +5. **Not yet wired into CI.** To promote, add a step that runs + `./dev-tools/ct-disasm/check.sh --baseline | diff - baseline.txt` + and fails on any non-empty diff. + +## Suggested next steps + +* Snapshot a `baseline.{aarch64,x86_64}.txt` on a known-good revision and + commit it. Add a CI step that diffs the two. +* Add panic-target filtering to `check.sh` so it can become a true + pass/fail gate rather than a baseline-diff gate. 
+* Extend the wrapper list to `serdect` once that crate's public API is + also fixed-length-callable. diff --git a/dev-tools/ct-disasm/baseline.aarch64-apple-darwin.txt b/dev-tools/ct-disasm/baseline.aarch64-apple-darwin.txt new file mode 100644 index 000000000..afa0425cf --- /dev/null +++ b/dev-tools/ct-disasm/baseline.aarch64-apple-darwin.txt @@ -0,0 +1,11 @@ +ct_disasm_base16_lower_decode 0 164 +ct_disasm_base16_lower_encode 0 43 +ct_disasm_base16_upper_decode 0 164 +ct_disasm_base16_upper_encode 0 43 +ct_disasm_base16_mixed_decode 0 225 +ct_disasm_base32_lower_decode 20 426 +ct_disasm_base32_lower_encode 0 252 +ct_disasm_base32_upper_decode 20 426 +ct_disasm_base64_padded_decode 14 369 +ct_disasm_base64_padded_encode 0 30 +ct_disasm_base64_unpadded_decode 3 516 diff --git a/dev-tools/ct-disasm/check.sh b/dev-tools/ct-disasm/check.sh new file mode 100755 index 000000000..3f16c12e4 --- /dev/null +++ b/dev-tools/ct-disasm/check.sh @@ -0,0 +1,130 @@ +#!/bin/bash +# Constant-time regression check (research preview). +# +# Builds the `ct-disasm` wrapper crate at release optimization, dumps the +# emitted assembly via `rustc --emit=asm`, and reports per-wrapper +# conditional-branch counts. The wrapper crate places `#[inline(never)]` +# `extern "C"` shims around the public `base16ct` / `base32ct` / `base64ct` +# encode/decode entry points so that the `#[inline(always)]` CT-critical +# helpers (`decode_nibble`, `decode_5bits`, `decode_6bits`, `is_pad_ct`) +# get inlined into our wrapper, where the asm scanner can see them. +# +# IMPORTANT — current limitation: +# The reported branch count includes BOTH genuine data-dependent branches +# AND panic-trampoline branches that LLVM emits for things like bounds +# checks and integer overflow checks on cold paths. Filtering the latter +# out automatically requires walking the asm CFG to identify which targets +# are panic blocks. This prototype does NOT do that. Use the script as: +# +# 1. 
Baseline: run it once on a known-good revision, snapshot the per- +# wrapper counts (`./check.sh --baseline > baseline.txt`). +# 2. Regression gate: rerun and diff against the baseline. Any *increase* +# in count, or change in branch-target structure, warrants manual +# asm review. +# +# Usage: +# ./check.sh # report counts +# ./check.sh --baseline # machine-readable counts +# ./check.sh --baseline > b.txt +# diff b.txt baseline.txt # regression gate + +set -euo pipefail +cd "$(dirname "$0")" + +MODE="report" +TARGET="" +while [[ $# -gt 0 ]]; do + case "$1" in + --baseline) MODE="baseline"; shift ;; + --target) TARGET="$2"; shift 2 ;; + *) echo "unknown arg: $1" >&2; exit 2 ;; + esac +done + +if [[ -z "$TARGET" ]]; then + HOST_OS=$(uname -s) + HOST_ARCH=$(uname -m) + case "${HOST_OS}-${HOST_ARCH}" in + Darwin-arm64) TARGET="aarch64-apple-darwin" ;; + Darwin-x86_64) TARGET="x86_64-apple-darwin" ;; + Linux-aarch64) TARGET="aarch64-unknown-linux-gnu" ;; + Linux-x86_64) TARGET="x86_64-unknown-linux-gnu" ;; + *) echo "unknown host: ${HOST_OS}-${HOST_ARCH}; pass --target" >&2; exit 2 ;; + esac +fi + +case "$TARGET" in + aarch64-*|arm64-*) ISA="arm64" ;; + x86_64-*) ISA="x86" ;; + *) echo "unsupported target $TARGET" >&2; exit 2 ;; +esac + +# Conditional branches only (no unconditional jumps, no conditional moves). +case "$ISA" in + arm64) + BRANCH_REGEX='b\.(eq|ne|cs|hs|cc|lo|mi|pl|vs|vc|hi|ls|ge|lt|gt|le)|cbz|cbnz|tbz|tbnz' + ;; + x86) + BRANCH_REGEX='j(e|ne|z|nz|l|le|g|ge|a|ae|b|be|c|nc|o|no|p|np|s|ns|cxz|ecxz|rcxz)' + ;; +esac + +DEPS_DIR="../../target/$TARGET/release/deps" +rm -f "$DEPS_DIR"/ct_disasm-*.s 2>/dev/null || true +touch src/lib.rs +cargo rustc --release --target "$TARGET" --quiet -- --emit=asm + +ASM_FILE=$(find "$DEPS_DIR" -maxdepth 1 -name 'ct_disasm-*.s' | head -n1) +if [[ -z "$ASM_FILE" || ! -f "$ASM_FILE" ]]; then + echo "FAIL: could not locate emitted asm under $DEPS_DIR" >&2 + exit 1 +fi + +# Wrapper symbol names — keep in sync with src/lib.rs. 
+WRAPPERS=( + ct_disasm_base16_lower_decode + ct_disasm_base16_lower_encode + ct_disasm_base16_upper_decode + ct_disasm_base16_upper_encode + ct_disasm_base16_mixed_decode + ct_disasm_base32_lower_decode + ct_disasm_base32_lower_encode + ct_disasm_base32_upper_decode + ct_disasm_base64_padded_decode + ct_disasm_base64_padded_encode + ct_disasm_base64_unpadded_decode +) + +if [[ "$MODE" == "report" ]]; then + echo "=== ct-disasm: target=$TARGET isa=$ISA ===" + echo "asm: $ASM_FILE ($(wc -l <"$ASM_FILE") lines)" +fi + +for W in "${WRAPPERS[@]}"; do + # Mach-O prefixes symbols with `_`. Try both. + BODY=$(awk -v sym="$W" ' + BEGIN { in_body = 0 } + # Match either `:` or `_:` at the start of a line. + $0 ~ ("^_?" sym ":$") { in_body = 1; next } + # Stop on .cfi_endproc — emitted by both Mach-O and ELF assemblers + # at the end of every function. This is more reliable than scanning + # for the next label, since the asm contains many local debug + # labels (Lfunc_begin0, Ltmp1, etc.) inside a function body. + in_body && /\.cfi_endproc/ { in_body = 0 } + in_body { print } + ' "$ASM_FILE") + + if [[ -z "$BODY" ]]; then + echo "WARN: $W not found in asm" >&2 + continue + fi + + COUNT=$(printf '%s\n' "$BODY" | grep -cE "(^|[[:space:]])($BRANCH_REGEX)([[:space:]]|$)" || true) + LINES=$(printf '%s\n' "$BODY" | wc -l | tr -d ' ') + + if [[ "$MODE" == "baseline" ]]; then + printf '%s\t%s\t%s\n' "$W" "$COUNT" "$LINES" + else + printf ' %-40s branches=%-3s lines=%s\n' "$W" "$COUNT" "$LINES" + fi +done diff --git a/dev-tools/ct-disasm/src/lib.rs b/dev-tools/ct-disasm/src/lib.rs new file mode 100644 index 000000000..f0b23b352 --- /dev/null +++ b/dev-tools/ct-disasm/src/lib.rs @@ -0,0 +1,155 @@ +//! Wrappers used by `dev-tools/ct-disasm/check.sh` to verify that the +//! constant-time hot paths in `base16ct`, `base32ct`, and `base64ct` compile +//! to branch-free machine code. +//! +//! Each wrapper: +//! * is `#[unsafe(no_mangle)]` so the disassembly script can find it by name, +//! 
* is `#[inline(never)]` so it shows up as its own symbol, +//! * calls a public crate API on a fixed-length buffer with `black_box`-ed +//! inputs/outputs so the inner `#[inline(always)]` CT helpers +//! (`decode_nibble`, `decode_5bits`, `decode_6bits`, `is_pad_ct`, etc.) +//! get inlined into our wrapper, where the checker can scan them. +//! +//! The wrappers deliberately use compile-time-known buffer lengths so that +//! length-dependent branches in the public API collapse to constants and the +//! only branches that *can* remain are byte-value-dependent — which is what +//! we want to flag. +//! +//! This crate is `no_std`-compatible to keep the disassembly tight; it never +//! allocates. + +#![no_std] +#![allow(missing_docs)] // public ABI of these wrappers is intentionally minimal + +use core::hint::black_box; + +use base16ct::{lower as hex_lower, mixed as hex_mixed, upper as hex_upper}; +use base32ct::{Base32, Base32Upper, Encoding as Base32Encoding}; +use base64ct::{Base64, Base64Unpadded, Encoding as Base64Encoding}; + +// ---- base16ct ------------------------------------------------------------- + +#[unsafe(no_mangle)] +#[inline(never)] +pub extern "C" fn ct_disasm_base16_lower_decode(input: &[u8; 64], output: &mut [u8; 32]) -> i32 { + let input = black_box(input); + let output = black_box(output); + match hex_lower::decode(input, output) { + Ok(_) => 0, + Err(_) => -1, + } +} + +#[unsafe(no_mangle)] +#[inline(never)] +pub extern "C" fn ct_disasm_base16_lower_encode(input: &[u8; 32], output: &mut [u8; 64]) -> i32 { + let input = black_box(input); + let output = black_box(output); + match hex_lower::encode(input, output) { + Ok(_) => 0, + Err(_) => -1, + } +} + +#[unsafe(no_mangle)] +#[inline(never)] +pub extern "C" fn ct_disasm_base16_upper_decode(input: &[u8; 64], output: &mut [u8; 32]) -> i32 { + let input = black_box(input); + let output = black_box(output); + match hex_upper::decode(input, output) { + Ok(_) => 0, + Err(_) => -1, + } +} + 
+#[unsafe(no_mangle)] +#[inline(never)] +pub extern "C" fn ct_disasm_base16_upper_encode(input: &[u8; 32], output: &mut [u8; 64]) -> i32 { + let input = black_box(input); + let output = black_box(output); + match hex_upper::encode(input, output) { + Ok(_) => 0, + Err(_) => -1, + } +} + +#[unsafe(no_mangle)] +#[inline(never)] +pub extern "C" fn ct_disasm_base16_mixed_decode(input: &[u8; 64], output: &mut [u8; 32]) -> i32 { + let input = black_box(input); + let output = black_box(output); + match hex_mixed::decode(input, output) { + Ok(_) => 0, + Err(_) => -1, + } +} + +// ---- base32ct ------------------------------------------------------------- + +#[unsafe(no_mangle)] +#[inline(never)] +pub extern "C" fn ct_disasm_base32_lower_decode(input: &[u8; 56], output: &mut [u8; 35]) -> i32 { + let input = black_box(input); + let output = black_box(output); + match Base32::decode(input, output) { + Ok(_) => 0, + Err(_) => -1, + } +} + +#[unsafe(no_mangle)] +#[inline(never)] +pub extern "C" fn ct_disasm_base32_lower_encode(input: &[u8; 35], output: &mut [u8; 56]) -> i32 { + let input = black_box(input); + let output = black_box(output); + match Base32::encode(input, output) { + Ok(_) => 0, + Err(_) => -1, + } +} + +#[unsafe(no_mangle)] +#[inline(never)] +pub extern "C" fn ct_disasm_base32_upper_decode(input: &[u8; 56], output: &mut [u8; 35]) -> i32 { + let input = black_box(input); + let output = black_box(output); + match Base32Upper::decode(input, output) { + Ok(_) => 0, + Err(_) => -1, + } +} + +// ---- base64ct ------------------------------------------------------------- + +#[unsafe(no_mangle)] +#[inline(never)] +pub extern "C" fn ct_disasm_base64_padded_decode(input: &[u8; 64], output: &mut [u8; 48]) -> i32 { + let input = black_box(input); + let output = black_box(output); + match Base64::decode(input, output) { + Ok(_) => 0, + Err(_) => -1, + } +} + +#[unsafe(no_mangle)] +#[inline(never)] +pub extern "C" fn ct_disasm_base64_padded_encode(input: &[u8; 48], output: 
&mut [u8; 64]) -> i32 { + let input = black_box(input); + let output = black_box(output); + match Base64::encode(input, output) { + Ok(_) => 0, + Err(_) => -1, + } +} + +#[unsafe(no_mangle)] +#[inline(never)] +pub extern "C" fn ct_disasm_base64_unpadded_decode(input: &[u8; 64], output: &mut [u8; 48]) -> i32 { + let input = black_box(input); + let output = black_box(output); + match Base64Unpadded::decode(input, output) { + Ok(_) => 0, + Err(_) => -1, + } +} diff --git a/x509-cert/Cargo.toml b/x509-cert/Cargo.toml index fcef6c240..2750abec7 100644 --- a/x509-cert/Cargo.toml +++ b/x509-cert/Cargo.toml @@ -34,6 +34,8 @@ rsa = { version = "0.10.0-rc.18", features = ["sha2"] } ecdsa = { version = "0.17.0-rc.16", features = ["digest", "pem"] } p256 = "0.14.0-rc.7" rstest = "0.26" +serde = { version = "1.0.184", features = ["derive"] } +serde_json = "1" sha2 = { version = "0.11", features = ["oid"] } tempfile = "3.5" tokio = { version = "1.45", features = ["macros", "rt"] } diff --git a/x509-cert/tests/limbo.rs b/x509-cert/tests/limbo.rs new file mode 100644 index 000000000..1d335a2d7 --- /dev/null +++ b/x509-cert/tests/limbo.rs @@ -0,0 +1,591 @@ +//! C2SP x509-limbo certificate decoding tests. 
+use std::{
+    collections::{BTreeMap, BTreeSet},
+    fmt::Write as _,
+    fs,
+    path::{Path, PathBuf},
+};
+
+use serde::Deserialize;
+use serde_json::Value;
+use x509_cert::{
+    Certificate,
+    der::{Decode, Document, Encode},
+    ext::pkix::{BasicConstraints, ExtendedKeyUsage, KeyUsage, NameConstraints, SubjectAltName},
+};
+
+#[derive(Debug, Deserialize)]
+#[serde(untagged)]
+enum LimboFixture {
+    Cases(Vec<Value>),
+    Suite {
+        #[serde(default, alias = "testCases", alias = "tests")]
+        testcases: Vec<Value>,
+    },
+}
+
+impl LimboFixture {
+    fn testcases(&self) -> &[Value] {
+        match self {
+            Self::Cases(testcases) | Self::Suite { testcases } => testcases,
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy, Eq, PartialEq)]
+enum ExpectedResult {
+    Success,
+    Failure,
+    Unknown,
+}
+
+#[derive(Debug)]
+struct CertInput {
+    role: String,
+    pem: String,
+}
+
+#[derive(Debug, Default)]
+struct LimboReport {
+    total: usize,
+    expected_success: usize,
+    expected_success_decoded: usize,
+    expected_failure: usize,
+    expected_failure_rejected: usize,
+    expected_failure_decoded: usize,
+    unknown_expected_result: usize,
+    certificates_checked: usize,
+    field_check_failures: Vec<String>,
+    expected_success_decode_failures: Vec<String>,
+    expected_failure_decoded_without_error: Vec<String>,
+    malformed_failures_decoded_without_error: Vec<String>,
+    features: BTreeMap<String, usize>,
+}
+
+impl LimboReport {
+    fn record_features(&mut self, features: &BTreeSet<String>) {
+        for feature in features {
+            *self.features.entry(feature.clone()).or_default() += 1;
+        }
+    }
+
+    fn summary(&self) -> String {
+        let mut summary = String::new();
+
+        let _ = writeln!(summary, "x509-limbo decode report");
+        let _ = writeln!(summary, " total testcases: {}", self.total);
+        let _ = writeln!(
+            summary,
+            " certificates checked: {}",
+            self.certificates_checked
+        );
+        let _ = writeln!(
+            summary,
+            " expected-pass decoded: {}/{}",
+            self.expected_success_decoded, self.expected_success
+        );
+        let _ = writeln!(
+            summary,
+            " expected-fail rejected by Certificate::from_der: {}/{}",
self.expected_failure_rejected, self.expected_failure + ); + let _ = writeln!( + summary, + " expected-fail decoded without Certificate::from_der error: {}", + self.expected_failure_decoded + ); + let _ = writeln!( + summary, + " unknown expected_result testcases: {}", + self.unknown_expected_result + ); + + append_list( + &mut summary, + "first expected-pass decode failures", + &self.expected_success_decode_failures, + ); + append_list( + &mut summary, + "first expected-fail cases decoded without error", + &self.expected_failure_decoded_without_error, + ); + append_list( + &mut summary, + "first malformed-cert failures decoded without error", + &self.malformed_failures_decoded_without_error, + ); + append_list( + &mut summary, + "first field-level invariant failures", + &self.field_check_failures, + ); + + if !self.features.is_empty() { + let _ = writeln!(summary, " feature inventory:"); + for (feature, count) in &self.features { + let _ = writeln!(summary, " {feature}: {count}"); + } + } + + summary + } +} + +#[test] +fn limbo_decode_round_trips() { + let Some(fixture) = load_limbo_fixture() else { + return; + }; + + let mut report = LimboReport::default(); + + for (index, testcase) in fixture.testcases().iter().enumerate() { + report.total += 1; + + let id = testcase_id(testcase, index); + let expected = expected_result(testcase); + let features = feature_names(testcase); + let certs = certificate_inputs(testcase); + report.record_features(&features); + report.certificates_checked += certs.len(); + + let mut all_decoded = !certs.is_empty(); + let mut decode_errors = Vec::new(); + + if certs.is_empty() { + all_decoded = false; + decode_errors.push("no PEM certificates found in testcase".to_owned()); + } + + for cert in &certs { + match decode_round_trip(&cert.pem) { + Ok(certificate) => { + if let Err(err) = exercise_field_decoders(&certificate) { + push_first_ten( + &mut report.field_check_failures, + format!("{id} {}: {err}", cert.role), + ); + } + } + Err(err) 
=> {
+                    all_decoded = false;
+                    decode_errors.push(format!("{}: {err}", cert.role));
+                }
+            }
+        }
+
+        match expected {
+            ExpectedResult::Success => {
+                report.expected_success += 1;
+                if all_decoded {
+                    report.expected_success_decoded += 1;
+                } else {
+                    push_first_ten(
+                        &mut report.expected_success_decode_failures,
+                        format!("{id}: {}", decode_errors.join("; ")),
+                    );
+                }
+            }
+            ExpectedResult::Failure => {
+                report.expected_failure += 1;
+                if all_decoded {
+                    report.expected_failure_decoded += 1;
+                    push_first_ten(
+                        &mut report.expected_failure_decoded_without_error,
+                        id.clone(),
+                    );
+
+                    if is_malformed_cert_failure(testcase, &features) {
+                        push_first_ten(&mut report.malformed_failures_decoded_without_error, id);
+                    }
+                } else {
+                    report.expected_failure_rejected += 1;
+                }
+            }
+            ExpectedResult::Unknown => {
+                report.unknown_expected_result += 1;
+            }
+        }
+    }
+
+    println!("{}", report.summary());
+
+    assert!(
+        report.expected_success_decode_failures.is_empty(),
+        "{}",
+        report.summary()
+    );
+
+    assert!(
+        report.malformed_failures_decoded_without_error.is_empty(),
+        "{}",
+        report.summary()
+    );
+}
+
+fn fixture_path() -> PathBuf {
+    Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/limbo/limbo.json")
+}
+
+fn load_limbo_fixture() -> Option<LimboFixture> {
+    let path = fixture_path();
+    let contents = match fs::read_to_string(&path) {
+        Ok(contents) => contents,
+        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
+            eprintln!(
+                "skipping x509-limbo tests: fixture not found at {}",
+                path.display()
+            );
+            return None;
+        }
+        Err(err) => panic!("failed to read {}: {err}", path.display()),
+    };
+
+    Some(
+        serde_json::from_str(&contents)
+            .unwrap_or_else(|err| panic!("failed to parse {}: {err}", path.display())),
+    )
+}
+
+fn decode_round_trip(pem: &str) -> Result<Certificate, String> {
+    let (label, document) = Document::from_pem(pem.trim())
+        .map_err(|err| format!("PEM/DER document decode failed: {err}"))?;
+
+    if label != "CERTIFICATE" {
+        return Err(format!("unexpected PEM label 
{label:?}"));
+    }
+
+    let certificate = Certificate::from_der(document.as_bytes())
+        .map_err(|err| format!("Certificate::from_der failed: {err}"))?;
+    let encoded = certificate
+        .to_der()
+        .map_err(|err| format!("Certificate::to_der failed: {err}"))?;
+
+    if encoded != document.as_bytes() {
+        return Err(format!(
+            "DER round-trip changed encoding length from {} to {} bytes",
+            document.as_bytes().len(),
+            encoded.len()
+        ));
+    }
+
+    let reparsed = Certificate::from_der(&encoded)
+        .map_err(|err| format!("reparse of round-tripped DER failed: {err}"))?;
+    let reencoded = reparsed
+        .to_der()
+        .map_err(|err| format!("re-encode of reparsed DER failed: {err}"))?;
+
+    if reencoded != encoded {
+        return Err("second DER round-trip changed encoding".to_owned());
+    }
+
+    Ok(certificate)
+}
+
+fn exercise_field_decoders(certificate: &Certificate) -> Result<(), String> {
+    let tbs = certificate.tbs_certificate();
+    let validity = tbs.validity();
+
+    if validity.not_before.to_unix_duration() > validity.not_after.to_unix_duration() {
+        return Err(format!(
+            "validity not_before {} is after not_after {}",
+            validity.not_before, validity.not_after
+        ));
+    }
+
+    if let Some((_critical, basic_constraints)) = tbs
+        .get_extension::<BasicConstraints>()
+        .map_err(|err| format!("basicConstraints extension decode failed: {err}"))?
+    {
+        let _ca_flag = basic_constraints.ca;
+    }
+
+    if let Some((critical, _key_usage)) = tbs
+        .get_extension::<KeyUsage>()
+        .map_err(|err| format!("keyUsage extension decode failed: {err}"))?
+    {
+        let _key_usage_criticality = critical;
+    }
+
+    if let Some((_critical, extended_key_usage)) = tbs
+        .get_extension::<ExtendedKeyUsage>()
+        .map_err(|err| format!("extendedKeyUsage extension decode failed: {err}"))?
+    {
+        let _eku_present = !extended_key_usage.0.is_empty();
+    }
+
+    if let Some((_critical, subject_alt_name)) = tbs
+        .get_extension::<SubjectAltName>()
+        .map_err(|err| format!("subjectAltName extension decode failed: {err}"))? 
+    {
+        let _san_present = !subject_alt_name.0.is_empty();
+    }
+
+    let _name_constraints = tbs
+        .get_extension::<NameConstraints>()
+        .map_err(|err| format!("nameConstraints extension decode failed: {err}"))?;
+
+    Ok(())
+}
+
+fn append_list(summary: &mut String, title: &str, items: &[String]) {
+    if items.is_empty() {
+        return;
+    }
+
+    let _ = writeln!(summary, " {title}:");
+    for item in items {
+        let _ = writeln!(summary, " - {item}");
+    }
+}
+
+fn push_first_ten(items: &mut Vec<String>, item: String) {
+    if items.len() < 10 {
+        items.push(item);
+    }
+}
+
+fn testcase_id(testcase: &Value, index: usize) -> String {
+    field_string(
+        testcase,
+        &[
+            "id",
+            "testcase",
+            "testcase_id",
+            "testcaseId",
+            "name",
+            "description",
+        ],
+    )
+    .unwrap_or_else(|| format!("case-{index}"))
+}
+
+fn expected_result(testcase: &Value) -> ExpectedResult {
+    let Some(value) = field_string(
+        testcase,
+        &[
+            "expected_result",
+            "expectedResult",
+            "expected",
+            "result",
+            "outcome",
+        ],
+    ) else {
+        return ExpectedResult::Unknown;
+    };
+
+    let uppercase = value.to_ascii_uppercase();
+    if uppercase.contains("SUCCESS") || uppercase == "PASS" || uppercase == "VALID" {
+        ExpectedResult::Success
+    } else if uppercase.contains("FAIL") || uppercase.contains("ERROR") || uppercase == "INVALID" {
+        ExpectedResult::Failure
+    } else {
+        ExpectedResult::Unknown
+    }
+}
+
+fn certificate_inputs(testcase: &Value) -> Vec<CertInput> {
+    let mut certificates = Vec::new();
+
+    append_certificates(
+        testcase,
+        &[
+            "peer_certificate",
+            "peerCertificate",
+            "leaf_certificate",
+            "leafCertificate",
+            "leaf_cert",
+            "leafCert",
+            "leaf",
+            "cert",
+            "certificate",
+        ],
+        "leaf",
+        &mut certificates,
+    );
+    append_certificates(
+        testcase,
+        &[
+            "untrusted_intermediates",
+            "untrustedIntermediates",
+            "intermediates",
+            "intermediate_certs",
+            "intermediateCerts",
+            "intermediate_certificates",
+            "intermediateCertificates",
+        ],
+        "intermediate",
+        &mut certificates,
+    );
+    append_certificates(
+        testcase,
+        &[
+            "trusted_certs", 
+            "trustedCerts",
+            "trusted_roots",
+            "trustedRoots",
+            "roots",
+            "trust_anchors",
+            "trustAnchors",
+        ],
+        "root",
+        &mut certificates,
+    );
+
+    if certificates.is_empty() {
+        let mut pems = Vec::new();
+        collect_pem_strings(testcase, &mut pems);
+        for (index, pem) in pems.into_iter().enumerate() {
+            certificates.push(CertInput {
+                role: format!("certificate[{index}]"),
+                pem,
+            });
+        }
+    }
+
+    certificates
+}
+
+fn append_certificates(
+    testcase: &Value,
+    keys: &[&str],
+    role: &str,
+    certificates: &mut Vec<CertInput>,
+) {
+    let Some(value) = find_field(testcase, keys) else {
+        return;
+    };
+
+    let mut pems = Vec::new();
+    collect_pem_strings(value, &mut pems);
+
+    for (index, pem) in pems.into_iter().enumerate() {
+        certificates.push(CertInput {
+            role: format!("{role}[{index}]"),
+            pem,
+        });
+    }
+}
+
+fn collect_pem_strings(value: &Value, output: &mut Vec<String>) {
+    match value {
+        Value::String(value) if value.contains("BEGIN CERTIFICATE") => {
+            output.push(value.clone());
+        }
+        Value::Array(values) => {
+            for value in values {
+                collect_pem_strings(value, output);
+            }
+        }
+        Value::Object(values) => {
+            for value in values.values() {
+                collect_pem_strings(value, output);
+            }
+        }
+        Value::Null | Value::Bool(_) | Value::Number(_) | Value::String(_) => {}
+    }
+}
+
+fn feature_names(testcase: &Value) -> BTreeSet<String> {
+    let mut features = BTreeSet::new();
+
+    if let Some(value) = find_field(testcase, &["features", "feature", "feature_tags"]) {
+        collect_feature_names(value, &mut features);
+    }
+
+    features
+}
+
+fn collect_feature_names(value: &Value, features: &mut BTreeSet<String>) {
+    match value {
+        Value::String(value) => {
+            features.insert(normalize_feature(value));
+        }
+        Value::Array(values) => {
+            for value in values {
+                collect_feature_names(value, features);
+            }
+        }
+        Value::Object(values) => {
+            for (key, value) in values {
+                features.insert(normalize_feature(key));
+                collect_feature_names(value, features);
+            }
+        }
+        Value::Null | Value::Bool(_) | Value::Number(_) => {}
+ 
} +} + +fn is_malformed_cert_failure(testcase: &Value, features: &BTreeSet) -> bool { + if features.iter().any(|feature| { + feature == "malformed-cert" || feature.contains("malformed") || feature.contains("asn1") + }) { + return true; + } + + [ + "failure_kind", + "failureKind", + "failure_reason", + "failureReason", + "expected_error", + "expectedError", + "reason", + ] + .iter() + .filter_map(|key| field_string(testcase, &[*key])) + .any(|reason| { + let reason = normalize_feature(&reason); + reason == "malformed-cert" || reason.contains("malformed") || reason.contains("asn1") + }) +} + +fn field_string(value: &Value, keys: &[&str]) -> Option { + find_field(value, keys).and_then(value_to_string) +} + +fn find_field<'a>(value: &'a Value, keys: &[&str]) -> Option<&'a Value> { + let Value::Object(values) = value else { + return None; + }; + + for key in keys { + if let Some(value) = values.get(*key) { + return Some(value); + } + } + + values.iter().find_map(|(candidate, value)| { + keys.iter() + .any(|key| normalize_key(candidate) == normalize_key(key)) + .then_some(value) + }) +} + +fn value_to_string(value: &Value) -> Option { + match value { + Value::String(value) => Some(value.clone()), + Value::Bool(value) => Some(value.to_string()), + Value::Number(value) => Some(value.to_string()), + Value::Object(values) => ["kind", "type", "value", "result", "expected"] + .iter() + .filter_map(|key| values.get(*key)) + .find_map(value_to_string), + Value::Array(values) => values.first().and_then(value_to_string), + Value::Null => None, + } +} + +fn normalize_key(value: &str) -> String { + value + .chars() + .filter(|ch| ch.is_ascii_alphanumeric()) + .flat_map(char::to_lowercase) + .collect() +} + +fn normalize_feature(value: &str) -> String { + value.trim().to_ascii_lowercase().replace(['_', ' '], "-") +} diff --git a/x509-cert/tests/limbo/README.md b/x509-cert/tests/limbo/README.md new file mode 100644 index 000000000..576f23916 --- /dev/null +++ 
b/x509-cert/tests/limbo/README.md
@@ -0,0 +1,53 @@
+# C2SP x509-limbo fixtures
+
+This directory holds the local integration point for the C2SP `x509-limbo` certificate path-validation corpus.
+
+## Fetching the fixture
+
+`limbo.json` is intentionally ignored by git because it is large. Fetch it with the helper script, which uses the GitHub CLI, verifies that the pinned upstream commit SHA still exists, checks out that exact commit, copies the fixture, and removes the temporary clone:
+
+```sh
+./x509-cert/tests/limbo/fetch.sh
+```
+
+Then run the limbo-targeted tests:
+
+```sh
+cargo test --package x509-cert -- limbo
+```
+
+If `tests/limbo/limbo.json` is absent, the tests print a skip message and return successfully so offline builds are not broken.
+
+## Harness scope
+
+`x509-cert` is a decode-only crate. It does not build chains, validate DNS names, enforce name constraints, check revocation, or apply server/client validation policy. The limbo harness therefore treats the corpus as a DER and extension-decoding corpus, not as a full RFC 5280 validator conformance suite.
+
+For every PEM certificate found in each testcase's leaf, intermediate, and trusted-root fields, the harness:
+
+- Decodes the PEM payload to DER and parses it with `Certificate::from_der`.
+- Re-encodes the parsed certificate and verifies the DER bytes round-trip exactly.
+- Re-parses the round-tripped DER to catch unstable encodings.
+- Exercises single-certificate extension decoders for `basicConstraints`, `keyUsage`, `extendedKeyUsage`, `subjectAltName`, and `nameConstraints` when present.
+- Checks that the parsed validity interval is ordered as `notBefore <= notAfter`.
+
+Expected-success limbo cases must decode all certificates. Expected-failure cases tagged as malformed certificates are expected to fail certificate decoding. 
Other expected-failure cases are reported as decoded-without-error when the certificates are syntactically valid but the failure requires validation behavior outside this crate. + +## Feature gap report + +| Limbo feature category | Status | Notes | +| --- | --- | --- | +| `malformed-cert`, malformed ASN.1/DER | EXERCISED | Expected failures should be rejected by PEM/DER or `Certificate::from_der` decoding. | +| `basic-constraints` | EXERCISED | Decodes the extension and observes the `cA` flag when present; no chain role enforcement. | +| `ku`, `key-usage` | EXERCISED | Decodes the extension and records criticality when present; no path-validation semantics. | +| `eku`, `extended-key-usage` | EXERCISED | Decodes the extension and checks presence when present; no server/client policy matching. | +| `san`, `subject-alt-name` | EXERCISED | Decodes general names when present; no peer-name matching. | +| `validity` | EXERCISED | Parses `notBefore`/`notAfter` and checks local ordering; validation-time checks are out of scope. | +| `name-constraints` | PUNTED | Syntax is decoded when present, but subtree enforcement requires chain validation. | +| `path-len`, CA depth, chain building | OUT-OF-SCOPE | Requires path construction and issuer/subject chaining. | +| Trust roots, validation kind, validation policy | OUT-OF-SCOPE | Requires a validator and policy engine. | +| DNS/IP/email peer-name validation | OUT-OF-SCOPE | Requires name matching against the testcase peer name. | +| Signature algorithm/path signature validation | OUT-OF-SCOPE | Requires signature verification across the chain. | +| Revocation, CRL, OCSP | OUT-OF-SCOPE | `x509-cert` parses related structures but does not perform revocation checks. | +| Certificate policies, policy mappings, inhibit-any-policy | OUT-OF-SCOPE | Requires policy-tree processing during path validation. 
| + +The test harness also prints the normalized feature inventory it finds in the local fixture when run with `-- --nocapture` or when an assertion fails. diff --git a/x509-cert/tests/limbo/fetch.sh b/x509-cert/tests/limbo/fetch.sh new file mode 100755 index 000000000..2736a47af --- /dev/null +++ b/x509-cert/tests/limbo/fetch.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# Fetch the C2SP/x509-limbo `limbo.json` fixture into this directory. +# +# Usage: ./fetch.sh +# +# The fixture is large and is gitignored. The harness in +# `x509-cert/tests/limbo.rs` skips cleanly when it's absent, so running +# `cargo test` without first calling this script is safe — it just won't +# exercise the limbo corpus. +# +# We pin to a specific upstream commit so test results are reproducible +# across machines and time. Bump `LIMBO_SHA` when you want to rebase +# against a newer x509-limbo release. + +set -euo pipefail + +REPO="C2SP/x509-limbo" +LIMBO_SHA="086b0da8b83d78ed0f491d6df6672b2673406500" + +script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" +fixture_path="${script_dir}/limbo.json" + +if ! command -v gh >/dev/null 2>&1; then + echo "error: gh CLI is required" >&2 + exit 1 +fi +if ! command -v git >/dev/null 2>&1; then + echo "error: git is required" >&2 + exit 1 +fi + +# Verify the pinned SHA still exists upstream before we clone. +if ! gh api "repos/${REPO}/commits/${LIMBO_SHA}" --jq '.sha' >/dev/null 2>&1; then + echo "error: pinned SHA ${LIMBO_SHA} not found in ${REPO}" >&2 + exit 1 +fi + +# When run inside a Claude Code session a hook redirects clone targets +# to a session-scoped directory; honor that so we don't conflict with +# other concurrent agents. 
+if [[ -n "${CLAUDE_SESSION_ID:-}" ]]; then + tmp_root="${TMPDIR:-/tmp}/gh-clones-${CLAUDE_SESSION_ID}" + mkdir -p "${tmp_root}" + clone_dir="${tmp_root}/x509-limbo" + rm -rf "${clone_dir}" + trap 'rm -rf "${clone_dir}"' EXIT +else + clone_dir="$(mktemp -d "${TMPDIR:-/tmp}/x509-limbo.XXXXXX")" + trap 'rm -rf "${clone_dir}"' EXIT +fi + +# Shallow-clone default branch first, then fetch + check out the pinned +# SHA. `--depth 1` alone cannot fetch arbitrary SHAs, so we widen with +# a follow-up `git fetch`. +gh repo clone "${REPO}" "${clone_dir}" -- --depth 1 +git -C "${clone_dir}" fetch --depth 1 origin "${LIMBO_SHA}" +git -C "${clone_dir}" checkout --detach "${LIMBO_SHA}" + +if [[ ! -f "${clone_dir}/limbo.json" ]]; then + echo "error: limbo.json missing at pinned SHA — upstream layout change?" >&2 + exit 1 +fi + +cp "${clone_dir}/limbo.json" "${fixture_path}" +echo "copied ${REPO}@${LIMBO_SHA} limbo/limbo.json to ${fixture_path}" +echo "fixture size: $(wc -c <"${fixture_path}") bytes"