Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 85 additions & 17 deletions hooks/scripts/cc-task-gate.impl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -420,24 +420,92 @@ fi
# a claim may create a new request or offered cc-task note, but only through a
# path-scoped, content-validated Write event. Ordinary source/runtime/system
# mutation and manual claim-file writes still fail closed below.
set +e
_bootstrap_output="$(
printf '%s' "$input" | python3 "$SCRIPT_DIR/cc-task-gate-bootstrap.py" 2>&1
)"
_bootstrap_rc=$?
set -e
case "$_bootstrap_rc" in
0)
[[ -n "$_bootstrap_output" ]] && printf '%s\n' "$_bootstrap_output" >&2
#
# FAIL-OPEN ON INFRA ERROR (reform — bootstrap-failopen-atomic-swap). This is the
# roleless session's ONLY sanctioned write path, so it MUST mirror the shim's
# INV-5 posture (master design §2.2 / FM-15 / NEW-2): when the validator helper
# itself cannot run — unreadable, mid atomic-swap, or it crashes — a bootstrap
# CANDIDATE write fails OPEN (advisory + ledger) instead of fail-closed-blocking,
# and any other mutation falls through to the normal claim/authority gate. ONLY a
# genuine BLOCKED verdict (rc==12) from a helper that actually ran blocks; python's
# own "can't open file" rc==2 and every other non-{0,10} code are infra signals,
# never a deny. Before this fix the case mapped EVERY non-{0,10} code to exit 2, so
# a redeploy that briefly unlinked the helper fail-closed even a properly CLAIMED
# session (the S2 incident).
_bootstrap_helper="$SCRIPT_DIR/cc-task-gate-bootstrap.py"

# _bootstrap_is_candidate_target — mirror the helper's candidate test in pure bash
# so the fail-OPEN stays narrow: only a Write of a NEW .md note that physically
# lives under a governance intake root (hapax-requests/active or
# hapax-cc-tasks/active) fails open when the helper can't run. Any other mutation
# falls through to the normal gate — an infra error must never widen what a
# non-bootstrap mutation may do.
#
# Globals:   tool_name (read), edit_path (read), HOME (read)
# Arguments: none
# Returns:   0 if the target is a bootstrap candidate, 1 otherwise
#
# A plain string-prefix match is not sufficient (PR review, P1):
#   * "<root>/../../outside.md" carries the right prefix but lands outside the root;
#   * a symlinked directory below the root can redirect the write elsewhere;
#   * an existing active note must not be overwritten through the fallback.
# NOTE(review): the review states the Python helper applies
# resolve(...).is_relative_to(...) and target.exists(); this is the bash
# equivalent — confirm against cc-task-gate-bootstrap.py.
_bootstrap_is_candidate_target() {
  [[ "${tool_name:-}" == "Write" ]] || return 1

  local target="${edit_path:-}"
  target="${target/#\~/$HOME}"
  # Absolute path ending in .md only; a relative path is never a candidate.
  [[ "$target" == /*.md ]] || return 1

  # Refuse "." / ".." components outright instead of trying to normalise them.
  case "$target" in
    */./* | */../*) return 1 ;;
  esac

  # Only NEW notes qualify: anything already present (including a dangling
  # symlink) goes through the normal gate.
  if [[ -e "$target" || -L "$target" ]]; then
    return 1
  fi

  local parent="${target%/*}"
  [[ -n "$parent" ]] || return 1

  # Compare PHYSICAL directories so symlinks cannot smuggle the write out of the
  # intake root. A parent directory that does not exist yet is deliberately not
  # a candidate (conservative: falls through to the normal gate).
  local parent_phys root_phys rel
  parent_phys="$(cd -P -- "$parent" 2>/dev/null && pwd -P)" || return 1

  for rel in hapax-requests/active hapax-cc-tasks/active; do
    root_phys="$(cd -P -- "$HOME/Documents/Personal/20-projects/$rel" 2>/dev/null && pwd -P)" \
      || continue
    if [[ "$parent_phys" == "$root_phys" || "$parent_phys" == "$root_phys"/* ]]; then
      return 0
    fi
  done
  return 1
}

# _bootstrap_json_escape — print $1 escaped for embedding inside a JSON string
# literal: backslash and double quote are backslash-escaped, newline / CR / tab
# become \n / \r / \t, and any remaining control character becomes a space.
# Pure bash (parameter expansion only) so it works even when python is missing.
_bootstrap_json_escape() {
  local s="${1-}"
  s=${s//\\/\\\\}   # must run first so later escapes are not double-escaped
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  s=${s//[[:cntrl:]]/ }
  printf '%s' "$s"
}

# _bootstrap_infra_failopen — shared "the validator could not run" handler: emit a
# loud ledger line + stderr advisory, then mirror INV-5 — fail OPEN (exit 0) for a
# candidate, else return so the caller falls through to the normal claim gate.
#
# Globals:   tool_name, edit_path (read); HAPAX_AGENT_ROLE / CODEX_ROLE /
#            CLAUDE_ROLE (read, first one set wins, default "unknown");
#            HAPAX_METHODOLOGY_LEDGER (read, default
#            $HOME/.cache/hapax/methodology-emergency-ledger.jsonl)
# Arguments: $1 - short machine-readable reason; $2 - free-form detail
# Outputs:   appends one JSONL record to the ledger (best effort); advisory on stderr
# Returns:   0 for a non-candidate; EXITS the script with 0 for a candidate
#
# Every interpolated value is JSON-escaped before it is written: edit_path and
# tool_name originate from the tool call, so an unescaped quote, backslash or
# newline would corrupt the ledger line or forge extra fields.
_bootstrap_infra_failopen() {
  local reason="${1:-}" detail="${2:-}" is_cand="false"
  if _bootstrap_is_candidate_target; then is_cand="true"; fi
  local _bs_role="${HAPAX_AGENT_ROLE:-${CODEX_ROLE:-${CLAUDE_ROLE:-unknown}}}"
  local _bs_ledger="${HAPAX_METHODOLOGY_LEDGER:-$HOME/.cache/hapax/methodology-emergency-ledger.jsonl}"
  # Ledger write is best effort: a failure to record must never change the verdict.
  mkdir -p "$(dirname "$_bs_ledger")" 2>/dev/null || true
  printf '{"ts":"%s","kind":"bootstrap_helper_infra_failopen","reason":"%s","detail":"%s","role":"%s","tool":"%s","path":"%s","candidate":%s}\n' \
    "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    "$(_bootstrap_json_escape "$reason")" \
    "$(_bootstrap_json_escape "$detail")" \
    "$(_bootstrap_json_escape "$_bs_role")" \
    "$(_bootstrap_json_escape "${tool_name:-}")" \
    "$(_bootstrap_json_escape "${edit_path:-}")" \
    "$is_cand" \
    >> "$_bs_ledger" 2>/dev/null || true
  if [[ "$is_cand" == "true" ]]; then
    echo "cc-task-gate: bootstrap validator unavailable ($reason) — FAILING OPEN for governance-intake write (advisory, ledgered, INV-5): ${edit_path:-}" >&2
    exit 0
  fi
  echo "cc-task-gate: bootstrap validator unavailable ($reason) — non-candidate mutation falls through to the normal gate (advisory, ledgered)." >&2
  return 0
}

# Bootstrap-validator dispatch. Run the helper when it is readable and map its
# exit status onto a gate decision; otherwise hand off to the infra fail-open
# handler without ever exec'ing python on a missing file.
#   rc 0   -> helper accepted the write: relay its output, allow (exit 0)
#   rc 10  -> NOT_CANDIDATE: do nothing here, continue to the normal gate
#   rc 12  -> the only blocking verdict: relay its output, deny (exit 2)
#   other  -> infra signal (e.g. python rc 2 / 1 / 127): fail-open handler
if [[ -r "$_bootstrap_helper" ]]; then
  # errexit is suspended only around the capture so that a non-zero helper
  # status can be inspected instead of aborting the gate.
  set +e
  _bootstrap_output="$(
    printf '%s' "$input" | python3 "$_bootstrap_helper" 2>&1
  )"
  _bootstrap_rc=$?
  set -e

  if [[ "$_bootstrap_rc" == "0" ]]; then
    if [[ -n "$_bootstrap_output" ]]; then
      printf '%s\n' "$_bootstrap_output" >&2
    fi
    exit 0
  elif [[ "$_bootstrap_rc" == "12" ]]; then
    # The helper ran and judged the bootstrap note invalid — a genuine deny.
    if [[ -n "$_bootstrap_output" ]]; then
      printf '%s\n' "$_bootstrap_output" >&2
    fi
    exit 2
  elif [[ "$_bootstrap_rc" != "10" ]]; then
    # Never a deny. Flatten newlines, CRs, tabs, quotes and backslashes to
    # spaces and cap the length before the text reaches the JSONL ledger.
    _bs_det="$(printf '%s' "${_bootstrap_output:-}" | tr '\n\r\t"\\' ' ' | cut -c1-160)"
    _bootstrap_infra_failopen "helper_rc_${_bootstrap_rc}" "$_bs_det"
  fi
  # rc 10 (NOT_CANDIDATE) reaches this point untouched and falls through to
  # the normal claim/authority gate.
else
  # Helper absent or unreadable (e.g. a concurrent hooks-doctor redeploy briefly
  # unlinked it). Running python on it would only surface as rc 2, so go
  # straight to the INV-5 fail-open handler.
  _bootstrap_infra_failopen "helper_unreadable" "$_bootstrap_helper"
fi

# --- 3c. Shadow decision log (reform 3b PRODUCER source) ---------------------
# From here on every exit is a genuine GATED decision (the non-mutating / cognition
Expand Down
72 changes: 60 additions & 12 deletions hooks/scripts/hooks-doctor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -179,29 +179,77 @@ deploy_canonical() {
echo "deploy: REFUSING to deploy an impl that lacks INV-5 is_cognition_path: $src/cc-task-gate.impl.sh" >&2
return 1
fi
# REFUSE an incomplete closure UP FRONT, before touching the live canonical: a
# missing sibling would make the deployed gate exit 2 on every mutation (the
# cc-task-gate-bootstrap.py omission incident). The OLD code installed the impl
# first and only THEN discovered a missing sibling, leaving a half-swapped
# closure live; checking + staging first makes a refused deploy a clean no-op.
local s
for s in "${CLOSURE_SIBLINGS[@]}"; do
if [[ ! -r "$src/$s" ]]; then
echo "deploy: REFUSING incomplete closure — source missing $src/$s" >&2
return 1
fi
done
if [[ "$DRY" = 1 ]]; then
echo "[dry-run] would deploy gate closure: $src -> $CANONICAL_DIR"
echo "[dry-run] would atomically deploy gate closure: $src -> $CANONICAL_DIR"
return 0
fi

mkdir -p "$CANONICAL_DIR"
# impl deploys AS cc-task-gate.sh (the name shims + settings.json resolve to).
install -m 0755 "$src/cc-task-gate.impl.sh" "$CANONICAL_DIR/cc-task-gate.sh"
# REFUSE an incomplete closure: a missing sibling would make the deployed gate
# exit 2 on every mutation (the cc-task-gate-bootstrap.py omission incident).
local s

# ATOMIC DEPLOY (reform — bootstrap-failopen-atomic-swap). The old path used
# `install` (unlinkat+create) per file with the impl deployed FIRST, so during a
# redeploy every file was briefly ABSENT and a concurrent PreToolUse exec could
# run the new impl while its siblings (agent-role.sh / escape-grant.sh /
# cc-task-gate-bootstrap.py) were missing → fail-closed. We instead stage the
# whole closure + MANIFEST into a temp dir on the SAME filesystem, then rename(2)
# each file into place. rename(2) atomically replaces the destination (POSIX: no
# point at which a reader finds it missing), and we publish the impl
# (cc-task-gate.sh) LAST so a concurrent gate exec always sees a complete sibling
# set before the new impl becomes live.
local stage rc=0
stage="$(mktemp -d "$CANONICAL_DIR/.deploy.tmp.XXXXXX")" || {
echo "deploy: could not create staging dir under $CANONICAL_DIR" >&2
return 1
}

# Stage the impl (as cc-task-gate.sh) + every sibling into the temp dir.
if ! install -m 0755 "$src/cc-task-gate.impl.sh" "$stage/cc-task-gate.sh"; then
rm -rf "$stage" 2>/dev/null || true
echo "deploy: staging impl failed" >&2
return 1
fi
for s in "${CLOSURE_SIBLINGS[@]}"; do
if [[ ! -r "$src/$s" ]]; then
echo "deploy: REFUSING incomplete closure — source missing $src/$s" >&2
if ! install -m 0755 "$src/$s" "$stage/$s"; then
rm -rf "$stage" 2>/dev/null || true
echo "deploy: staging sibling $s failed" >&2
return 1
fi
install -m 0755 "$src/$s" "$CANONICAL_DIR/$s"
done
( cd "$CANONICAL_DIR" && sha256sum cc-task-gate.sh "${CLOSURE_SIBLINGS[@]}" 2>/dev/null ) \
> "$CANONICAL_DIR/MANIFEST.sha256" 2>/dev/null || true
( cd "$stage" && sha256sum cc-task-gate.sh "${CLOSURE_SIBLINGS[@]}" 2>/dev/null ) \
> "$stage/MANIFEST.sha256" 2>/dev/null || true

# Publish: siblings + MANIFEST FIRST, the impl LAST. Each mv is an atomic
# rename(2) within $CANONICAL_DIR (same filesystem as the staging dir) — no file
# is ever observed absent, and the impl never goes live ahead of its closure.
for s in "${CLOSURE_SIBLINGS[@]}"; do
mv -f "$stage/$s" "$CANONICAL_DIR/$s" || rc=1
done
if [[ -f "$stage/MANIFEST.sha256" ]]; then
mv -f "$stage/MANIFEST.sha256" "$CANONICAL_DIR/MANIFEST.sha256" || rc=1
fi
mv -f "$stage/cc-task-gate.sh" "$CANONICAL_DIR/cc-task-gate.sh" || rc=1
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Don't publish the impl after a failed sibling rename

If any earlier publish step sets rc=1 (for example because one canonical sibling cannot be replaced), this line still renames the new cc-task-gate.sh into place and only reports failure afterward. That leaves the live canonical with a new impl and a mixed or stale closure, which is exactly the half-deployed state this atomic deploy path is meant to avoid; abort before publishing the impl once any sibling or manifest rename has failed.

Useful? React with 👍 / 👎.

rm -rf "$stage" 2>/dev/null || true
if [[ "$rc" != 0 ]]; then
echo "deploy: FAILED publishing staged closure to $CANONICAL_DIR" >&2
return 1
fi

local bindir="${HAPAX_LOCAL_BIN:-$HOME/.local/bin}"
mkdir -p "$bindir"
ln -sf "$CANONICAL_DIR/hooks-doctor.sh" "$bindir/hapax-hooks-doctor"
echo "deployed gate closure -> $CANONICAL_DIR (from $src)"
echo "deployed gate closure -> $CANONICAL_DIR (from $src, atomic)"
check_canonical
}

Expand Down
Loading
Loading