diff --git a/CLAUDE.md b/CLAUDE.md index 1fcfdfab..50232314 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -5,7 +5,7 @@ **lightcone-cli** is Lightcone Research's agentic layer for ASTRA (Agentic Schema for Transparent Research Analysis). It ships the `lc` executable and Claude Code skills/hooks used during interactive analysis work. - **ASTRA** = pure specification: schema, validation, prior insights & findings, evidence verification, helpers, minimal CLI -- **lightcone-cli** = agentic layer: Claude Code skills, project scaffolding, **Snakemake-based execution**, container builds, telemetry +- **lightcone-cli** = agentic layer: Claude Code skills, project scaffolding, **Snakemake-based execution**, container builds lightcone-cli depends on ASTRA. The `astra` CLI handles spec operations; the `lc` CLI handles execution and agent operations. @@ -71,7 +71,6 @@ claude/lightcone/ # Claude plugin source — force-included into the w ├── agents/ # lc-extractor ├── guides/ # astra-reference, lightcone-cli-reference, ui-brand ├── templates/ # Project CLAUDE.md template -├── hooks/ # Langfuse telemetry hooks (Python) └── scripts/ # Session hooks (bash): venv activation, validate-on-save, status display tests/ # pytest — mirrors src/ structure diff --git a/README.md b/README.md index 5f104dda..b4c0db3d 100644 --- a/README.md +++ b/README.md @@ -116,10 +116,6 @@ Every materialized output gets a `.lightcone-manifest.json` capturing `code_vers `lc export wrroc` walks your manifests and emits a [Workflow Run RO-Crate](https://www.researchobject.org/workflow-run-crate/) bundle — a JSON-LD package readable by WorkflowHub, Zenodo's RO-Crate plugin, and any RO-Crate-aware archive. Each materialization becomes a `CreateAction` with `object` (inputs, including upstream datasets via stable `@id` references) and `result` (the output dataset); decisions become `PropertyValue` entities; the workflow is captured as a `ComputationalWorkflow`. The lightcone manifest format on disk is unchanged — WRROC is the publication view, generated on demand. Use `--metadata-only` to ship only the provenance graph (useful when data files are huge), `--zip` to package the bundle for upload, or `-u ` to restrict to specific universes. -### Telemetry - -Claude Code sessions are traced to Langfuse with full conversation structure, tool calls, and git commit linking. Disable with `TRACE_TO_LANGFUSE=false` in `.claude/settings.local.json`. - ## License BSD 3-Clause diff --git a/claude/lightcone/hooks/langfuse_git_commit_hook.py b/claude/lightcone/hooks/langfuse_git_commit_hook.py deleted file mode 100755 index f672f5cf..00000000 --- a/claude/lightcone/hooks/langfuse_git_commit_hook.py +++ /dev/null @@ -1,303 +0,0 @@ -#!/usr/bin/env python3 -# Copied from langfuse-cli (https://github.com/langfuse/langfuse-cli) -# Copyright (c) 2023-2026 Langfuse GmbH — MIT License -# See NOTICE file in the project root for full license text. -""" -Claude Code PostToolUse hook for git commit detection. - -Fires after Bash tool use, checks if a git commit occurred, and records -metadata in a trace manifest. Installed by langfuse-cli. -""" - -from __future__ import annotations - -import json -import os -import re -import sys -import uuid -from datetime import datetime, timezone -from pathlib import Path -from typing import Any - -try: - from langfuse_utils import ( - atomic_write_json, - build_github_commit_url, - debug, - extract_session_id, - get_remote_url, - read_hook_payload, - read_last_trace, - resolve_repo_root, - run_git, - tracing_enabled, - write_trace_manifest, - ) -except ImportError: - sys.exit(0) - -# Detect git commit in simple and chained shell commands: -# - git commit -m "..." -# - cd repo && git commit -# - VAR=1 git -C repo commit -GIT_COMMIT_RE = re.compile( - r"(?:^|&&|\|\||;)\s*(?:[A-Za-z_][A-Za-z0-9_]*=\S+\s+)*git(?:\s+-C\s+\S+)?\s+commit(?:\s|$)" -) - - -def _to_int(value: Any) -> int | None: - if isinstance(value, bool): - return int(value) - if isinstance(value, int): - return value - if isinstance(value, str): - stripped = value.strip() - if stripped and stripped.lstrip("-").isdigit(): - return int(stripped) - return None - - -def _command_succeeded(payload: dict) -> bool | None: - for key in ( - "exit_code", - "exitCode", - "status", - "status_code", - "tool_exit_code", - "toolExitCode", - ): - if key not in payload: - continue - code = _to_int(payload.get(key)) - if code is not None: - return code == 0 - - for key in ("success", "ok"): - if key in payload and isinstance(payload[key], bool): - return payload[key] - - result = payload.get("tool_result") - if isinstance(result, dict): - return _command_succeeded(result) - - return None - - -def _extract_tool_name(payload: dict) -> str: - value = payload.get("tool_name") or payload.get("toolName") - return value if isinstance(value, str) else "" - - -def _extract_command(payload: dict) -> str: - tool_input = payload.get("tool_input") - if not isinstance(tool_input, dict): - tool_input = payload.get("toolInput") - - if isinstance(tool_input, dict): - command = tool_input.get("command") - if isinstance(command, str): - return command - - command = payload.get("command") - if isinstance(command, str): - return command - - return "" - - -def _looks_like_git_commit_command(command: str) -> bool: - return bool(GIT_COMMIT_RE.search(command)) - - -def _find_repo_root(payload: dict) -> Path: - cwd = payload.get("cwd") - if not isinstance(cwd, str) or not cwd.strip(): - cwd = os.getcwd() - - root = resolve_repo_root(Path(cwd)) - if root: - return root - return Path(cwd).expanduser().resolve() - - -def _head_changed_from_orig_head(repo_root: Path, head_sha: str) -> bool: - orig_head = run_git(repo_root, ["rev-parse", "ORIG_HEAD"]) - if orig_head and orig_head == head_sha: - return False - return True - - -def _extract_host(trace_url: str | None) -> str | None: - if not trace_url or "://" not in trace_url: - return None - before_trace = trace_url.split("/trace/")[0] - return before_trace.rstrip("/") if before_trace else None - - -def _write_agent_trace_record( - repo_root: Path, - commit_sha: str, - trace_url: str | None, - session_id: str, -) -> None: - try: - changed_files = run_git(repo_root, ["diff-tree", "--no-commit-id", "--name-only", "-r", commit_sha]) - if not changed_files: - return - - files = [] - conversation_entry: dict[str, Any] = { - "contributor": {"type": "ai"}, - "ranges": [{"start_line": 1, "end_line": 1}], - } - if trace_url: - conversation_entry["url"] = trace_url - related: list[dict[str, str]] = [] - if trace_url: - related.append({"type": "trace", "url": trace_url}) - if related: - conversation_entry["related"] = related - - for fname in changed_files.strip().splitlines(): - fname = fname.strip() - if not fname: - continue - fpath = repo_root / fname - line_count = 1 - if fpath.is_file(): - try: - with open(fpath, "rb") as fh: - line_count = max(1, sum(1 for _ in fh)) - except Exception: - pass - conv = dict(conversation_entry) - conv["ranges"] = [{"start_line": 1, "end_line": line_count}] - files.append({"path": fname, "conversations": [conv]}) - - if not files: - return - - record: dict[str, Any] = { - "version": "0.1.0", - "id": str(uuid.uuid4()), - "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), - "vcs": {"type": "git", "revision": commit_sha}, - "tool": {"name": "claude-code"}, - "files": files, - "metadata": {"sessionId": session_id}, - } - - traces_dir = repo_root / ".langfuse" / "traces" - record_path = traces_dir / f"agent-trace-{commit_sha[:12]}.json" - atomic_write_json(record_path, record) - debug(f"Wrote Agent Trace record to {record_path}") - except Exception as exc: - debug(f"_write_agent_trace_record failed: {exc}") - - -def main() -> int: - try: - if not tracing_enabled(): - return 0 - - payload = read_hook_payload() - if _extract_tool_name(payload) != "Bash": - return 0 - - command = _extract_command(payload).strip() - if not command or not _looks_like_git_commit_command(command): - return 0 - - command_success = _command_succeeded(payload) - if command_success is False: - return 0 - - creds_ok = ( - os.getenv("LANGFUSE_PUBLIC_KEY") - and os.getenv("LANGFUSE_SECRET_KEY") - ) - if not creds_ok: - return 0 - - host = ( - os.getenv("LANGFUSE_BASE_URL") - or os.getenv("LANGFUSE_HOST") - or "https://cloud.langfuse.com" - ).rstrip("/") - - repo_root = _find_repo_root(payload) - - session_id = extract_session_id(payload) - - # Read last-trace with session validation to avoid cross-session confusion - session_data = read_last_trace(expected_session_id=session_id) - - # Fallback: try per-repo session file (legacy) - if not session_data: - legacy_session_path = repo_root / ".langfuse" / "current-session.json" - if legacy_session_path.exists(): - try: - data = json.loads(legacy_session_path.read_text(encoding="utf-8")) - if isinstance(data, dict) and data.get("trace_id"): - if not session_id or data.get("session_id") == session_id: - session_data = data - except Exception: - pass - - if not session_data: - return 0 - - session_id = session_data.get("session_id", session_id or "") - trace_id = session_data.get("trace_id") - trace_url = session_data.get("trace_url") - - if not isinstance(session_id, str) or not session_id: - return 0 - if not isinstance(trace_id, str) or not trace_id: - return 0 - if not isinstance(trace_url, str) or not trace_url: - trace_url = f"{host}/trace/{trace_id}" - - commit_sha = run_git(repo_root, ["rev-parse", "HEAD"]) - if not commit_sha: - return 0 - - if command_success is None and not _head_changed_from_orig_head(repo_root, commit_sha): - return 0 - - branch = run_git(repo_root, ["rev-parse", "--abbrev-ref", "HEAD"]) or "unknown" - commit_message = run_git(repo_root, ["log", "-1", "--pretty=%s"]) or "" - - remote_url = get_remote_url(repo_root) - commit_url = build_github_commit_url(remote_url, commit_sha) - - metadata = { - "commit_sha": commit_sha, - "commit_url": commit_url, - "commit_message": commit_message, - "branch": branch, - "remote_url": remote_url, - "session_id": session_id, - "source": "claude-code", - } - - git_metadata = { - "git_commit_sha": commit_sha, - "git_commit_url": commit_url, - "git_remote_url": remote_url, - } - write_trace_manifest(repo_root, session_id, trace_id, _extract_host(trace_url) or host, git_metadata) - - _write_agent_trace_record( - repo_root, commit_sha, trace_url, session_id, - ) - - return 0 - except Exception as exc: - debug(str(exc)) - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/claude/lightcone/hooks/langfuse_hook.py b/claude/lightcone/hooks/langfuse_hook.py deleted file mode 100755 index d8213fa8..00000000 --- a/claude/lightcone/hooks/langfuse_hook.py +++ /dev/null @@ -1,894 +0,0 @@ -#!/usr/bin/env python3 -# Copied from langfuse-cli (https://github.com/langfuse/langfuse-cli) -# Copyright (c) 2023-2026 Langfuse GmbH — MIT License -# See NOTICE file in the project root for full license text. -""" -Claude Code Stop hook -> Langfuse tracing. - -Reads the conversation transcript incrementally and emits turns to Langfuse. -Installed by langfuse-cli. -""" - -import hashlib -import json -import os -import re -import sys -import time -import time as _time_mod -from dataclasses import dataclass, field -from datetime import datetime, timezone -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple - -try: - from langfuse_utils import ( - DEBUG, - LAST_TRACE_FILE, - LOCK_FILE, - MAX_CHARS, - STATE_DIR, - STATE_FILE, - debug, - error, - extract_session_id, - extract_transcript_path, - get_claude_user_email, - get_git_metadata, - get_langfuse_credentials, - info, - read_hook_payload, - read_last_trace, - resolve_repo_root_with_fallback, - save_last_trace, - tracing_enabled, - write_trace_manifest, - ) -except ImportError: - sys.exit(0) - -try: - from langfuse import Langfuse, propagate_attributes -except Exception: - sys.exit(0) - - -# --------------- State locking (best-effort) --------------- -class FileLock: - def __init__(self, path: Path, timeout_s: float = 2.0): - self.path = path - self.timeout_s = timeout_s - self._fh = None - - def __enter__(self): - STATE_DIR.mkdir(parents=True, exist_ok=True) - self._fh = open(self.path, "a+", encoding="utf-8") - try: - import fcntl - - deadline = time.time() + self.timeout_s - while True: - try: - fcntl.flock(self._fh.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB) - break - except BlockingIOError: - if time.time() > deadline: - break - time.sleep(0.05) - except Exception: - pass - return self - - def __exit__(self, exc_type, exc, tb): - try: - import fcntl - - fcntl.flock(self._fh.fileno(), fcntl.LOCK_UN) - except Exception: - pass - try: - self._fh.close() - except Exception: - pass - - -def load_state() -> Dict[str, Any]: - try: - if not STATE_FILE.exists(): - return {} - return json.loads(STATE_FILE.read_text(encoding="utf-8")) - except Exception: - return {} - - -def save_state(state: Dict[str, Any]) -> None: - try: - STATE_DIR.mkdir(parents=True, exist_ok=True) - tmp = STATE_FILE.with_suffix(".tmp") - tmp.write_text(json.dumps(state, indent=2, sort_keys=True), encoding="utf-8") - os.replace(tmp, STATE_FILE) - except Exception as e: - debug(f"save_state failed: {e}") - - -def state_key(session_id: str, transcript_path: str) -> str: - raw = f"{session_id}::{transcript_path}" - return hashlib.sha256(raw.encode("utf-8")).hexdigest() - - -# --------------- Transcript parsing helpers --------------- -def get_content(msg: Dict[str, Any]) -> Any: - if not isinstance(msg, dict): - return None - if "message" in msg and isinstance(msg.get("message"), dict): - return msg["message"].get("content") - return msg.get("content") - - -def get_role(msg: Dict[str, Any]) -> Optional[str]: - t = msg.get("type") - if t in ("user", "assistant"): - return t - m = msg.get("message") - if isinstance(m, dict): - r = m.get("role") - if r in ("user", "assistant"): - return r - return None - - -def is_tool_result(msg: Dict[str, Any]) -> bool: - role = get_role(msg) - if role != "user": - return False - content = get_content(msg) - if isinstance(content, list): - return any(isinstance(x, dict) and x.get("type") == "tool_result" for x in content) - return False - - -def iter_tool_results(content: Any) -> List[Dict[str, Any]]: - out: List[Dict[str, Any]] = [] - if isinstance(content, list): - for x in content: - if isinstance(x, dict) and x.get("type") == "tool_result": - out.append(x) - return out - - -def iter_tool_uses(content: Any) -> List[Dict[str, Any]]: - out: List[Dict[str, Any]] = [] - if isinstance(content, list): - for x in content: - if isinstance(x, dict) and x.get("type") == "tool_use": - out.append(x) - return out - - -def extract_text(content: Any) -> str: - if isinstance(content, str): - return content - if isinstance(content, list): - parts: List[str] = [] - for x in content: - if isinstance(x, dict) and x.get("type") == "text": - parts.append(x.get("text", "")) - elif isinstance(x, str): - parts.append(x) - return "\n".join([p for p in parts if p]) - return "" - - -def truncate_text(s: str, max_chars: int = MAX_CHARS) -> Tuple[str, Dict[str, Any]]: - if s is None: - return "", {"truncated": False, "orig_len": 0} - orig_len = len(s) - if orig_len <= max_chars: - return s, {"truncated": False, "orig_len": orig_len} - head = s[:max_chars] - return head, { - "truncated": True, - "orig_len": orig_len, - "kept_len": len(head), - "sha256": hashlib.sha256(s.encode("utf-8")).hexdigest(), - } - - -def get_model(msg: Dict[str, Any]) -> str: - m = msg.get("message") - if isinstance(m, dict): - return m.get("model") or "claude" - return "claude" - - -def get_message_id(msg: Dict[str, Any]) -> Optional[str]: - m = msg.get("message") - if isinstance(m, dict): - mid = m.get("id") - if isinstance(mid, str) and mid: - return mid - return None - - -def parse_timestamp(msg: Dict[str, Any]) -> Optional[datetime]: - ts = msg.get("timestamp") - if isinstance(ts, str): - try: - return datetime.fromisoformat(ts.replace("Z", "+00:00")) - except Exception: - return None - return None - - -def get_version(msg: Dict[str, Any]) -> Optional[str]: - v = msg.get("version") - return v if isinstance(v, str) and v else None - - -def _duration_ns(start: Optional[datetime], end: Optional[datetime]) -> Optional[int]: - """Compute duration in nanoseconds between two transcript timestamps.""" - if not start or not end: - return None - delta = (end - start).total_seconds() - if delta < 0: - return None - return int(delta * 1_000_000_000) - - -def extract_bash_command_prefix(tool_input: Any) -> Optional[str]: - """Extract the first command word from a Bash tool input.""" - if isinstance(tool_input, dict): - cmd = tool_input.get("command", "") - elif isinstance(tool_input, str): - cmd = tool_input - else: - return None - if not isinstance(cmd, str) or not cmd.strip(): - return None - tokens = re.split(r"[\s|;&]", cmd.strip()) - first_word = tokens[0] if tokens else None - return first_word if first_word else None - - -# --------------- Incremental reader --------------- -@dataclass -class SessionState: - offset: int = 0 - buffer: str = "" - turn_count: int = 0 - - -def load_session_state(global_state: Dict[str, Any], key: str) -> SessionState: - s = global_state.get(key, {}) - return SessionState( - offset=int(s.get("offset", 0)), - buffer=str(s.get("buffer", "")), - turn_count=int(s.get("turn_count", 0)), - ) - - -def write_session_state(global_state: Dict[str, Any], key: str, ss: SessionState) -> None: - global_state[key] = { - "offset": ss.offset, - "buffer": ss.buffer, - "turn_count": ss.turn_count, - "updated": datetime.now(timezone.utc).isoformat(), - } - - -def read_new_jsonl(transcript_path: Path, ss: SessionState) -> Tuple[List[Dict[str, Any]], SessionState]: - if not transcript_path.exists(): - return [], ss - - try: - file_size = transcript_path.stat().st_size - if ss.offset > file_size: - # Transcript may have been rotated/truncated; restart incremental read. - ss.offset = 0 - ss.buffer = "" - with open(transcript_path, "rb") as f: - f.seek(ss.offset) - chunk = f.read() - new_offset = f.tell() - except Exception as e: - debug(f"read_new_jsonl failed: {e}") - return [], ss - - if not chunk: - return [], ss - - try: - text = chunk.decode("utf-8", errors="replace") - except Exception: - text = chunk.decode(errors="replace") - - combined = ss.buffer + text - lines = combined.split("\n") - ss.buffer = lines[-1] - ss.offset = new_offset - - msgs: List[Dict[str, Any]] = [] - for line in lines[:-1]: - line = line.strip() - if not line: - continue - try: - msgs.append(json.loads(line)) - except Exception: - continue - - return msgs, ss - - -# --------------- Turn assembly --------------- -@dataclass -class ToolResult: - content: Any - is_error: bool = False - timestamp: Optional[datetime] = None - - -@dataclass -class Turn: - user_msg: Dict[str, Any] - assistant_msgs: List[Dict[str, Any]] - tool_results_by_id: Dict[str, ToolResult] - user_timestamp: Optional[datetime] = None - first_assistant_timestamp: Optional[datetime] = None - last_assistant_timestamp: Optional[datetime] = None - tool_use_timestamps: Dict[str, Optional[datetime]] = field(default_factory=dict) - claude_code_version: Optional[str] = None - - -def build_turns(messages: List[Dict[str, Any]]) -> List[Turn]: - """Assemble a flat list of JSONL transcript messages into conversation turns. - - A *turn* groups a single user message with all the assistant messages that - follow it (including intermediate tool-use / tool-result exchanges) until - the next user message arrives. - - The function handles the multi-step structure produced by Claude Code: - - * One or more ``"user"`` messages may carry ``tool_result`` content blocks - that belong to the *preceding* assistant turn (i.e. the results of tool - calls the assistant requested). These are attached to the current turn, - not treated as a new user turn. - * Multiple assistant messages are issued during a single turn (one per - tool-use / response cycle). All of them are collected in - ``Turn.assistant_msgs``; only the *latest* message with a given - ``message_id`` is kept to avoid duplicates from streaming. - * When a new user message that is *not* purely tool-result content arrives, - the accumulated state is flushed into a ``Turn`` object and a new turn - begins. - - Args: - messages: Raw JSONL objects from a Claude Code transcript file. Each - entry must have at least a ``"role"`` key (``"user"`` or - ``"assistant"``). Entries without a recognized role are silently - skipped. - - Returns: - Ordered list of :class:`Turn` objects, one per user-initiated exchange. - """ - turns: List[Turn] = [] - current_user: Optional[Dict[str, Any]] = None - user_ts: Optional[datetime] = None - assistant_order: List[str] = [] - assistant_latest: Dict[str, Dict[str, Any]] = {} - assistant_timestamps: Dict[str, Optional[datetime]] = {} - tool_results_by_id: Dict[str, ToolResult] = {} - tool_use_timestamps: Dict[str, Optional[datetime]] = {} - version: Optional[str] = None - - def flush_turn(): - nonlocal current_user, user_ts, assistant_order, assistant_latest - nonlocal assistant_timestamps, tool_results_by_id, tool_use_timestamps - nonlocal turns, version - if current_user is None: - return - if not assistant_latest: - # No assistant response yet. If there are tool_results (from - # a denial recorded as a user-side tool_result), still skip - # because we have no assistant content to show. - return - ordered_mids = [mid for mid in assistant_order if mid in assistant_latest] - assistants = [assistant_latest[mid] for mid in ordered_mids] - first_ts = assistant_timestamps.get(ordered_mids[0]) if ordered_mids else None - last_ts = assistant_timestamps.get(ordered_mids[-1]) if ordered_mids else None - turns.append(Turn( - user_msg=current_user, - assistant_msgs=assistants, - tool_results_by_id=dict(tool_results_by_id), - user_timestamp=user_ts, - first_assistant_timestamp=first_ts, - last_assistant_timestamp=last_ts, - tool_use_timestamps=dict(tool_use_timestamps), - claude_code_version=version, - )) - - for msg_idx, msg in enumerate(messages): - msg_version = get_version(msg) - if msg_version: - version = msg_version - - role = get_role(msg) - msg_type = msg.get("type", "?") - debug(f"build_turns[{msg_idx}]: type={msg_type} role={role} is_tool_result={is_tool_result(msg)}") - - if is_tool_result(msg): - tr_ts = parse_timestamp(msg) - for tr in iter_tool_results(get_content(msg)): - tid = tr.get("tool_use_id") - if tid: - tool_results_by_id[str(tid)] = ToolResult( - content=tr.get("content"), - is_error=bool(tr.get("is_error", False)), - timestamp=tr_ts, - ) - continue - - if role == "user": - flush_turn() - current_user = msg - user_ts = parse_timestamp(msg) - assistant_order = [] - assistant_latest = {} - assistant_timestamps = {} - tool_results_by_id = {} - tool_use_timestamps = {} - continue - - if role == "assistant": - if current_user is None: - continue - mid = get_message_id(msg) or f"noid:{len(assistant_order)}" - if mid not in assistant_latest: - assistant_order.append(mid) - assistant_latest[mid] = msg - assistant_timestamps[mid] = parse_timestamp(msg) - for tu in iter_tool_uses(get_content(msg)): - tid = tu.get("id") - if tid: - tool_use_timestamps[str(tid)] = parse_timestamp(msg) - continue - - flush_turn() - return turns - - -# --------------- Langfuse emit --------------- -def _tool_calls_from_assistants(assistant_msgs: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - calls: List[Dict[str, Any]] = [] - for am in assistant_msgs: - for tu in iter_tool_uses(get_content(am)): - tid = tu.get("id") or "" - raw_input = tu.get("input") if isinstance(tu.get("input"), (dict, list, str, int, float, bool)) else {} - calls.append({ - "id": str(tid), - "name": tu.get("name") or "unknown", - "input": raw_input, - }) - return calls - - -def _tool_calls_to_chatml(tool_calls: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """Convert internal tool call list to OpenAI ChatML tool_calls format.""" - out: List[Dict[str, Any]] = [] - for tc in tool_calls: - args = tc["input"] - args_str = args if isinstance(args, str) else json.dumps(args, ensure_ascii=False) - out.append({ - "id": tc["id"], - "type": "function", - "function": { - "name": tc["name"], - "arguments": args_str, - }, - }) - return out - - -def _merge_metadata(base: Dict[str, Any], extra: Dict[str, Any]) -> Dict[str, Any]: - merged = dict(base) - for key, value in extra.items(): - if value is not None and value != "": - merged[key] = value - return merged - - -def _build_propagated_metadata(git_metadata: Dict[str, Any]) -> Dict[str, str]: - out: Dict[str, str] = {} - commit_url = git_metadata.get("git_commit_url") - if isinstance(commit_url, str) and commit_url and len(commit_url) <= 200: - out["github_commit_url"] = commit_url - commit_sha = git_metadata.get("git_commit_sha") - if isinstance(commit_sha, str) and commit_sha: - out["commit_sha"] = commit_sha - return out - - -def emit_turn( - langfuse: Langfuse, - session_id: str, - turn_num: int, - turn: Turn, - transcript_path: Path, - pre_trace_id: Optional[str] = None, - git_metadata: Optional[Dict[str, Any]] = None, - propagated_metadata: Optional[Dict[str, str]] = None, - user_id: Optional[str] = None, -) -> Optional[str]: - """Emit a single conversation turn to Langfuse as a trace + generation span. - - Each call creates: - - * A **trace** (``langfuse.trace``) scoped to *session_id*. If - *pre_trace_id* is supplied (from the session-init hook) and this is - the first turn (``turn_num == 1``), the trace reuses that ID so the - full session appears as one trace in the Langfuse UI. - * A **generation span** (``trace.generation``) that carries the full - ChatML-formatted input/output, model name, tool calls with their - outputs, and timing metadata. - - Tool calls present in the assistant messages are extracted and added to - the generation span in OpenAI ChatML format (``tool_calls`` key). - Tool results (from subsequent ``"user"`` role messages) are looked up - by tool-call ID in ``turn.tool_results_by_id`` and attached as - ``output`` fields on each tool call. - - Args: - langfuse: An authenticated :class:`langfuse.Langfuse` client instance. - session_id: The Claude Code session identifier used to group traces. - turn_num: 1-based index of this turn within the session. - turn: The assembled :class:`Turn` object to emit. - transcript_path: Path to the transcript file (stored as metadata). - pre_trace_id: Deterministic trace ID from the session-init hook. - When provided and ``turn_num == 1``, the trace is created with - this ID so it links to the pre-session trace entry. - git_metadata: Dict of git context (commit SHA, GitHub URL, branch). - Merged into span metadata when present. - propagated_metadata: Extra string metadata to propagate from a prior - turn's trace (e.g. ``github_commit_url``). - user_id: The Claude user's email address (from ``~/.claude.json``). - Attached to the trace for per-user analytics in Langfuse. - - Returns: - The trace ID string if the emit succeeded, ``None`` on any error. - """ - user_text_raw = extract_text(get_content(turn.user_msg)) - user_text, user_text_meta = truncate_text(user_text_raw) - - last_assistant = turn.assistant_msgs[-1] - assistant_text_raw = extract_text(get_content(last_assistant)) - assistant_text, assistant_text_meta = truncate_text(assistant_text_raw) - - model = get_model(turn.assistant_msgs[0]) - tool_calls = _tool_calls_from_assistants(turn.assistant_msgs) - - for c in tool_calls: - tid = c["id"] - if tid and tid in turn.tool_results_by_id: - tr = turn.tool_results_by_id[tid] - out_raw = tr.content - out_str = out_raw if isinstance(out_raw, str) else json.dumps(out_raw, ensure_ascii=False) - out_trunc, out_meta = truncate_text(out_str) - c["output"] = out_trunc - c["output_meta"] = out_meta - c["is_error"] = tr.is_error - else: - c["output"] = None - c["is_error"] = True - - chatml_tool_calls = _tool_calls_to_chatml(tool_calls) - - # ChatML-formatted input (OpenAI-style request body) - generation_input: Dict[str, Any] = { - "model": model, - "messages": [{"role": "user", "content": user_text}], - } - - # ChatML-formatted output (assistant message with optional tool_calls) - generation_output: Dict[str, Any] = { - "role": "assistant", - "content": assistant_text, - } - if chatml_tool_calls: - generation_output["tool_calls"] = chatml_tool_calls - - span_input: Dict[str, Any] = { - "model": model, - "messages": [{"role": "user", "content": user_text}], - } - span_output: Dict[str, Any] = dict(generation_output) - - span_metadata: Dict[str, Any] = { - "source": "claude-code", - "session_id": session_id, - "turn_number": turn_num, - "transcript_path": str(transcript_path), - "user_text": user_text_meta, - } - if turn.claude_code_version: - span_metadata["claude_code_version"] = turn.claude_code_version - if git_metadata: - span_metadata = _merge_metadata(span_metadata, git_metadata) - - propagate_kwargs: Dict[str, Any] = { - "session_id": session_id, - "trace_name": f"Claude Code - Turn {turn_num}", - "tags": ["claude-code"], - } - if user_id: - propagate_kwargs["user_id"] = user_id - if propagated_metadata: - propagate_kwargs["metadata"] = propagated_metadata - - # Compute durations in nanoseconds from transcript timestamps - span_dur_ns = _duration_ns(turn.user_timestamp, turn.last_assistant_timestamp) - gen_dur_ns = _duration_ns(turn.first_assistant_timestamp, turn.last_assistant_timestamp) - - with propagate_attributes(**propagate_kwargs): - if pre_trace_id: - obs_kwargs: Dict[str, Any] = { - "as_type": "span", - "name": f"Claude Code - Turn {turn_num}", - "input": span_input, - "metadata": span_metadata, - "trace_context": {"trace_id": pre_trace_id}, - } - try: - span_ctx = langfuse.start_as_current_observation(**obs_kwargs) - except TypeError as exc: - if "trace_context" in str(exc): - obs_kwargs.pop("trace_context", None) - span_ctx = langfuse.start_as_current_observation(**obs_kwargs) - else: - raise - else: - span_ctx = langfuse.start_as_current_span( - name=f"Claude Code - Turn {turn_num}", - input=span_input, - metadata=span_metadata, - ) - - span_start_ns = _time_mod.time_ns() - - with span_ctx as trace_span: - gen_metadata = _merge_metadata({ - "assistant_text": assistant_text_meta, - "tool_count": len(tool_calls), - }, git_metadata or {}) - if turn.claude_code_version: - gen_metadata["claude_code_version"] = turn.claude_code_version - - gen_start_ns = _time_mod.time_ns() - gen_obs = langfuse.start_observation( - name="Claude Response", - as_type="generation", - model=model, - input=generation_input, - output=generation_output, - metadata=gen_metadata, - ) - if gen_dur_ns is not None: - gen_obs.end(end_time=gen_start_ns + gen_dur_ns) - else: - gen_obs.end() - - for tc in tool_calls: - # ChatML-formatted tool input (assistant's tool call) - chatml_tc = _tool_calls_to_chatml([tc])[0] - tool_chatml_input: Dict[str, Any] = { - "role": "assistant", - "tool_calls": [chatml_tc], - } - - # ChatML-formatted tool output (tool result message) - tool_chatml_output: Optional[Dict[str, Any]] = None - if tc.get("output") is not None: - tool_chatml_output = { - "role": "tool", - "tool_call_id": tc["id"], - "content": tc["output"], - } - - # Observation name: include bash command prefix for Bash tools - obs_name = f"Tool: {tc['name']}" - if tc["name"] == "Bash": - prefix = extract_bash_command_prefix(tc["input"]) - if prefix: - obs_name = f"Tool: Bash ({prefix})" - - tool_metadata = _merge_metadata({ - "tool_name": tc["name"], - "tool_id": tc["id"], - "output_meta": tc.get("output_meta"), - }, git_metadata or {}) - if turn.claude_code_version: - tool_metadata["claude_code_version"] = turn.claude_code_version - - # Level for denied/failed tools - level_kwargs: Dict[str, Any] = {} - if tc.get("is_error") or tc.get("output") is None: - level_kwargs["level"] = "ERROR" - level_kwargs["status_message"] = "Tool execution denied or failed" - - tool_start_ns = _time_mod.time_ns() - tool_obs = langfuse.start_observation( - name=obs_name, - as_type="tool", - input=tool_chatml_input, - output=tool_chatml_output, - metadata=tool_metadata, - **level_kwargs, - ) - - tu_ts = turn.tool_use_timestamps.get(tc["id"]) - tr_obj = turn.tool_results_by_id.get(tc["id"]) - tool_dur_ns = _duration_ns(tu_ts, tr_obj.timestamp if tr_obj else None) - if tool_dur_ns is not None: - tool_obs.end(end_time=tool_start_ns + tool_dur_ns) - else: - tool_obs.end() - - trace_span.update(output=span_output) - - # Set span end_time based on transcript duration - if span_dur_ns is not None: - try: - trace_span.end(end_time=span_start_ns + span_dur_ns) - except Exception: - pass - - return getattr(trace_span, "trace_id", None) - - -# --------------- Main --------------- -def main() -> int: - start = time.time() - debug("Hook started") - - if not tracing_enabled(): - return 0 - - creds = get_langfuse_credentials() - if not creds: - return 0 - - payload = read_hook_payload() - session_id = extract_session_id(payload) - transcript_path = extract_transcript_path(payload) - - if not session_id or not transcript_path: - debug("Missing session_id or transcript_path from hook payload; exiting.") - return 0 - - if not transcript_path.exists(): - debug(f"Transcript path does not exist: {transcript_path}") - return 0 - - cwd = Path(os.getcwd()) - git_metadata = get_git_metadata(transcript_path, cwd) - propagated_metadata = _build_propagated_metadata(git_metadata) - - user_email = get_claude_user_email() - if user_email: - debug(f"Resolved Claude Code user email: {user_email}") - - try: - langfuse = Langfuse( - public_key=creds["public_key"], - secret_key=creds["secret_key"], - host=creds["host"], - ) - except Exception: - return 0 - - pre_trace_id = None - last_trace = read_last_trace(expected_session_id=session_id) - if last_trace: - pre_trace_id = last_trace.get("trace_id") - debug(f"Using pre-generated trace_id: {pre_trace_id}") - - try: - with FileLock(LOCK_FILE): - state = load_state() - key = state_key(session_id, str(transcript_path)) - ss = load_session_state(state, key) - - msgs, ss = read_new_jsonl(transcript_path, ss) - if not msgs: - debug(f"No new messages in transcript (offset={ss.offset})") - write_session_state(state, key, ss) - save_state(state) - return 0 - - debug(f"Read {len(msgs)} new messages from transcript") - turns = build_turns(msgs) - if not turns: - # Log at INFO level to help diagnose missing traces - msg_types = [m.get("type", "?") for m in msgs] - info(f"No turns built from {len(msgs)} messages (types: {msg_types}, session={session_id})") - write_session_state(state, key, ss) - save_state(state) - return 0 - debug(f"Built {len(turns)} turns from messages") - - emitted = 0 - last_trace_id = None - for t in turns: - emitted += 1 - turn_num = ss.turn_count + emitted - # Only bind to the pre-generated trace_id for the very - # first turn of a session (so the commit-message URL - # matches). All subsequent turns get their own traces, - # grouped under the same session_id. - use_trace_id = pre_trace_id if (ss.turn_count == 0 and emitted == 1) else None - try: - tid = emit_turn( - langfuse, - session_id, - turn_num, - t, - transcript_path, - pre_trace_id=use_trace_id, - git_metadata=git_metadata, - propagated_metadata=propagated_metadata, - user_id=user_email, - ) - if tid: - last_trace_id = tid - except Exception as e: - debug(f"emit_turn failed: {e}") - - ss.turn_count += emitted - write_session_state(state, key, ss) - save_state(state) - - effective_trace_id = last_trace_id or pre_trace_id - - # Explicitly stamp git metadata onto the trace so it appears - # in the Langfuse trace-level metadata (propagate_attributes - # only applies to NEW traces; the trace may already exist). - if effective_trace_id and git_metadata: - trace_meta: Dict[str, Any] = {"source": "claude-code"} - commit_url = git_metadata.get("git_commit_url") - if commit_url: - trace_meta["github_commit_url"] = commit_url - commit_sha = git_metadata.get("git_commit_sha") - if commit_sha: - trace_meta["commit_sha"] = commit_sha - try: - langfuse.trace(id=effective_trace_id, metadata=trace_meta) - except Exception as e: - debug(f"trace metadata update failed: {e}") - - try: - langfuse.flush() - except Exception: - pass - - if effective_trace_id: - save_last_trace(session_id, effective_trace_id, creds["host"]) - - repo_root = resolve_repo_root_with_fallback(transcript_path, cwd) - if repo_root and effective_trace_id: - write_trace_manifest(repo_root, session_id, effective_trace_id, creds["host"], git_metadata) - - dur = time.time() - start - info(f"Processed {emitted} turns in {dur:.2f}s (session={session_id})") - return 0 - - except Exception as e: - debug(f"Unexpected failure: {e}") - return 0 - - finally: - try: - langfuse.shutdown() - except Exception: - pass - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/claude/lightcone/hooks/langfuse_prepare_commit_msg.py b/claude/lightcone/hooks/langfuse_prepare_commit_msg.py deleted file mode 100755 index 5921f0ee..00000000 --- a/claude/lightcone/hooks/langfuse_prepare_commit_msg.py +++ /dev/null @@ -1,142 +0,0 @@ -#!/usr/bin/env python3 -# Copied from langfuse-cli (https://github.com/langfuse/langfuse-cli) -# Copyright (c) 2023-2026 Langfuse GmbH — MIT License -# See NOTICE file in the project root for full license text. -""" -prepare-commit-msg hook: appends a Langfuse-Trace trailer to commit messages. -Installed by langfuse-cli. -""" - -from __future__ import annotations - -import os -import sys -from pathlib import Path - -try: - from langfuse_utils import ( - LAST_TRACE_FILE, - read_recent_trace, - resolve_repo_root, - tracing_enabled, - ) -except ImportError: - LAST_TRACE_FILE = Path.home() / ".claude" / "state" / "langfuse_last_trace.json" - import json - from datetime import datetime, timezone - - def tracing_enabled() -> bool: - return os.environ.get("TRACE_TO_LANGFUSE", "").lower() == "true" - - def read_recent_trace( - path: Path, max_age_hours: float, expected_session_id: str | None = None - ) -> dict | None: - if not path.exists(): - return None - try: - data = json.loads(path.read_text(encoding="utf-8")) - except Exception: - return None - if not isinstance(data, dict): - return None - trace_url = data.get("trace_url") - trace_id = data.get("trace_id") - if not isinstance(trace_url, str) or not trace_url: - return None - if not isinstance(trace_id, str) or not trace_id: - return None - if expected_session_id and data.get("session_id") != expected_session_id: - return None - updated_at = data.get("updated_at") - if isinstance(updated_at, str): - try: - ts = datetime.fromisoformat(updated_at) - if ts.tzinfo is None: - ts = ts.replace(tzinfo=timezone.utc) - age_hours = (datetime.now(timezone.utc) - ts).total_seconds() / 3600 - if age_hours > max_age_hours: - return None - except Exception: - return None - else: - return None - return data - - def resolve_repo_root(search_path: Path) -> Path | None: - _ = search_path - return None - - -MAX_AGE_HOURS = 4 -SESSION_TRAILER_KEY = "Langfuse-Session" - - -def _append_trailers(content: str, trailers: list[str]) -> str: - """Append one or more trailers to a commit message, preserving existing trailer blocks.""" - lines = content.rstrip("\n").split("\n") - - has_existing_trailers = False - for line in reversed(lines): - stripped = line.strip() - if not stripped: - break - if ": " in stripped and not stripped.startswith("#"): - has_existing_trailers = True - break - else: - break - - joined = "\n".join(trailers) - if has_existing_trailers: - return "\n".join(lines) + "\n" + joined + "\n" - return "\n".join(lines) + "\n\n" + joined + "\n" - - -def main() -> int: - try: - if not tracing_enabled(): - return 0 - - if len(sys.argv) < 2: - return 0 - - msg_file = sys.argv[1] - commit_source = sys.argv[2] if len(sys.argv) > 2 else "" - - if commit_source in ("merge", "squash"): - return 0 - - # Prefer the global last-trace file (updated eagerly by the PreToolUse - # session-init hook for the *current* session) over the per-repo - # current-session file (only updated by the Stop hook, which runs - # *after* the commit finishes). - data = read_recent_trace(LAST_TRACE_FILE, MAX_AGE_HOURS) - if not data: - repo_root = resolve_repo_root(Path.cwd()) or Path.cwd() - local_trace_path = repo_root / ".langfuse" / "current-session.json" - data = read_recent_trace(local_trace_path, MAX_AGE_HOURS) - if not data: - return 0 - - session_url = data.get("session_url") - if not isinstance(session_url, str) or not session_url: - return 0 - - try: - content = Path(msg_file).read_text(encoding="utf-8") - except Exception: - return 0 - - if f"{SESSION_TRAILER_KEY}:" in content: - return 0 - - result = _append_trailers(content, [f"{SESSION_TRAILER_KEY}: {session_url}"]) - Path(msg_file).write_text(result, encoding="utf-8") - return 0 - - except Exception: - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/claude/lightcone/hooks/langfuse_session_init_hook.py b/claude/lightcone/hooks/langfuse_session_init_hook.py deleted file mode 100755 index e167c50c..00000000 --- a/claude/lightcone/hooks/langfuse_session_init_hook.py +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env python3 -# Copied from langfuse-cli (https://github.com/langfuse/langfuse-cli) -# Copyright (c) 2023-2026 Langfuse GmbH — MIT License -# See NOTICE file in the project root for full license text. -""" -PreToolUse hook: eagerly initializes the Langfuse trace ID for the current -Claude Code session so that prepare-commit-msg can reference it immediately. - -On the first tool use of a session, this hook: -1. Generates a deterministic trace_id from the session_id -2. Writes it to ~/.claude/state/langfuse_last_trace.json - -Subsequent invocations detect the matching session_id and exit immediately. -Installed by langfuse-cli. -""" - -import hashlib -import os -import sys - -try: - from langfuse_utils import ( - debug, - extract_session_id, - get_langfuse_credentials, - read_hook_payload, - read_last_trace, - save_last_trace, - tracing_enabled, - ) -except ImportError: - sys.exit(0) - - -def main() -> int: - try: - if not tracing_enabled(): - return 0 - - payload = read_hook_payload() - session_id = extract_session_id(payload) - if not session_id: - return 0 - - existing = read_last_trace(expected_session_id=session_id) - if existing: - return 0 - - creds = get_langfuse_credentials() - if not creds: - return 0 - - # Generate a deterministic trace_id from the session_id. - # Prefer the Langfuse SDK's create_trace_id (W3C-compatible 32-char hex) - # with a fallback to SHA-256 for environments without the SDK or older versions. - trace_id = None - try: - from langfuse import Langfuse - - lf = Langfuse( - public_key=creds["public_key"], - secret_key=creds["secret_key"], - host=creds["host"], - ) - trace_id = lf.create_trace_id(seed=session_id) - lf.shutdown() - except Exception: - pass - - if not trace_id: - trace_id = hashlib.sha256(session_id.encode("utf-8")).hexdigest()[:32] - - save_last_trace(session_id, trace_id, creds["host"]) - debug(f"Initialized trace_id {trace_id} for session {session_id}") - - return 0 - - except Exception: - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/claude/lightcone/hooks/langfuse_utils.py b/claude/lightcone/hooks/langfuse_utils.py deleted file mode 100755 index ca08842f..00000000 --- a/claude/lightcone/hooks/langfuse_utils.py +++ /dev/null @@ -1,457 +0,0 @@ -#!/usr/bin/env python3 -# Copied from langfuse-cli (https://github.com/langfuse/langfuse-cli) -# Copyright (c) 2023-2026 Langfuse GmbH — MIT License -# See NOTICE file in the project root for full license text. -"""Shared utilities for Langfuse Claude Code hooks. - -Installed by langfuse-cli. All hook scripts import from this module. -""" - -import json -import os -import re -import subprocess -import sys -import tempfile -from datetime import datetime, timezone -from pathlib import Path -from typing import Any, Dict, List, Optional - -# --------------- Configuration --------------- -HOOK_DEBUG_ENV = "LANGFUSE_HOOK_DEBUG" -DEBUG = os.environ.get(HOOK_DEBUG_ENV, "").lower() == "true" - -STATE_DIR = Path.home() / ".claude" / "state" -LOG_FILE = STATE_DIR / "langfuse_hook.log" -STATE_FILE = STATE_DIR / "langfuse_state.json" -LOCK_FILE = STATE_DIR / "langfuse_state.lock" -LAST_TRACE_FILE = STATE_DIR / "langfuse_last_trace.json" - -MAX_CHARS = int(os.environ.get("LANGFUSE_HOOK_MAX_CHARS", "20000")) - - -# --------------- Logging --------------- -def _log(level: str, message: str) -> None: - try: - STATE_DIR.mkdir(parents=True, exist_ok=True) - ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - with open(LOG_FILE, "a", encoding="utf-8") as f: - f.write(f"{ts} [{level}] {message}\n") - except Exception: - pass - - -def debug(msg: str) -> None: - if DEBUG: - _log("DEBUG", msg) - - -def info(msg: str) -> None: - _log("INFO", msg) - - -def warn(msg: str) -> None: - _log("WARN", msg) - - -def error(msg: str) -> None: - _log("ERROR", msg) - - -# --------------- Environment --------------- -def tracing_enabled() -> bool: - return os.environ.get("TRACE_TO_LANGFUSE", "").lower() == "true" - - -def get_langfuse_credentials() -> Optional[Dict[str, str]]: - public_key = os.environ.get("LANGFUSE_PUBLIC_KEY") - secret_key = os.environ.get("LANGFUSE_SECRET_KEY") - if not public_key or not secret_key: - return None - host = ( - os.environ.get("LANGFUSE_BASE_URL") - or os.environ.get("LANGFUSE_HOST") - or "https://cloud.langfuse.com" - ).rstrip("/") - return {"public_key": public_key, "secret_key": secret_key, "host": host} - - -# --------------- Hook payload --------------- -def read_hook_payload() -> Dict[str, Any]: - try: - data = sys.stdin.read() - if not data.strip(): - return {} - return json.loads(data) - except Exception: - return {} - - -def extract_session_id(payload: Dict[str, Any]) -> Optional[str]: - return ( - payload.get("sessionId") - or payload.get("session_id") - or (payload.get("session") or {}).get("id") - ) - - -def extract_transcript_path(payload: Dict[str, Any]) -> Optional[Path]: - transcript = ( - payload.get("transcriptPath") - or payload.get("transcript_path") - or (payload.get("transcript") or {}).get("path") - ) - if not transcript: - return None - try: - return Path(transcript).expanduser().resolve() - except Exception: - return None - - -# --------------- Git helpers --------------- -def run_git(cwd: Path, args: List[str]) -> Optional[str]: - try: - output = subprocess.check_output( - ["git", *args], - cwd=str(cwd), - stderr=subprocess.DEVNULL, - text=True, - ) - value = output.strip() - return value or None - except Exception: - return None - - -def resolve_repo_root(search_path: Path) -> Optional[Path]: - cwd = search_path.parent if search_path.is_file() else search_path - root = run_git(cwd, ["rev-parse", "--show-toplevel"]) - if not root: - return None - try: - return Path(root).expanduser().resolve() - except Exception: - return None - - -def first_remote(repo_root: Path) -> Optional[str]: - remotes = run_git(repo_root, ["remote"]) - if not remotes: - return None - for line in remotes.splitlines(): - remote = line.strip() - if remote: - return remote - return None - - -def build_github_commit_url(remote_url: Optional[str], commit_sha: str) -> Optional[str]: - if not remote_url: - return None - remote = remote_url.strip() - if not remote: - return None - patterns = [ - r"^https?://github\.com/(.+?)(?:\.git)?/?$", - r"^git@github\.com:(.+?)(?:\.git)?$", - r"^ssh://git@github\.com/(.+?)(?:\.git)?/?$", - ] - for pattern in patterns: - match = re.match(pattern, remote, re.IGNORECASE) - if match and match.group(1): - return f"https://github.com/{match.group(1)}/commit/{commit_sha}" - return None - - -def get_remote_url(repo_root: Path) -> Optional[str]: - remote_url = run_git(repo_root, ["remote", "get-url", "origin"]) - if not remote_url: - remote_name = first_remote(repo_root) - if remote_name: - remote_url = run_git(repo_root, ["remote", "get-url", remote_name]) - return remote_url - - -def resolve_repo_root_with_fallback(*paths: Path) -> Optional[Path]: - """Try each path in order, returning the first that resolves to a git repo root.""" - for p in paths: - root = resolve_repo_root(p) - if root: - return root - return None - - -def get_git_metadata(*search_paths: Path) -> Dict[str, Any]: - """Build git metadata from the first search path that resolves to a repo. - - Pass multiple candidates (e.g. transcript path, then cwd) so we find - the repo even when the transcript lives outside the working tree. - """ - repo_root = resolve_repo_root_with_fallback(*search_paths) - if not repo_root: - return {} - commit_sha = run_git(repo_root, ["rev-parse", "HEAD"]) - if not commit_sha: - return {} - remote_url = get_remote_url(repo_root) - commit_url = build_github_commit_url(remote_url, commit_sha) - metadata: Dict[str, Any] = { - "git_commit_sha": commit_sha, - "git_remote_url": remote_url, - } - if commit_url: - metadata["git_commit_url"] = commit_url - return metadata - - -# --------------- Claude Code identity --------------- -_cached_user_email: Optional[str] = None - - -def get_claude_user_email() -> Optional[str]: - """Resolve the Claude Code user's email address. - - Checks ~/.claude.json for stored auth data (oauthAccount.emailAddress), - then falls back to running ``claude auth status`` and parsing the output. - """ - global _cached_user_email - if _cached_user_email is not None: - return _cached_user_email or None - - email_keys = ("emailAddress", "email", "userEmail", "user_email") - - # 1) Try ~/.claude.json (Claude Code stores oauthAccount here) - try: - claude_json_path = Path.home() / ".claude.json" - if claude_json_path.exists(): - data = json.loads(claude_json_path.read_text(encoding="utf-8")) - if isinstance(data, dict): - # Check top-level keys - for key in email_keys: - val = data.get(key) - if isinstance(val, str) and "@" in val: - _cached_user_email = val - return val - # Check nested objects (oauthAccount, auth, user, etc.) - for outer in ("oauthAccount", "auth", "oauth", "user", "account"): - nested = data.get(outer) - if isinstance(nested, dict): - for key in email_keys: - val = nested.get(key) - if isinstance(val, str) and "@" in val: - _cached_user_email = val - return val - except Exception: - pass - - # 2) Fallback: ``claude auth status`` - try: - out = subprocess.check_output( - ["claude", "auth", "status"], - stderr=subprocess.DEVNULL, - text=True, - timeout=5, - ) - # Try JSON output first - try: - status = json.loads(out.strip()) - if isinstance(status, dict): - for key in email_keys: - val = status.get(key) - if isinstance(val, str) and "@" in val: - _cached_user_email = val - return val - except (json.JSONDecodeError, ValueError): - pass - # Try plain text: "Logged in as user@example.com" - import re as _re - match = _re.search(r"[\w.+-]+@[\w-]+\.[\w.-]+", out) - if match: - _cached_user_email = match.group(0) - return _cached_user_email - except Exception: - pass - - _cached_user_email = "" - return None - - -# --------------- File I/O --------------- -def atomic_write_json(path: Path, data: dict) -> None: - path.parent.mkdir(parents=True, exist_ok=True) - fd, tmp_path = tempfile.mkstemp(prefix=f"{path.name}.", dir=str(path.parent)) - try: - with os.fdopen(fd, "w", encoding="utf-8") as f: - json.dump(data, f, indent=2) - f.write("\n") - os.replace(tmp_path, path) - finally: - if os.path.exists(tmp_path): - os.unlink(tmp_path) - - -def save_last_trace(session_id: str, trace_id: str, host: str) -> None: - try: - project_id = os.environ.get("LANGFUSE_PROJECT_ID", "") - - data: Dict[str, Any] = { - "session_id": session_id, - "trace_id": trace_id, - "trace_url": f"{host}/trace/{trace_id}", - "host": host, - "updated_at": datetime.now(timezone.utc).isoformat(), - } - - if project_id: - data["project_id"] = project_id - data["session_url"] = f"{host}/project/{project_id}/sessions/{session_id}" - - STATE_DIR.mkdir(parents=True, exist_ok=True) - tmp = LAST_TRACE_FILE.with_suffix(".tmp") - tmp.write_text(json.dumps(data, indent=2), encoding="utf-8") - os.replace(tmp, LAST_TRACE_FILE) - except Exception as e: - debug(f"save_last_trace failed: {e}") - - -def read_last_trace(expected_session_id: Optional[str] = None) -> Optional[Dict[str, Any]]: - """Read the last trace file, optionally validating the session_id matches.""" - if not LAST_TRACE_FILE.exists(): - return None - try: - data = json.loads(LAST_TRACE_FILE.read_text(encoding="utf-8")) - if not isinstance(data, dict) or not data.get("trace_id"): - return None - if expected_session_id and data.get("session_id") != expected_session_id: - return None - return data - except Exception: - return None - - -def _parse_iso_utc(value: Any) -> Optional[datetime]: - if not isinstance(value, str) or not value: - return None - try: - ts = datetime.fromisoformat(value) - if ts.tzinfo is None: - ts = ts.replace(tzinfo=timezone.utc) - return ts.astimezone(timezone.utc) - except Exception: - return None - - -def read_recent_trace( - path: Path, - max_age_hours: float, - expected_session_id: Optional[str] = None, -) -> Optional[Dict[str, Any]]: - """Read a trace file if present and recent enough.""" - if not path.exists(): - return None - try: - data = json.loads(path.read_text(encoding="utf-8")) - except Exception: - return None - - if not isinstance(data, dict): - return None - - trace_id = data.get("trace_id") - trace_url = data.get("trace_url") - if not isinstance(trace_id, str) or not trace_id: - return None - if not isinstance(trace_url, str) or not trace_url: - return None - - if expected_session_id and data.get("session_id") != expected_session_id: - return None - - updated_at = _parse_iso_utc(data.get("updated_at")) - if updated_at is None: - return None - - age_hours = (datetime.now(timezone.utc) - updated_at).total_seconds() / 3600 - if age_hours > max_age_hours: - return None - - return data - - -# --------------- Trace manifest --------------- -def write_trace_manifest( - repo_root: Path, - session_id: str, - trace_id: str, - host: str, - git_metadata: Optional[Dict[str, Any]] = None, -) -> None: - try: - safe_sid = re.sub(r"[^A-Za-z0-9._-]", "_", session_id) - manifest_dir = repo_root / ".langfuse" / "traces" - manifest_path = manifest_dir / f"{safe_sid}.json" - - existing: Dict[str, Any] = {} - if manifest_path.exists(): - try: - existing = json.loads(manifest_path.read_text(encoding="utf-8")) - except Exception: - pass - - trace_url = f"{host}/trace/{trace_id}" - project_id = os.environ.get("LANGFUSE_PROJECT_ID", "") - session_url = f"{host}/project/{project_id}/sessions/{session_id}" if project_id else "" - - commit_sha = (git_metadata or {}).get("git_commit_sha", "") - remote_url = (git_metadata or {}).get("git_remote_url") - commit_url = (git_metadata or {}).get("git_commit_url") - - git_block = existing.get("git", {}) if isinstance(existing.get("git"), dict) else {} - if commit_sha: - git_block["commit_sha"] = commit_sha - if commit_url: - git_block["commit_url"] = commit_url - if remote_url: - git_block["remote_url"] = remote_url - branch = run_git(repo_root, ["rev-parse", "--abbrev-ref", "HEAD"]) - if branch: - git_block["branch"] = branch - msg = run_git(repo_root, ["log", "-1", "--pretty=%s"]) - if msg: - git_block["commit_message"] = msg - - langfuse_block: Dict[str, Any] = { - "trace_id": trace_id, - "trace_url": trace_url, - "session_id": session_id, - "host": host.rstrip("/"), - } - if session_url: - langfuse_block["session_url"] = session_url - - manifest = { - "schema_version": 1, - "langfuse": langfuse_block, - "git": git_block, - "created_at": existing.get("created_at", datetime.now(timezone.utc).isoformat()), - "updated_at": datetime.now(timezone.utc).isoformat(), - } - - atomic_write_json(manifest_path, manifest) - - current_session_data: Dict[str, Any] = { - "session_id": session_id, - "trace_id": trace_id, - "trace_url": trace_url, - "host": host.rstrip("/"), - "updated_at": datetime.now(timezone.utc).isoformat(), - } - if session_url: - current_session_data["session_url"] = session_url - - current_session_path = repo_root / ".langfuse" / "current-session.json" - atomic_write_json(current_session_path, current_session_data) - debug(f"Wrote trace manifest to {manifest_path}") - except Exception as exc: - debug(f"write_trace_manifest failed: {exc}") diff --git a/claude/lightcone/skills/lc-feedback/SKILL.md b/claude/lightcone/skills/lc-feedback/SKILL.md index 824f1857..7cf6f09c 100644 --- a/claude/lightcone/skills/lc-feedback/SKILL.md +++ b/claude/lightcone/skills/lc-feedback/SKILL.md @@ -35,7 +35,7 @@ The user should have provided a description inline (e.g., `/lc-feedback pipeline Triage the repo from context: - **ASTRA** — `astra` CLI, schema validation, YAML parsing, helpers -- **lightcone-cli** — `lc` CLI, recipe execution, container builds, scaffolding, skills, telemetry hooks +- **lightcone-cli** — `lc` CLI, recipe execution, container builds, scaffolding, skills Default to **lightcone-cli** if ambiguous. diff --git a/docs/api/cli.md b/docs/api/cli.md index ee6f0f69..d0634b3d 100644 --- a/docs/api/cli.md +++ b/docs/api/cli.md @@ -87,8 +87,6 @@ new projects look like. `_install_claude_plugin(project_dir, plugin_source, permissions)` copies the bundled plugin into `project_dir/.claude/` (`skills`, `agents`, -`hooks`, `scripts`, `guides`, `templates`) and writes -`.claude/settings.json` from the chosen permission tier. Existing -subdirectories are removed before copying. **No telemetry env vars are -written today** — the older `.claude/settings.local.json` file with -Langfuse credentials is no longer generated by `lc init`. +`scripts`, `guides`, `templates`) and writes `.claude/settings.json` +from the chosen permission tier. Existing subdirectories are removed +before copying. diff --git a/docs/architecture.md b/docs/architecture.md index c6d8ad11..6122f3d1 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -259,9 +259,8 @@ paths, and `git push`. ### Hooks The plugin registers Claude Code hooks for venv activation, -auto-validation on save, integrity-aware "did you forget `lc run`?" -warnings, and Langfuse telemetry. See -[Telemetry](telemetry/index.md) and [`hooks` source](https://github.com/LightconeResearch/lightcone-cli/tree/main/claude/lightcone/hooks). +auto-validation on save, and integrity-aware "did you forget `lc run`?" +warnings. --- @@ -275,7 +274,6 @@ warnings, and Langfuse telemetry. See | `.lightcone/lightcone.yaml` | Project | Tiny scratchpad — currently writes only `target: local`. Not consumed by today's code. | | `~/.lightcone/config.yaml` | User | `container.runtime` (and historically `extraction_model`). | | `.claude/settings.json` | Project | Claude Code permissions. | -| `.claude/settings.local.json` | Project | Telemetry env vars. Not committed. | The `dagster.yaml` and `~/.lightcone/targets/*.yaml` files referenced in older docs are no longer used — historical residue. diff --git a/docs/index.md b/docs/index.md index 27f9e001..56144227 100644 --- a/docs/index.md +++ b/docs/index.md @@ -26,7 +26,7 @@ nav. | Layer | Package | Role | |-------|---------|------| | **ASTRA** | `astra-tools` | Pure specification: schema, validation, prior insights & findings, evidence verification helpers, the `astra` CLI. | -| **lightcone-cli** | `lightcone-cli` | Agentic layer: project scaffolding, Snakemake-based execution, Dask cluster management, container builds, Claude Code skills, telemetry. | +| **lightcone-cli** | `lightcone-cli` | Agentic layer: project scaffolding, Snakemake-based execution, Dask cluster management, container builds, Claude Code skills. | `lightcone-cli` depends on `astra-tools`. The `astra` CLI handles the spec itself (validation, paper management, evidence verification); the @@ -60,7 +60,6 @@ claude/lightcone/ # Claude Code plugin (force-included into the wh ├── agents/ # lc-extractor (literature subagent) ├── guides/ # astra-reference, lightcone-cli-reference, ui-brand ├── templates/ # project CLAUDE.md template -├── hooks/ # Langfuse telemetry hooks (Python) └── scripts/ # session hooks (bash): venv, validate-on-save, … tests/ # pytest, mirrors src/ @@ -131,5 +130,4 @@ just docs-serve # live docs preview - [CLI Reference](cli/index.md) — every command currently shipped - [Python API](api/index.md) — the engine modules - [Skills](skills/index.md) — what each `/lc-*` skill is supposed to do -- [Telemetry](telemetry/index.md) — Langfuse tracing hooks - [Contributing](contributing/setup.md) — getting the dev loop running diff --git a/docs/skills/index.md b/docs/skills/index.md index 17b6f114..5100013b 100644 --- a/docs/skills/index.md +++ b/docs/skills/index.md @@ -52,7 +52,6 @@ claude/lightcone/ ├── agents/lc-extractor.md # subagent definition ├── guides/ # reference docs loaded by skills ├── templates/CLAUDE.md # the project CLAUDE.md template -├── hooks/*.py # Langfuse telemetry hooks └── scripts/*.sh # session lifecycle hooks ``` diff --git a/docs/skills/lc-feedback.md b/docs/skills/lc-feedback.md index 7fe50b7b..3db3e3de 100644 --- a/docs/skills/lc-feedback.md +++ b/docs/skills/lc-feedback.md @@ -27,7 +27,7 @@ create`. - **ASTRA** — the `astra` CLI, schema validation, YAML parsing, helpers. - **lightcone-cli** — the `lc` CLI, recipes, container builds, - scaffolding, skills, telemetry, the engine layer. + scaffolding, skills, the engine layer. - Default to `lightcone-cli` if ambiguous. 4. **Collect environment** silently: ```bash @@ -78,6 +78,6 @@ Sections that don't apply are dropped. ## Notes for the maintainer who's looking The triage hint in the prompt currently says "lightcone-cli — `lc` CLI, -**Dagster execution**, recipes, container builds, scaffolding, skills, -telemetry." That's stale — the Dagster mention should be replaced -with "Snakemake/Dask execution." See the `SKILL.md` source. +**Dagster execution**, recipes, container builds, scaffolding, skills." +That's stale — the Dagster mention should be replaced with +"Snakemake/Dask execution." See the `SKILL.md` source. diff --git a/docs/telemetry/hooks.md b/docs/telemetry/hooks.md deleted file mode 100644 index 12e652f2..00000000 --- a/docs/telemetry/hooks.md +++ /dev/null @@ -1,118 +0,0 @@ -# Hooks Architecture - -The telemetry hooks are shipped as part of the plugin and copied into -`.claude/hooks/` by `lc init`. They are dormant unless -`TRACE_TO_LANGFUSE=true` and the Langfuse credentials are present in -the environment — see [Telemetry overview](index.md). Today's -`lc init` does **not** seed `.claude/settings.local.json` automatically, -so a fresh project ships with the hooks installed but disabled until -you fill in the env vars. - -The system is five Python scripts in `claude/lightcone/hooks/`: - -``` -hooks/ -├── langfuse_session_init_hook.py # PreToolUse: create trace ID -├── langfuse_hook.py # Stop / SessionEnd: emit full session -├── langfuse_git_commit_hook.py # PostToolUse(Bash): attach git metadata -├── langfuse_prepare_commit_msg.py # git prepare-commit-msg hook -└── langfuse_utils.py # shared utilities -``` - -These five files are copied verbatim from -[langfuse-cli](https://github.com/langfuse/langfuse-cli) (MIT). See the -NOTICE file at the repo root. - ---- - -## `langfuse_session_init_hook.py` (PreToolUse) - -Fired before the very first tool use in a session. Creates a deterministic Langfuse trace ID from the session ID using SHA-256: - -```python -trace_id = sha256(session_id.encode()).hexdigest()[:32] -``` - -The trace ID is saved to `.langfuse/last_trace.json` so the main hook can reference it. This ensures the pre-session "empty" trace and the post-session full trace share an ID and appear as one entry in Langfuse. - ---- - -## `langfuse_hook.py` (Stop / SessionEnd) - -The main emission hook. Reads the Claude Code transcript file incrementally (using a byte-offset cursor stored in `.langfuse/state.json`) and emits new turns to Langfuse. - -### Processing pipeline - -``` -transcript.jsonl (JSONL) - ↓ -read_new_jsonl() # incremental read from last byte offset - ↓ -build_turns() # group messages into (user_msg, assistant_msgs, tool_results) - ↓ -emit_turn() # create Langfuse trace + generation span per turn -``` - -### Turn assembly (`build_turns`) - -The Claude Code transcript is a flat JSONL stream. `build_turns()` groups it into turns: - -- A **turn** starts with a user message (not a tool_result). -- All subsequent assistant messages (and their interleaved tool results) belong to that turn. -- The next non-tool-result user message starts a new turn. - -Multiple assistant messages with the same `message_id` are deduplicated — only the latest version is kept (handles streaming updates). - -### Langfuse data model - -Each turn emits: - -``` -trace (session_id) - └── generation span - ├── input: ChatML messages (user + assistant) - ├── output: ChatML assistant message with tool_calls - ├── model: claude-* model name - ├── metadata: session_id, turn_number, transcript_path, git_metadata, ... - └── tool_calls[]: {name, arguments, output, is_error} -``` - ---- - -## `langfuse_git_commit_hook.py` (PostToolUse Bash) - -Fired after every Bash tool use. Checks if the bash command was a git commit: - -1. Looks for patterns like `git commit`, `git commit -m "..."`, etc. -2. If a commit happened, reads the git log to get the commit SHA and author. -3. Builds a GitHub URL from the remote origin. -4. Saves this metadata to `.langfuse/git_trace.json`. -5. The main hook picks up this metadata when emitting the next turn. - ---- - -## `langfuse_utils.py` - -Shared utilities used by all hooks: - -- **Logging**: `debug()`, `info()`, `warn()`, `error()` → `.langfuse/hook.log` -- **Environment**: `tracing_enabled()`, `get_langfuse_credentials()` -- **Hook payload parsing**: `read_hook_payload()`, `extract_session_id()`, `extract_transcript_path()` -- **Git helpers**: `get_git_metadata()`, `build_github_commit_url()`, `resolve_repo_root()` -- **User identity**: `get_claude_user_email()` (reads `~/.claude.json`) -- **File I/O**: `atomic_write_json()`, `save_last_trace()`, `read_last_trace()` -- **Trace manifest**: `write_trace_manifest()` — written to `.langfuse/traces.json` - ---- - -## State files - -All state is stored in the project's `.langfuse/` directory (gitignored): - -| File | Contents | -|------|----------| -| `last_trace.json` | Current session's trace ID (from init hook) | -| `state.json` | Per-session byte offsets and turn counts for incremental reads | -| `git_trace.json` | Latest git commit metadata to attach to the next span | -| `traces.json` | Manifest of all traces emitted in this project | -| `hook.log` | Debug log for hook execution | diff --git a/docs/telemetry/index.md b/docs/telemetry/index.md deleted file mode 100644 index 25ae0b91..00000000 --- a/docs/telemetry/index.md +++ /dev/null @@ -1,65 +0,0 @@ -# Telemetry (Langfuse) - -The plugin ships Langfuse hooks that trace Claude Code sessions to -[Langfuse](https://langfuse.com/). The hooks are present in every -project that ran `lc init` (under `.claude/hooks/`), but **they don't -do anything until you give them credentials**: today's `lc init` does -not wire `.claude/settings.local.json` automatically. - -## What gets traced (when enabled) - -Each Claude Code session generates: - -- One **trace per turn** (user message + all assistant responses in - that turn). -- **Tool calls** with input arguments and output results (truncated to - 2000 chars). -- **Git metadata** attached to spans after commits: commit SHA, GitHub - URL, branch. -- **Session metadata**: Claude Code version, project name, transcript - path. -- **User identity**: Claude user email (from `~/.claude.json`). - -## Hooks at a glance - -| Hook event | Script | When | -|------------|--------|------| -| `PreToolUse` | `langfuse_session_init_hook.py` | Before the first tool call in a session — creates the trace id. | -| `PostToolUse` (Bash) | `langfuse_git_commit_hook.py` | After any bash command — attaches commit metadata if one happened. | -| `Stop` / `SessionEnd` | `langfuse_hook.py` | When Claude stops responding or the session ends — flushes the full trace. | - -`langfuse_utils.py` carries the shared state (`STATE_DIR`, `LOCK_FILE`, -`tracing_enabled()`, etc.) and is imported by the other hooks. - -See [Hooks Architecture](hooks.md) for the wiring detail. - -## Configuration - -Telemetry hooks read credentials from environment variables: - -| Var | Purpose | -|-----|---------| -| `TRACE_TO_LANGFUSE` | Enable / disable. Hooks no-op when this is unset or `false`. | -| `LANGFUSE_PUBLIC_KEY` | Langfuse project public key. | -| `LANGFUSE_SECRET_KEY` | Langfuse project secret key (or `relay` for the Cloudflare relay). | -| `LANGFUSE_HOST` | Langfuse endpoint. For Lightcone's relay: `https://telemetry.lightconeresearch.workers.dev`. | - -Set these in `.claude/settings.local.json` (per project, gitignored): - -```json -{ - "env": { - "TRACE_TO_LANGFUSE": "true", - "LANGFUSE_PUBLIC_KEY": "...", - "LANGFUSE_SECRET_KEY": "relay", - "LANGFUSE_HOST": "https://telemetry.lightconeresearch.workers.dev" - } -} -``` - -`settings.local.json` is in the default `.gitignore` written by -`lc init`. - -## Disabling - -See [Opt Out](opt-out.md). diff --git a/docs/telemetry/lifecycle.md b/docs/telemetry/lifecycle.md deleted file mode 100644 index c936c878..00000000 --- a/docs/telemetry/lifecycle.md +++ /dev/null @@ -1,71 +0,0 @@ -# Session Lifecycle - -How telemetry events flow from a Claude Code session to Langfuse. -Telemetry is dormant unless the credentials are wired (see -[Telemetry overview](index.md)). - -## Timeline - -``` -Claude Code session opens - │ - ▼ -[SessionStart] - activate-venv.sh → activates .venv/ - session-start.sh → shows ASTRA project summary; detects - interrupted lc-build loops - - │ - ▼ (first tool use) -[PreToolUse] - langfuse_session_init_hook.py - → trace_id = sha256(session_id)[:32] - → save to .langfuse/last_trace.json - - │ - ▼ (for each tool use) -[PostToolUse] - validate-on-save.sh → runs astra validate on Write/Edit of astra.yaml or universes/* - check-lc-run.sh → warns if python is run directly - langfuse_git_commit_hook.py → on bash git commits: - extract commit SHA + GitHub URL, - write .langfuse/git_trace.json - - │ - ▼ (session ends or Claude stops) -[Stop / SessionEnd] - langfuse_hook.py - → read transcript.jsonl from last byte offset - → build_turns(): group messages into turns - → for each new turn: - emit_turn() → Langfuse trace + generation span - → update .langfuse/state.json -``` - -## Incremental processing - -`langfuse_hook.py` runs at every `Stop` event (not only `SessionEnd`). -It reads only new content since the last call using a stored byte -offset. Practical consequences: - -- Turns are emitted incrementally throughout the session. -- A crash or timeout does not lose already-emitted turns. -- Short sessions with one turn emit once; long sessions with many turns - emit progressively. - -## Trace linking - -The session-init hook generates the trace id deterministically from the -Claude Code session id, and writes it to `.langfuse/last_trace.json`. -The main hook reuses that id for the first turn so the pre-session -"empty" trace entry and the first turn's trace share an id — Langfuse -displays them as one entry. - -## Status of the project-side hooks - -The two non-telemetry session hooks (`session-start.sh` and -`check-lc-run.sh`) currently have outdated branches that look for -status terms (`pending`, `materialized`, `no_recipe`, …) that today's -`lc status` no longer emits. The hooks degrade silently in that case — -no false positives, just dimmer crash recovery hints. See the -[maintainer summary](../index.md) for the fix-list. diff --git a/docs/telemetry/opt-out.md b/docs/telemetry/opt-out.md deleted file mode 100644 index 51ea169a..00000000 --- a/docs/telemetry/opt-out.md +++ /dev/null @@ -1,49 +0,0 @@ -# Disabling Telemetry - -Telemetry hooks are present but **disabled by default in today's -`lc init`** — the bundled plugin no longer auto-writes -`.claude/settings.local.json`. The hooks no-op until they see -`TRACE_TO_LANGFUSE=true` in the environment. - -If you previously configured telemetry and want to turn it off, edit -`.claude/settings.local.json`: - -```json -{ - "env": { - "TRACE_TO_LANGFUSE": "false" - } -} -``` - -`tracing_enabled()` in `langfuse_utils.py` checks that variable; when -it's unset or `false`, every hook exits immediately without contacting -Langfuse. - -## Removing the hooks entirely - -If you want zero telemetry code in your project at all: - -```bash -rm -rf .claude/hooks # the Python hooks -# Remove any hook entries from .claude/settings.json (PreToolUse / PostToolUse / Stop / SessionEnd) -``` - -The plugin install in `lc init` will recreate them on the next init, -but `lc init` won't run inside a project that already has `astra.yaml`, -so existing projects are safe. - -## What is *not* collected (even when enabled) - -- File contents (only tool-call metadata and truncated outputs). -- Actual script outputs beyond the last 2000 characters. -- Passwords, tokens, or credential values. -- Any data outside the project directory. - -## Transparency - -The full telemetry implementation is in `claude/lightcone/hooks/`. All -hooks are plain Python scripts installed in each project's -`.claude/hooks/` directory — they can be inspected, modified, or -deleted per-project. They are vendored from the upstream -[langfuse-cli](https://github.com/langfuse/langfuse-cli) under MIT. diff --git a/pyproject.toml b/pyproject.toml index 75563a35..4f5f1212 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,6 @@ dependencies = [ "snakemake-interface-common>=1.14", "dask>=2024.1", "distributed>=2024.1", - "langfuse>=2.0", "rocrate>=0.11", ] diff --git a/src/lightcone/cli/commands.py b/src/lightcone/cli/commands.py index 03d10871..79471ff1 100644 --- a/src/lightcone/cli/commands.py +++ b/src/lightcone/cli/commands.py @@ -311,7 +311,7 @@ def _install_claude_plugin( """ claude_dir = project_dir / ".claude" claude_dir.mkdir(exist_ok=True) - for sub in ("skills", "agents", "hooks", "scripts", "guides", "templates"): + for sub in ("skills", "agents", "scripts", "guides", "templates"): src = plugin_source / sub if src.exists(): dest = claude_dir / sub diff --git a/src/lightcone/eval/sandbox.py b/src/lightcone/eval/sandbox.py index 22a5c8ca..6be41097 100644 --- a/src/lightcone/eval/sandbox.py +++ b/src/lightcone/eval/sandbox.py @@ -84,7 +84,7 @@ def create(self) -> None: "astra-tools astra-spec" " jinja2 jsonschema" " snakemake snakemake-interface-executor-plugins" - " snakemake-interface-common dask distributed langfuse" + " snakemake-interface-common dask distributed" ) image = ( Image.debian_slim("3.12") @@ -120,19 +120,16 @@ def create(self) -> None: "trial": self.trial_id, } - # Merge env vars: host ANTHROPIC_API_KEY + Langfuse creds + eval metadata + # Merge env vars: host ANTHROPIC_API_KEY + eval metadata sandbox_env = { "LIGHTCONE_EVAL": "true", "LIGHTCONE_EVAL_TRIAL_ID": self.trial_id, "LIGHTCONE_EVAL_TASK_ID": self.task_id, } - # Pass through host API keys, OAuth token, and Langfuse config + # Pass through host API keys and OAuth token. for key in ( "ANTHROPIC_API_KEY", "CLAUDE_CODE_OAUTH_TOKEN", - "LANGFUSE_PUBLIC_KEY", - "LANGFUSE_SECRET_KEY", - "LANGFUSE_HOST", ): val = os.environ.get(key) if val: diff --git a/zensical.toml b/zensical.toml index d3cc8b4c..f38bc093 100644 --- a/zensical.toml +++ b/zensical.toml @@ -53,12 +53,6 @@ nav = [ {"lc-feedback" = "skills/lc-feedback.md"}, {"Authoring Skills" = "skills/authoring.md"}, ]}, - {"Telemetry (Langfuse)" = [ - {"Overview" = "telemetry/index.md"}, - {"Hooks Architecture" = "telemetry/hooks.md"}, - {"Session Lifecycle" = "telemetry/lifecycle.md"}, - {"Disabling Telemetry" = "telemetry/opt-out.md"}, - ]}, {"Contributing" = [ {"Development Setup" = "contributing/setup.md"}, {"Testing" = "contributing/testing.md"},