diff --git a/archaeology/analysis_runner.py b/archaeology/analysis_runner.py index 33e3b56..8da8d17 100644 --- a/archaeology/analysis_runner.py +++ b/archaeology/analysis_runner.py @@ -293,10 +293,12 @@ def run_all(self, vectors: list[str] | None = None) -> dict[str, str]: if unknown: raise ValueError(f"Unknown analysis vector(s): {', '.join(unknown)}") self.deliverables_dir.mkdir(parents=True, exist_ok=True) + analysis_dir = self.deliverables_dir / "analysis" + analysis_dir.mkdir(parents=True, exist_ok=True) for vector_name in target: runner_func = runners[vector_name] try: - output_path = self.deliverables_dir / f"analysis-{vector_name}.json" + output_path = analysis_dir / f"analysis-{vector_name}.json" result = runner_func() atomic_write(output_path, json.dumps(result, indent=2, ensure_ascii=False) + "\n") results[vector_name] = str(output_path) diff --git a/archaeology/api.py b/archaeology/api.py new file mode 100644 index 0000000..70e61f8 --- /dev/null +++ b/archaeology/api.py @@ -0,0 +1,689 @@ +"""JSON API for dev-archaeology strategic insights. + +Lightweight API that serves strategic analysis data to The-Factory +and other consumers. Stdlib-only — no Flask/FastAPI dependency. 
+ +Endpoints: + GET /api/health — System status + GET /api/projects — All projects with summary metrics + GET /api/insights/ — All strategic analyses for a project + GET /api/swot/ — SWOT analysis + GET /api/wardley/ — Wardley Map + GET /api/value-chain/ — Porter's Value Chain + GET /api/bridge — Full bridge file for Factory consumption +""" + +import json +import re +import sqlite3 +from datetime import datetime +from http.server import BaseHTTPRequestHandler +from pathlib import Path +from urllib.parse import urlparse + +ROOT = Path(__file__).resolve().parents[1] # dev-archaeology/ +PROJECTS_DIR = ROOT / "projects" +BRIDGE_PATH = ROOT / "global" / "data" / "factory-bridge.json" + +__version__ = "1.0.0" + + +def _validate_project_name(name): + """Reject path traversal and invalid characters in project names.""" + if not name or not re.match(r'^[a-zA-Z0-9._-]+$', name): + return False + # Double-check no path traversal + resolved = (PROJECTS_DIR / name).resolve() + try: + resolved.relative_to(PROJECTS_DIR.resolve()) + except ValueError: + return False + return True + + +def _json_response(handler, data, status=200): + handler.send_response(status) + handler.send_header("Content-Type", "application/json") + handler.send_header("Access-Control-Allow-Origin", "*") + payload = json.dumps(data, indent=2, default=str).encode() + handler.send_header("Content-Length", str(len(payload))) + handler.end_headers() + handler.wfile.write(payload) + + +def _error_response(handler, message, status=404): + _json_response(handler, {"error": message, "status": status}, status) + + +def _load_json(path): + if not path.exists(): + return None + try: + return json.loads(path.read_text(encoding="utf-8")) + except (json.JSONDecodeError, UnicodeDecodeError, OSError): + return None + + +def _discover_projects(): + """Return list of project directories with data.""" + projects = [] + if not PROJECTS_DIR.exists(): + return projects + for d in sorted(PROJECTS_DIR.iterdir()): + if not 
d.is_dir(): + continue + eras_path = d / "data" / "commit-eras.json" + if eras_path.exists(): + projects.append(d) + return projects + + +def _project_metrics(project_dir): + """Extract core metrics for a project.""" + name = project_dir.name + eras_data = _load_json(project_dir / "data" / "commit-eras.json") or {} + metrics = _load_json(project_dir / "deliverables" / "canonical-metrics.json") or {} + config = _load_json(project_dir / "project.json") or {} + + return { + "name": name, + "description": config.get("description", ""), + "total_commits": eras_data.get("total_commits", 0), + "era_count": len(eras_data.get("eras", [])), + "active_days": metrics.get("active_days", 0), + "span_days": metrics.get("span_days", 0), + "contributors": len(eras_data.get("contributors", [])), + "commit_types": eras_data.get("commit_types", {}), + "lifespan": eras_data.get("lifespan", ""), + "repo_url": config.get("repo_url", ""), + } + + +def _parse_md_sections(md_path): + """Parse a markdown file into {heading: [lines]} dict.""" + if not md_path.exists(): + return {} + text = md_path.read_text(encoding="utf-8") + sections = {} + current_heading = "header" + current_lines = [] + + for line in text.splitlines(): + if line.startswith("## "): + if current_lines: + sections[current_heading] = "\n".join(current_lines) + current_heading = line[3:].strip() + current_lines = [] + else: + current_lines.append(line) + if current_lines: + sections[current_heading] = "\n".join(current_lines) + return sections + + +def _parse_list_items(text): + """Extract numbered or bulleted list items from text.""" + items = [] + for line in text.splitlines(): + line = line.strip() + if re.match(r"^(\d+\.|[-*])\s+", line): + clean = re.sub(r"^(\d+\.|[-*])\s+", "", line) + clean = re.sub(r"\*\*([^*]+)\*\*", r"\1", clean) + if clean: + items.append(clean) + return items + + +def _extract_score(text, label): + """Extract a score like 'Margin: 58/100' from text.""" + pattern = 
rf"{re.escape(label)}[^0-9]*(\d+)/100" + m = re.search(pattern, text, re.IGNORECASE) + return int(m.group(1)) if m else None + + +def _parse_swot(project_dir): + """Parse SWOT-ANALYSIS.md into structured data.""" + md_path = project_dir / "deliverables" / "strategy" / "SWOT-ANALYSIS.md" + sections = _parse_md_sections(md_path) + if not sections: + return None + + result = {"project": project_dir.name} + for key in ("Strengths", "Weaknesses", "Opportunities", "Threats"): + section = sections.get(key, "") + result[key.lower()] = _parse_list_items(section) + + # Extract matrix counts + matrix_text = sections.get("header", "") + for quadrant in ("strengths", "weaknesses", "opportunities", "threats"): + pattern = rf"{quadrant}.*?(\d+)\s+found" + m = re.search(pattern, matrix_text, re.IGNORECASE) + if m: + result.setdefault(f"{quadrant}_count", int(m.group(1))) + + return result + + +def _parse_wardley(project_dir): + """Parse WARDLEY-MAP.md into structured data.""" + md_path = project_dir / "deliverables" / "strategy" / "WARDLEY-MAP.md" + sections = _parse_md_sections(md_path) + if not sections: + return None + + overview = sections.get("Overview", "") + result = { + "project": project_dir.name, + "maturity": None, + "avg_evolution": None, + } + + m = re.search(r"Maturity:\s+(\w+)", overview) + if m: + result["maturity"] = m.group(1) + m = re.search(r"avg evolution:\s+([\d.]+)", overview) + if m: + result["avg_evolution"] = float(m.group(1)) + + # Parse component table + table_text = sections.get("Component Evolution Table", "") + components = [] + for line in table_text.splitlines(): + if "|" in line and not line.strip().startswith("|-") and not line.strip().startswith("| Component"): + cells = [c.strip() for c in line.split("|") if c.strip()] + if len(cells) >= 3: + components.append({ + "component": cells[0], + "stage": cells[1], + "evidence": cells[2] if len(cells) > 2 else "", + }) + result["components"] = components + + # 
Parse recommendations + recs_text = sections.get("Movement Recommendations", "") + result["recommendations"] = _parse_list_items(recs_text) + + return result + + +def _parse_value_chain(project_dir): + """Parse VALUE-CHAIN-ANALYSIS.md into structured data.""" + md_path = project_dir / "deliverables" / "strategy" / "VALUE-CHAIN-ANALYSIS.md" + sections = _parse_md_sections(md_path) + if not sections: + return None + + summary = sections.get("Value Chain Summary", sections.get("header", "")) + result = { + "project": project_dir.name, + "margin_score": _extract_score(summary, "MARGIN"), + "primary_activities": {}, + "support_activities": {}, + } + + # Primary activities with scores + primary = [ + ("Inbound Logistics", "inbound_logistics"), + ("Operations", "operations"), + ("Outbound Logistics", "outbound_logistics"), + ("Marketing & Sales", "marketing_sales"), + ("Service", "service"), + ] + for heading, key in primary: + section_text = sections.get(heading, "") + result["primary_activities"][key] = { + "score": _extract_score(section_text, heading.split()[0]), + "details": _parse_list_items(section_text)[:5], + } + + # Support activities + support = [ + ("Infrastructure", "infrastructure"), + ("Technology Development", "technology"), + ("Human Resource Management", "hr"), + ("Procurement", "procurement"), + ] + for heading, key in support: + section_text = sections.get(heading, "") + result["support_activities"][key] = { + "score": _extract_score(section_text, heading.split()[0]), + "details": _parse_list_items(section_text)[:3], + } + + # Recommendations + recs = sections.get("Recommendations", "") + result["recommendations"] = _parse_list_items(recs) + + return result + + +def _parse_bcg(project_dir): + """Parse BCG-MATRIX.md into structured data.""" + md_path = project_dir / "deliverables" / "strategy" / "BCG-MATRIX.md" + sections = _parse_md_sections(md_path) + if not sections: + return None + + overview = sections.get("Overview", "") + result = {"project": 
project_dir.name} + + m = re.search(r"Total commits\*\*: (\d+)", overview) + if m: + result["total_commits"] = int(m.group(1)) + m = re.search(r"Velocity trend\*\*: (\w+)", overview) + if m: + result["velocity_trend"] = m.group(1) + + components = [] + table_text = sections.get("Component Classification", "") + for line in table_text.splitlines(): + if "|" in line and not line.strip().startswith("|-") and not line.strip().startswith("| Component"): + cells = [c.strip() for c in line.split("|") if c.strip()] + if len(cells) >= 4: + components.append({ + "component": cells[0], + "commits": cells[1], + "share": cells[2], + "quadrant": cells[3], + }) + result["components"] = components + + for quadrant_key in ("Stars", "Cash Cows", "Question Marks", "Dogs"): + section = sections.get(f"{quadrant_key}", "") + count_match = re.search(rf"\((\d+)\)", sections.get("Quadrant Analysis", "")) + result[quadrant_key.lower().replace(" ", "_")] = _parse_list_items(section) + + result["recommendations"] = _parse_list_items(sections.get("Strategic Recommendations", "")) + return result + + +def _parse_ansoff(project_dir): + """Parse ANSOFF-MATRIX.md into structured data.""" + md_path = project_dir / "deliverables" / "strategy" / "ANSOFF-MATRIX.md" + sections = _parse_md_sections(md_path) + if not sections: + return None + + overview = sections.get("Overview", "") + result = {"project": project_dir.name} + + m = re.search(r"Primary strategy\*\*: ([\w\s]+)", overview) + if m: + result["primary_strategy"] = m.group(1).strip() + m = re.search(r"Secondary strategy\*\*: ([\w\s]+)", overview) + if m: + result["secondary_strategy"] = m.group(1).strip() + + scores = {} + table_text = sections.get("Quadrant Scores", "") + for line in table_text.splitlines(): + if "|" in line and "Strategy" not in line and not line.strip().startswith("|-"): + cells = [c.strip() for c in line.split("|") if c.strip()] + if len(cells) >= 2: + strategy = cells[0] + score_match = re.match(r"(\d+)/100", cells[1]) + 
if score_match: + scores[strategy] = int(score_match.group(1)) + result["scores"] = scores + + result["recommendations"] = _parse_list_items(sections.get("Recommendations", "")) + return result + + +def _parse_blue_ocean(project_dir): + """Parse BLUE-OCEAN.md into structured data.""" + md_path = project_dir / "deliverables" / "strategy" / "BLUE-OCEAN.md" + sections = _parse_md_sections(md_path) + if not sections: + return None + + overview = sections.get("Overview", "") + result = {"project": project_dir.name} + + m = re.search(r"Average value score\*\*: ([\d.]+)/10", overview) + if m: + result["avg_value_score"] = float(m.group(1)) + m = re.search(r"Critical gaps\*\*: (\d+)", overview) + if m: + result["critical_gaps"] = int(m.group(1)) + + factors = [] + table_text = sections.get("Strategy Canvas", "") + for line in table_text.splitlines(): + if "|" in line and "Value Factor" not in line and not line.strip().startswith("|-"): + cells = [c.strip() for c in line.split("|") if c.strip()] + if len(cells) >= 2: + score_match = re.match(r"(\d+)/10", cells[1]) + if score_match: + factors.append({"factor": cells[0], "score": int(score_match.group(1))}) + result["factors"] = factors + + for action in ("Eliminate", "Reduce", "Raise", "Create"): + section_key = f"{action} — What to {'stop doing' if action == 'Eliminate' else 'do less of' if action == 'Reduce' else 'do more of' if action == 'Raise' else 'build that doesn'}" + section = sections.get(section_key, "") + if not section: + section = sections.get(action, "") + result[action.lower()] = _parse_list_items(section) + + result["recommendations"] = _parse_list_items(sections.get("Recommendations", "")) + return result + + +def _get_pipeline_status(project_dir): + """Get latest pipeline run status from archaeology.db.""" + db_path = project_dir / "data" / "archaeology.db" + if not db_path.exists(): + return None + try: + with sqlite3.connect(str(db_path)) as conn: + conn.row_factory = sqlite3.Row + row = conn.execute( + 
"SELECT run_timestamp, status FROM pipeline_runs " + "ORDER BY run_timestamp DESC LIMIT 1" + ).fetchone() + if row: + return {"last_run": row["run_timestamp"], "status": row["status"]} + except sqlite3.OperationalError: + pass + return None + + +def _compute_health_score(metrics, swot, wardley, value_chain): + """Compute a 0-100 health score from available data.""" + score = 50 # baseline + + # Positive signals + ct = metrics.get("commit_types", {}) + if ct.get("test", 0) > 0: + score += 10 + if ct.get("ci", 0) > 0: + score += 10 + if ct.get("docs", 0) > 5: + score += 5 + if metrics.get("era_count", 0) >= 3: + score += 5 + if metrics.get("active_days", 0) > 10: + score += 5 + + # Negative signals + if ct.get("test", 0) == 0: + score -= 10 + if ct.get("ci", 0) == 0: + score -= 10 + + # SWOT balance (more strengths than weaknesses = healthy) + if swot: + s = len(swot.get("strengths", [])) + w = len(swot.get("weaknesses", [])) + if s > w: + score += 5 + elif w > s: + score -= 5 + + # Value chain margin + if value_chain and value_chain.get("margin_score"): + margin = value_chain["margin_score"] + if margin > 60: + score += 5 + elif margin < 30: + score -= 5 + + return max(0, min(100, score)) + + +# ── Endpoint handlers ────────────────────────────────────────── + + +def handle_health(handler): + """GET /api/health""" + projects = _discover_projects() + _json_response(handler, { + "status": "ok", + "version": __version__, + "projects": len(projects), + "bridge_available": BRIDGE_PATH.exists(), + }) + + +def handle_projects(handler): + """GET /api/projects""" + projects = [] + for pdir in _discover_projects(): + metrics = _project_metrics(pdir) + pipeline = _get_pipeline_status(pdir) + if pipeline: + metrics["last_pipeline"] = pipeline + projects.append(metrics) + _json_response(handler, {"projects": projects, "count": len(projects)}) + + +def handle_insights(handler, project_name): + """GET /api/insights/""" + pdir = PROJECTS_DIR / project_name + if not pdir.exists(): + 
return _error_response(handler, f"Project '{project_name}' not found") + + metrics = _project_metrics(pdir) + swot = _parse_swot(pdir) + wardley = _parse_wardley(pdir) + value_chain = _parse_value_chain(pdir) + bcg = _parse_bcg(pdir) + ansoff = _parse_ansoff(pdir) + blue_ocean = _parse_blue_ocean(pdir) + pipeline = _get_pipeline_status(pdir) + health = _compute_health_score(metrics, swot, wardley, value_chain) + + _json_response(handler, { + "project": project_name, + "health_score": health, + "metrics": metrics, + "swot": swot, + "wardley": wardley, + "value_chain": value_chain, + "bcg": bcg, + "ansoff": ansoff, + "blue_ocean": blue_ocean, + "pipeline": pipeline, + }) + + +def handle_swot(handler, project_name): + """GET /api/swot/""" + pdir = PROJECTS_DIR / project_name + if not pdir.exists(): + return _error_response(handler, f"Project '{project_name}' not found") + data = _parse_swot(pdir) + if not data: + return _error_response(handler, f"SWOT analysis not found for '{project_name}'") + _json_response(handler, data) + + +def handle_wardley(handler, project_name): + """GET /api/wardley/""" + pdir = PROJECTS_DIR / project_name + if not pdir.exists(): + return _error_response(handler, f"Project '{project_name}' not found") + data = _parse_wardley(pdir) + if not data: + return _error_response(handler, f"Wardley Map not found for '{project_name}'") + _json_response(handler, data) + + +def handle_value_chain(handler, project_name): + """GET /api/value-chain/""" + pdir = PROJECTS_DIR / project_name + if not pdir.exists(): + return _error_response(handler, f"Project '{project_name}' not found") + data = _parse_value_chain(pdir) + if not data: + return _error_response(handler, f"Value Chain not found for '{project_name}'") + _json_response(handler, data) + + +def handle_bcg(handler, project_name): + """GET /api/bcg/""" + pdir = PROJECTS_DIR / project_name + if not pdir.exists(): + return _error_response(handler, f"Project '{project_name}' not found") + data = 
_parse_bcg(pdir) + if not data: + return _error_response(handler, f"BCG Matrix not found for '{project_name}'") + _json_response(handler, data) + + +def handle_ansoff(handler, project_name): + """GET /api/ansoff/""" + pdir = PROJECTS_DIR / project_name + if not pdir.exists(): + return _error_response(handler, f"Project '{project_name}' not found") + data = _parse_ansoff(pdir) + if not data: + return _error_response(handler, f"Ansoff Matrix not found for '{project_name}'") + _json_response(handler, data) + + +def handle_blue_ocean(handler, project_name): + """GET /api/blue-ocean/""" + pdir = PROJECTS_DIR / project_name + if not pdir.exists(): + return _error_response(handler, f"Project '{project_name}' not found") + data = _parse_blue_ocean(pdir) + if not data: + return _error_response(handler, f"Blue Ocean analysis not found for '{project_name}'") + _json_response(handler, data) + + +def handle_bridge(handler): + """GET /api/bridge — Serve pre-generated bridge file or generate on the fly.""" + if BRIDGE_PATH.exists(): + data = _load_json(BRIDGE_PATH) + if data: + return _json_response(handler, data) + + # Fallback: generate on the fly + bridge = _generate_bridge() + _json_response(handler, bridge) + + +def handle_health_trend(handler, project_name): + """GET /api/health-trend/ — Pipeline history from SQLite.""" + db_path = PROJECTS_DIR / project_name / "data" / "archaeology.db" + if not db_path.exists(): + return _error_response(handler, f"No database for '{project_name}'") + try: + with sqlite3.connect(str(db_path)) as conn: + conn.row_factory = sqlite3.Row + rows = conn.execute( + "SELECT r.run_timestamp, r.status, pr.issues_count, pr.fixes_applied " + "FROM pipeline_runs r " + "JOIN pipeline_repo_results pr ON r.id = pr.run_id " + "WHERE pr.repo_name LIKE '%' || ? 
|| '%' " + "ORDER BY r.run_timestamp DESC LIMIT 50", + (project_name,), + ).fetchall() + trend = [dict(r) for r in rows] + _json_response(handler, {"project": project_name, "trend": trend}) + except sqlite3.OperationalError: + _json_response(handler, {"project": project_name, "trend": []}) + + +# ── Bridge generation ────────────────────────────────────────── + + +def _generate_bridge(): + """Generate the bridge data structure.""" + projects_data = {} + for pdir in _discover_projects(): + metrics = _project_metrics(pdir) + swot = _parse_swot(pdir) + wardley = _parse_wardley(pdir) + value_chain = _parse_value_chain(pdir) + bcg = _parse_bcg(pdir) + ansoff = _parse_ansoff(pdir) + blue_ocean = _parse_blue_ocean(pdir) + pipeline = _get_pipeline_status(pdir) + health = _compute_health_score(metrics, swot, wardley, value_chain) + + projects_data[metrics["name"]] = { + "health_score": health, + "total_commits": metrics["total_commits"], + "active_days": metrics["active_days"], + "era_count": metrics["era_count"], + "swot_summary": { + "strengths": len(swot.get("strengths", [])) if swot else 0, + "weaknesses": len(swot.get("weaknesses", [])) if swot else 0, + "opportunities": len(swot.get("opportunities", [])) if swot else 0, + "threats": len(swot.get("threats", [])) if swot else 0, + } if swot else None, + "wardley_maturity": wardley.get("maturity") if wardley else None, + "value_chain_margin": value_chain.get("margin_score") if value_chain else None, + "bcg_velocity_trend": bcg.get("velocity_trend") if bcg else None, + "ansoff_primary": ansoff.get("primary_strategy") if ansoff else None, + "blue_ocean_avg_score": blue_ocean.get("avg_value_score") if blue_ocean else None, + "last_pipeline_status": pipeline.get("status") if pipeline else None, + "recommendations": ( + (wardley.get("recommendations", []) if wardley else []) + + (value_chain.get("recommendations", []) if value_chain else []) + )[:5], + } + + return { + "generated_at": datetime.now().isoformat(), + "version": 
__version__, + "projects": projects_data, + "cross_repo": { + "total_repos": len(projects_data), + "total_commits": sum(p["total_commits"] for p in projects_data.values()), + "frameworks_available": ["swot", "wardley", "value-chain", "bcg", "ansoff", "blue-ocean"], + }, + } + + +def generate_bridge_file(): + """Generate and write bridge file to disk. Called from pipeline.""" + bridge = _generate_bridge() + BRIDGE_PATH.parent.mkdir(parents=True, exist_ok=True) + BRIDGE_PATH.write_text(json.dumps(bridge, indent=2, default=str) + "\n", encoding="utf-8") + print(f"Bridge generated: {len(bridge['projects'])} projects → {BRIDGE_PATH}") + return len(bridge["projects"]) + + +# ── Router ───────────────────────────────────────────────────── + + +# Route table: (pattern, handler, needs_project) +ROUTES = [ + (r"^/api/health$", handle_health, False), + (r"^/api/projects$", handle_projects, False), + (r"^/api/bridge$", handle_bridge, False), + (r"^/api/insights/(.+)$", handle_insights, True), + (r"^/api/swot/(.+)$", handle_swot, True), + (r"^/api/wardley/(.+)$", handle_wardley, True), + (r"^/api/value-chain/(.+)$", handle_value_chain, True), + (r"^/api/bcg/(.+)$", handle_bcg, True), + (r"^/api/ansoff/(.+)$", handle_ansoff, True), + (r"^/api/blue-ocean/(.+)$", handle_blue_ocean, True), + (r"^/api/health-trend/(.+)$", handle_health_trend, True), +] + + +def route(handler): + """Route an incoming /api/* request to the appropriate handler.""" + if handler.command != "GET": + return _error_response(handler, "Method not allowed", 405) + + path = urlparse(handler.path).path + + for pattern, handler_func, needs_project in ROUTES: + m = re.match(pattern, path) + if m: + if needs_project: + project_name = m.group(1) + if not _validate_project_name(project_name): + return _error_response(handler, "Invalid project name", 400) + handler_func(handler, project_name) + else: + handler_func(handler) + return + + _error_response(handler, f"Unknown endpoint: {path}", 404) diff --git 
a/archaeology/cli.py b/archaeology/cli.py index f7d3294..a415848 100644 --- a/archaeology/cli.py +++ b/archaeology/cli.py @@ -266,14 +266,15 @@ def analyze(project_name, vectors, prompts, verbose): target = list(vectors) if vectors else list(available) project_dir = _project_dir(project_name) deliverables_dir = os.path.join(project_dir, "deliverables") - os.makedirs(deliverables_dir, exist_ok=True) + analysis_dir = os.path.join(deliverables_dir, "analysis") + os.makedirs(analysis_dir, exist_ok=True) if prompts: vectors_dir = os.path.join(os.path.dirname(__file__), "..", "analysis-vectors") click.echo(f"Analysis prompt templates for '{project_name}'") for vec_name in target: prompt_path = os.path.join(vectors_dir, f"{vec_name}.md") - output_path = os.path.join(deliverables_dir, f"analysis-{vec_name}.md") + output_path = os.path.join(analysis_dir, f"analysis-{vec_name}.md") click.echo(f" [{vec_name}] prompt={prompt_path} output={output_path}") return @@ -359,7 +360,7 @@ def visualize(project_name): project_dir = _project_dir(project_name) template = os.path.join("archaeology", "visualization", "template.html") data_json = os.path.join(project_dir, "deliverables", "data.json") - output_html = os.path.join(project_dir, "deliverables", "archaeology.html") + output_html = os.path.join(project_dir, "deliverables", "visuals", "archaeology.html") if not os.path.exists(template): click.echo(f"Template not found at {template}", err=True) @@ -419,6 +420,9 @@ def visualize(project_name): html = html.replace("{{TOTAL_COMMITS}}", str(total_commits or 803)) html = html.replace("{{TOTAL_LINES}}", str(total_lines or "35,600")) html = html.replace("{{AGENT_COUNT}}", str(agent_count or 6)) + # Compute era count for meta description + era_count = len(eras_data.get("eras", [])) if os.path.exists(eras_json) else 0 + html = html.replace("{{ERA_COUNT}}", str(era_count)) # Also update tag if it still has the old format html = html.replace( @@ -495,10 +499,10 @@ def 
ingest_pipeline(project_name, logs_dir, verbose): # Auto-detect pipeline logs dir if not logs_dir: candidates = [ - os.path.expanduser("~/Desktop/OMC/.omc/logs/repo-pipeline"), - os.path.expanduser("~/.claude/data/review"), + os.path.expanduser("~/workspaces/GITHUB_pipeline/.omc/logs/repo-pipeline"), os.path.expanduser("~/dev/GITHUB_pipeline/.omc/logs/repo-pipeline"), os.path.expanduser("~/Desktop/GITHUB_pipeline/.omc/logs/repo-pipeline"), + os.path.expanduser("~/Desktop/OMC/.omc/logs/repo-pipeline"), ] for c in candidates: if os.path.isdir(c): @@ -967,39 +971,6 @@ def global_viz(output_dir, top_n, year, verbose): click.echo(f" {meta.get('total_commits', '?')} commits across {meta.get('total_repos', '?')} repos") -@main.command("fetch-github") -@click.option("--owner", default="Pastorsimon1798", help="GitHub username/org") -@click.option("--output", "output_path", default="global/data/github-repos.json", help="Output JSON path") -def fetch_github(owner, output_path): - """Fetch repo metadata from GitHub API for all repos (no cloning).""" - from .visualization.github_fetcher import save_github_data - - click.echo(f"Fetching repos for {owner} from GitHub...") - data = save_github_data(output_path, owner=owner) - click.echo(f" {data['total_repos']} repos, {data['total_commits']} total commits") - - -@main.command() -@click.argument("project_name") -def benchmark(project_name): - """Generate agent performance benchmark visualization.""" - from .visualization.agent_benchmark import run_benchmark_analysis - - project_dir = _project_dir(project_name) - - click.echo(f"Analyzing agent performance for '{project_name}'...") - - try: - output_path = run_benchmark_analysis(project_dir) - click.echo(f"Benchmark visualization generated at {output_path}") - except FileNotFoundError as e: - click.echo(str(e), err=True) - sys.exit(1) - except Exception as e: - click.echo(f"Error generating benchmark: {e}", err=True) - sys.exit(1) - - @main.command("multi-project-dashboard") 
@click.option("--output", "output_dir", default="global/deliverables", help="Output directory for the dashboard") @click.option("--top", "top_n", type=int, help="Limit to top N repos by commit count") @@ -1017,15 +988,18 @@ def multi_project_dashboard(output_dir, top_n, year, verbose): click.echo("No GitHub data found. Run 'archaeology fetch-github' first.", err=True) sys.exit(1) + # Build dashboard data click.echo("Building dashboard data...") dashboard_data = prepare_dashboard_data(global_dir, top_n=top_n, year=year) + # Write dashboard data JSON dashboard_json_path = os.path.join(data_dir, "dashboard-data.json") with open(dashboard_json_path, "w") as f: json.dump(dashboard_data, f, indent=2) if verbose: click.echo(f" Data written to {dashboard_json_path}") + # Hydrate template template_path = os.path.join("archaeology", "visualization", "multi-project-dashboard.html") if not os.path.exists(template_path): click.echo(f"Template not found at {template_path}", err=True) @@ -1034,8 +1008,10 @@ def multi_project_dashboard(output_dir, top_n, year, verbose): with open(template_path, encoding="utf-8") as f: html = f.read() + # Inline the data JSON safe_data = json.dumps(dashboard_data).replace("<", "\\u003c").replace(">", "\\u003e").replace("&", "\\u0026") + # Replace the placeholder old_placeholder = "// DATA_PLACEHOLDER\nwindow.DASHBOARD_DATA = {};" new_inline = f"window.DASHBOARD_DATA = {safe_data};\n window.dispatchEvent(new Event('dashboard-data-loaded'));" if old_placeholder in html: @@ -1045,6 +1021,7 @@ def multi_project_dashboard(output_dir, top_n, year, verbose): else: click.echo("Warning: could not find DASHBOARD_DATA placeholder in template", err=True) + # Write output os.makedirs(output_dir, exist_ok=True) output_path = os.path.join(output_dir, "dashboard.html") with open(output_path, "w", encoding="utf-8") as f: @@ -1053,6 +1030,256 @@ def multi_project_dashboard(output_dir, top_n, year, verbose): click.echo(f"Multi-project dashboard generated at 
{output_path}") meta = dashboard_data.get("meta", {}) click.echo(f" {meta.get('total_commits', '?')} commits across {meta.get('total_repos', '?')} repos") + click.echo(f" Period: {meta.get('first_date', '?')} to {meta.get('last_date', '?')}") + + +@main.command("fetch-github") +@click.option("--owner", default="Pastorsimon1798", help="GitHub username/org") +@click.option("--output", "output_path", default="global/data/github-repos.json", help="Output JSON path") +def fetch_github(owner, output_path): + """Fetch repo metadata from GitHub API for all repos (no cloning).""" + from .visualization.github_fetcher import save_github_data + + click.echo(f"Fetching repos for {owner} from GitHub...") + data = save_github_data(output_path, owner=owner) + click.echo(f" {data['total_repos']} repos, {data['total_commits']} total commits") + + +@main.command() +@click.argument("project_name") +def benchmark(project_name): + """Generate agent performance benchmark visualization.""" + from .visualization.agent_benchmark import run_benchmark_analysis + + project_dir = _project_dir(project_name) + + click.echo(f"Analyzing agent performance for '{project_name}'...") + + try: + output_path = run_benchmark_analysis(project_dir) + click.echo(f"Benchmark visualization generated at {output_path}") + except FileNotFoundError as e: + click.echo(str(e), err=True) + sys.exit(1) + except Exception as e: + click.echo(f"Error generating benchmark: {e}", err=True) + sys.exit(1) + + +@main.command() +@click.option("--port", default=8080, help="Port to serve on") +@click.option("--no-open", is_flag=True, help="Don't open browser automatically") +def serve(port, no_open): + """Start local dashboard server for all project deliverables. + + Generates the master dashboard and serves all projects over HTTP. + Accessible from any device on your Tailscale network. 
+ """ + import http.server + import threading + import webbrowser + + from .visualization.dashboard import discover_projects, generate_master_dashboard, generate_project_index, load_api_repos, generate_global_section + + root = Path.cwd() + projects_dir = root / "projects" + global_data_dir = root / "global" / "data" + + # Generate master dashboard + projects = discover_projects(projects_dir) + if not projects: + click.echo("No projects found. Run 'archaeology mine <repo>' first.", err=True) + sys.exit(1) + + # Load API-only repos (no cloning needed) + api_repos = load_api_repos(global_data_dir) if global_data_dir.exists() else [] + # Deduplicate: remove API repos already present as mined projects + mined_names = {p["name"].lower().replace("-", "").replace("_", "") for p in projects} + api_repos = [r for r in api_repos if r["name"].lower().replace("-", "").replace("_", "") not in mined_names] + print(f" After dedup: {len(api_repos)} API-only repos") + owner_labels = {"Pastorsimon1798": "Pastorsimon1798 (Personal)", "KyaniteLabs": "KyaniteLabs (Org)"} + api_section_html = generate_global_section(api_repos, owner_labels) if api_repos else "" + + dashboard_html = generate_master_dashboard(projects, api_section_html=api_section_html, api_repos=api_repos) + + # Symlink global visualizations if they exist + site_dir = root / ".serve" + site_dir.mkdir(exist_ok=True) + global_deliverables = root / "global" / "deliverables" + if global_deliverables.exists(): + for html_file in global_deliverables.glob("*.html"): + link_path = site_dir / html_file.name + if link_path.is_symlink() or link_path.exists(): + link_path.unlink() + link_path.symlink_to(html_file.resolve()) + (site_dir / "index.html").write_text(dashboard_html, encoding="utf-8") + + # Generate per-project index pages and symlink all deliverable files + for proj in projects: + proj_site_dir = site_dir / proj["name"] + proj_site_dir.mkdir(exist_ok=True) + + # Generate project index page + proj_index_html = 
generate_project_index(proj) + (proj_site_dir / "index.html").write_text(proj_index_html, encoding="utf-8") + + # Symlink ALL deliverable files from all subdirectories + deliverables_dir = projects_dir / proj["name"] / "deliverables" + if deliverables_dir.exists(): + # Symlink top-level data files (data.json, canonical-metrics.json) + for data_file in deliverables_dir.glob("*.json"): + link_path = proj_site_dir / data_file.name + if link_path.is_symlink() or link_path.exists(): + link_path.unlink() + link_path.symlink_to(data_file.resolve()) + + # Symlink all files from each deliverable subdirectory + for sub_dir in deliverables_dir.iterdir(): + if not sub_dir.is_dir(): + continue + target_dir = proj_site_dir / sub_dir.name + target_dir.mkdir(exist_ok=True) + for f in sub_dir.iterdir(): + if f.is_dir(): + continue + link_path = target_dir / f.name + if link_path.is_symlink() or link_path.exists(): + link_path.unlink() + link_path.symlink_to(f.resolve()) + + # Symlink global deliverables for cross-repo analysis + global_deliverables_dir = root / "global" / "deliverables" + if global_deliverables_dir.exists(): + global_site_dir = site_dir / "global" + global_site_dir.mkdir(exist_ok=True) + for f in global_deliverables_dir.rglob("*"): + if f.is_dir(): + continue + rel = f.relative_to(global_deliverables_dir) + link_path = global_site_dir / rel + link_path.parent.mkdir(parents=True, exist_ok=True) + if link_path.is_symlink() or link_path.exists(): + link_path.unlink() + link_path.symlink_to(f.resolve()) + + # Copy md-viewer.html to serve directory + md_viewer_src = root / "archaeology" / "templates" / "md-viewer.html" + if md_viewer_src.exists(): + md_viewer_dst = site_dir / "md-viewer.html" + if md_viewer_dst.exists(): + md_viewer_dst.unlink() + import shutil + shutil.copy2(md_viewer_src, md_viewer_dst) + + total_deliverables = sum(p.get("total_deliverables", 0) for p in projects) + click.echo(f" Master dashboard: {len(projects)} projects") + click.echo(f" Total 
deliverables: {total_deliverables}") + + # Custom handler: /api/* routes to JSON API, everything else is static files + import functools + from .api import route as api_route + + class DevArchHandler(http.server.SimpleHTTPRequestHandler): + def do_GET(self): + if self.path.startswith("/api/"): + api_route(self) + else: + super().do_GET() + + def log_message(self, fmt, *args): + # Suppress per-request logging for static files, keep for API + if self.path.startswith("/api/"): + click.echo(f" API: {self.path}") + + handler = functools.partial(DevArchHandler, directory=str(site_dir)) + + server = http.server.HTTPServer(("0.0.0.0", port), handler) + url = f"http://localhost:{port}" + + click.echo(f"\n Serving at {url}") + click.echo(f" Tailscale: http://100.115.175.18:{port}") + click.echo(f" Press Ctrl+C to stop\n") + + if not no_open: + threading.Timer(0.5, lambda: webbrowser.open(url)).start() + + try: + server.serve_forever() + except KeyboardInterrupt: + click.echo("\n Server stopped.") + server.server_close() + + +@main.command("publish-static") +@click.option("--output", "output_dir", default="site", help="Output directory for the static site") +def publish_static(output_dir): + """Generate a static site for deployment (GitHub Pages, nginx, etc.).""" + import shutil + + from .visualization.dashboard import discover_projects, generate_master_dashboard, generate_project_index, load_api_repos, generate_global_section + + root = Path.cwd() + projects_dir = root / "projects" + site = root / output_dir + + # Clean output directory + if site.exists(): + shutil.rmtree(site) + site.mkdir(parents=True) + + # Generate master dashboard + projects = discover_projects(projects_dir) + if not projects: + click.echo("No projects found.", err=True) + sys.exit(1) + + # Load and deduplicate API repos + global_data_dir = root / "global" / "data" + api_repos = load_api_repos(global_data_dir) if global_data_dir.exists() else [] + mined_names = {p["name"].lower().replace("-", 
"").replace("_", "") for p in projects} + api_repos = [r for r in api_repos if r["name"].lower().replace("-", "").replace("_", "") not in mined_names] + owner_labels = {"Pastorsimon1798": "Pastorsimon1798 (Personal)", "KyaniteLabs": "KyaniteLabs (Org)"} + api_section_html = generate_global_section(api_repos, owner_labels) if api_repos else "" + + dashboard_html = generate_master_dashboard(projects, api_section_html=api_section_html, api_repos=api_repos) + (site / "index.html").write_text(dashboard_html, encoding="utf-8") + + click.echo(f" Master dashboard: {len(projects)} projects, {len(api_repos)} API repos") + + # Copy global deliverables (dashboard.html, global.html) + global_deliverables = root / "global" / "deliverables" + if global_deliverables.exists(): + for html_file in global_deliverables.glob("*.html"): + shutil.copy2(html_file, site / html_file.name) + click.echo(f" Global visualizations copied") + + # Generate per-project pages + for proj in projects: + proj_site_dir = site / proj["name"] + proj_site_dir.mkdir() + + # Project index + proj_index_html = generate_project_index(proj) + (proj_site_dir / "index.html").write_text(proj_index_html, encoding="utf-8") + + # Copy HTML files from deliverables + deliverables_dir = projects_dir / proj["name"] / "deliverables" + visuals_dir = deliverables_dir / "visuals" + source_dir = visuals_dir if visuals_dir.exists() else deliverables_dir + + for html_file in source_dir.glob("*.html"): + shutil.copy2(html_file, proj_site_dir / html_file.name) + + # Copy data.json + data_json = deliverables_dir / "data.json" + if data_json.exists(): + shutil.copy2(data_json, proj_site_dir / "data.json") + + click.echo(f" {proj['name']}: {len(proj['visuals'])} pages") + + total = sum(len(p["visuals"]) for p in projects) + len(projects) + 1 + click.echo(f"\n Static site generated at {site}/ ({total} pages)") + click.echo(f" Deploy with: rsync -avz {site}/ user@host:/var/www/archaeology/") if __name__ == "__main__": diff --git 
a/archaeology/db/pipeline_ingest.py b/archaeology/db/pipeline_ingest.py index c18dab4..0089056 100644 --- a/archaeology/db/pipeline_ingest.py +++ b/archaeology/db/pipeline_ingest.py @@ -6,19 +6,27 @@ Pipeline JSON format (expected keys): { - "timestamp": "2026-04-09T22:17:10Z", - "status": "pass|fail|partial", + "run_timestamp": "2026-04-09T22:17:10Z", + "status": "COMPLETE|NEEDS_ATTENTION|RUNNING", "duration_seconds": 120, "repos": [ { "name": "repo-name", "tier": 1, - "issues": [...], + "issues": {...}, "fixes_applied": 2, "status": "clean" } ], - "agents_used": ["hygiene-agent", "secret-scanner"], + "supervision": { + "missions": [ + { + "repo": "owner/repo", + "status": "supervising|needs_attention", + ... + } + ] + }, "summary": { ... } } """ @@ -62,6 +70,48 @@ """ +def _normalize_status(raw_status: str) -> str: + """Normalize new pipeline status values to legacy format for backward compatibility.""" + if not raw_status or not isinstance(raw_status, str): + return "unknown" + + status_map = { + "COMPLETE": "pass", + "SUCCESS": "pass", + "NEEDS_ATTENTION": "partial", + "RUNNING": "partial", + "FAILED": "fail", + "ERROR": "fail", + } + return status_map.get(raw_status.upper(), raw_status.lower()) + + +def _extract_agents(run_json: dict) -> str: + """Extract agent list from either supervision.missions or agents_used field.""" + # Try new format: supervision.missions + supervision = run_json.get("supervision", {}) + if isinstance(supervision, dict): + missions = supervision.get("missions", []) + if missions and isinstance(missions, list): + # Extract unique agent types from mission assignments + agents = set() + for mission in missions: + if isinstance(mission, dict): + assigned_layers = mission.get("assigned_layers", []) + if assigned_layers and isinstance(assigned_layers, list): + agents.update(assigned_layers) + if agents: + return json.dumps(sorted(agents)) + + # Fall back to old format: agents_used + agents_used = run_json.get("agents_used", []) + if 
agents_used: + return json.dumps(agents_used) + + # Default: empty list + return "[]" + + def ensure_tables(db_path: Path) -> None: """Create pipeline tables if they don't exist.""" conn = sqlite3.connect(str(db_path), timeout=30) @@ -77,10 +127,14 @@ def ingest_run(db_path: Path, run_json: dict, source_file: str = "") -> int: ensure_tables(db_path) conn = sqlite3.connect(str(db_path), timeout=30) try: - ts = run_json.get("timestamp", datetime.utcnow().isoformat()) - status = run_json.get("status", "unknown") + # Support both old (timestamp) and new (run_timestamp) field names + ts = run_json.get("run_timestamp") or run_json.get("timestamp", datetime.utcnow().isoformat()) + # Map new status values to old format for backward compatibility + raw_status = run_json.get("status", "unknown") + status = _normalize_status(raw_status) duration = run_json.get("duration_seconds") - agents = json.dumps(run_json.get("agents_used", [])) + # Extract agents from supervision.missions if available, else use agents_used + agents = _extract_agents(run_json) summary = json.dumps(run_json.get("summary", {})) cursor = conn.execute( @@ -96,12 +150,16 @@ def ingest_run(db_path: Path, run_json: dict, source_file: str = "") -> int: issues_json = json.dumps(issues) if isinstance(issues, (list, dict)) else "[]" fixes_raw = repo.get("fixes_applied", 0) fixes_count = len(fixes_raw) if isinstance(fixes_raw, list) else fixes_raw if isinstance(fixes_raw, int) else 0 + + # Extract repo name from multiple possible fields + repo_name = (repo.get("full_name") or repo.get("path") or repo.get("name", "unknown")) + conn.execute( "INSERT INTO pipeline_repo_results (run_id, repo_name, tier, status, issues_count, fixes_applied, issues_json) " "VALUES (?, ?, ?, ?, ?, ?, ?)", ( run_id, - repo.get("name", "unknown"), + repo_name, repo.get("tier"), repo.get("status", "unknown"), issues_count, @@ -146,8 +204,10 @@ def ingest_directory(db_path: Path, logs_dir: Path, verbose: bool = False) -> di with 
open(json_file, encoding="utf-8") as f: data = json.load(f) - # Validate it looks like a pipeline run - if "repos" not in data and "timestamp" not in data: + # Validate it looks like a pipeline run (support both old and new formats) + has_repos = "repos" in data or "supervision" in data + has_timestamp = "run_timestamp" in data or "timestamp" in data + if not has_repos and not has_timestamp: if verbose: print(f" SKIP {json_file.name} (not a pipeline run)") stats["skipped"] += 1 diff --git a/archaeology/era_cascade.py b/archaeology/era_cascade.py index a5b27ac..b2b5502 100644 --- a/archaeology/era_cascade.py +++ b/archaeology/era_cascade.py @@ -83,9 +83,10 @@ def cascade( if data_json.exists() and data_js.exists(): _mirror_data_js(data_json, data_js, dry_run) - # Step 5: Fix HTML files + # Step 5: Fix HTML files (check both root and visuals/ subdirectory) deliverables = project_dir / "deliverables" - for html_file in deliverables.glob("*.html"): + html_files = list(deliverables.glob("*.html")) + list(deliverables.glob("visuals/*.html")) + for html_file in html_files: if html_file.name in EXEMPT_FILES: continue _fix_html_file(html_file, eras, n_eras, dry_run, result) diff --git a/archaeology/local_pipeline.py b/archaeology/local_pipeline.py index 7366376..c9393e3 100644 --- a/archaeology/local_pipeline.py +++ b/archaeology/local_pipeline.py @@ -83,15 +83,44 @@ def read_local_pipeline_status(pipeline_dir: str | Path, repo_name: str) -> Loca if not path.exists(): raise FileNotFoundError(f"Local pipeline latest.json not found: {path}") payload = json.loads(path.read_text(encoding="utf-8")) + + # Try to find repo in supervision.missions first (new format) target = None - for repo in payload.get("repos", []): - names = {str(repo.get("name", "")), str(repo.get("full_name", "")), str(repo.get("path", ""))} - if repo_name in names or repo_name.endswith("/" + str(repo.get("name", ""))): - target = repo - break + supervision = payload.get("supervision", {}) + if 
isinstance(supervision, dict): + missions = supervision.get("missions", []) + if missions and isinstance(missions, list): + for mission in missions: + if isinstance(mission, dict): + mission_repo = mission.get("repo", "") + # Match against owner/repo or just repo name + if (mission_repo == repo_name or + mission_repo.endswith("/" + repo_name) or + repo_name.endswith("/" + mission_repo.split("/")[-1])): + target = mission + break + + # Fall back to repos array (old format) + if target is None: + for repo in payload.get("repos", []): + names = {str(repo.get("name", "")), str(repo.get("full_name", "")), str(repo.get("path", ""))} + if repo_name in names or repo_name.endswith("/" + str(repo.get("name", ""))): + target = repo + break + if target is None: - reviewed = ", ".join(str(repo.get("name")) for repo in payload.get("repos", [])) + # Build helpful error message with available repos + reviewed_repos = [] + supervision = payload.get("supervision", {}) + if isinstance(supervision, dict): + missions = supervision.get("missions", []) + if missions: + reviewed_repos = [m.get("repo", "") for m in missions if isinstance(m, dict)] + if not reviewed_repos: + reviewed_repos = [repo.get("name", "") for repo in payload.get("repos", [])] + reviewed = ", ".join(str(r) for r in reviewed_repos) raise ValueError(f"Repo '{repo_name}' not found in latest local pipeline reviewed repos. 
Reviewed: {reviewed}") + summary = payload.get("summary", {}) # Normalize issues to dict - pipeline may return list or dict raw_issues = target.get("issues") @@ -99,12 +128,25 @@ def read_local_pipeline_status(pipeline_dir: str | Path, repo_name: str) -> Loca issues = {"items": raw_issues, "total": len(raw_issues)} else: issues = raw_issues or {} + + # Extract repo name from multiple possible fields + repo_full_name = ( + target.get("repo") or # new format + target.get("full_name") or # old format + target.get("path") or + target.get("name", "") + ) + + # Extract health/verdict from mission or repo data + repo_health = target.get("health") or target.get("status") + repo_verdict = target.get("verdict") + return LocalPipelineStatus( run_timestamp=payload.get("run_timestamp"), overall_health=summary.get("overall_health"), - repo=str(target.get("full_name") or target.get("path") or target.get("name")), - repo_health=target.get("health"), - repo_verdict=target.get("verdict"), + repo=str(repo_full_name), + repo_health=repo_health, + repo_verdict=repo_verdict, issues=issues, open_prs=target.get("open_prs"), open_issues=target.get("open_issues"), diff --git a/archaeology/report.py b/archaeology/report.py index b544a28..9c3cb50 100644 --- a/archaeology/report.py +++ b/archaeology/report.py @@ -42,11 +42,20 @@ def export_markdown_report(project_name: str, project_root: str | Path, output_p """Export a concise Markdown report from canonical metrics + analysis JSON.""" project_root = Path(project_root) deliverables = project_root / "deliverables" + analysis_dir = deliverables / "analysis" data_dir = project_root / "data" project = _load_json(project_root / "project.json") or {} canonical = _load_json(deliverables / "canonical-metrics.json") or {} eras = _load_json(data_dir / "commit-eras.json") or {} - analyses = {name.replace("analysis-", "").replace(".json", ""): _load_json(deliverables / name) for name in ANALYSIS_FILES} + # Look for analysis files in analysis/ subdirectory 
first, then fall back to deliverables root + analysis_search = [analysis_dir, deliverables] + analyses = {} + for name in ANALYSIS_FILES: + for search_dir in analysis_search: + loaded = _load_json(search_dir / name) + if loaded: + analyses[name.replace("analysis-", "").replace(".json", "")] = loaded + break title = project.get("visualization", {}).get("title") or project.get("name") or project_name out = [] @@ -62,8 +71,6 @@ def export_markdown_report(project_name: str, project_root: str | Path, output_p ("Total commits", canonical.get("total_commits") or eras.get("total_commits")), ("Span days", canonical.get("span_days")), ("Active days", canonical.get("active_days")), - ("Sessions", canonical.get("session_count")), - ("Human messages", canonical.get("human_messages")), ("Peak day", canonical.get("peak_day")), ("Peak day commits", canonical.get("peak_day_commits")), ] @@ -124,7 +131,7 @@ def export_markdown_report(project_name: str, project_root: str | Path, output_p out.append(_bullet("Run `archaeology audit <project> --fail-on HIGH` before publishing.")) if output_path is None: - output_path = deliverables / "ARCHAEOLOGY-REPORT.md" + output_path = deliverables / "reports" / "ARCHAEOLOGY-REPORT.md" output = Path(output_path) output.parent.mkdir(parents=True, exist_ok=True) output.write_text("".join(out), encoding="utf-8") @@ -142,6 +149,14 @@ def close_list() -> None: body.append("</ul>") in_list = False + def render_inline(text: str) -> str: + """Convert **bold** and `code` inline markup to HTML.""" + import re + text = html.escape(text) + text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', text) + text = re.sub(r'`(.+?)`', r'<code>\1</code>', text) + return text + for raw_line in markdown.splitlines(): line = raw_line.rstrip() if not line: @@ -149,45 +164,70 @@ def close_list() -> None: continue if line.startswith("# "): close_list() - body.append(f"<h1>{html.escape(line[2:])}</h1>") + body.append(f"<h1>{render_inline(line[2:])}</h1>") elif 
line.startswith("## "): close_list() - body.append(f"<h2>{html.escape(line[3:])}</h2>") + body.append(f"<h2>{render_inline(line[3:])}</h2>") elif line.startswith("- "): if not in_list: body.append("<ul>") in_list = True - item = html.escape(line[2:]).replace("**", "") - body.append(f"<li>{item}</li>") + body.append(f"<li>{render_inline(line[2:])}</li>") else: close_list() - text = html.escape(line).replace("`", "") - body.append(f"<p>{text}</p>") + body.append(f"<p>{render_inline(line)}</p>") close_list() body_html = "\n ".join(body) escaped_title = html.escape(title) return f"""<!doctype html> -<html lang=\"en\"> +<html lang="en"> <head> - <meta charset=\"utf-8\"> - <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\"> + <meta charset="utf-8"> + <meta name="viewport" content="width=device-width, initial-scale=1"> <title>{escaped_title} + + + + + +
-
Generated by Dev-Archaeology
+
Archaeology Report
{body_html}
@@ -204,7 +244,7 @@ def export_html_report(project_name: str, project_root: str | Path, output_path: project = _load_json(project_root / "project.json") or {} title = project.get("visualization", {}).get("title") or project.get("name") or project_name if output_path is None: - output_path = deliverables / "ARCHAEOLOGY-REPORT.html" + output_path = deliverables / "visuals" / "report.html" output = Path(output_path) output.parent.mkdir(parents=True, exist_ok=True) output.write_text(_markdown_to_html(markdown, f"{title} Archaeology Report"), encoding="utf-8") diff --git a/archaeology/templates/md-viewer.html b/archaeology/templates/md-viewer.html new file mode 100644 index 0000000..0f52472 --- /dev/null +++ b/archaeology/templates/md-viewer.html @@ -0,0 +1,125 @@ + + + + + +Document Viewer — DEV-ARCH + + + + + + + + + + + +
+
Loading document
+ +
+ + + + diff --git a/archaeology/visualization/agent_benchmark.py b/archaeology/visualization/agent_benchmark.py index 2ddba87..f8601fa 100644 --- a/archaeology/visualization/agent_benchmark.py +++ b/archaeology/visualization/agent_benchmark.py @@ -44,50 +44,30 @@ def analyze_agent_benchmarks(db_path: str) -> Dict[str, Any]: conn.row_factory = sqlite3.Row cursor = conn.cursor() - # Get era information - eras_data = cursor.execute( - "SELECT id, name FROM eras ORDER BY id" - ).fetchall() + # Check if eras table exists + has_eras = cursor.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name='eras'" + ).fetchone() is not None - eras = {row["id"]: row["name"] for row in eras_data} + # Get era information (optional) + eras = {} + era_date_ranges = {} + if has_eras: + eras_data = cursor.execute( + "SELECT id, name FROM eras ORDER BY id" + ).fetchall() + eras = {row["id"]: row["name"] for row in eras_data} era_ids = list(eras.keys()) # Build era date ranges for mapping commits era_date_ranges = {} - for row in eras_data: - era_id = row["id"] - # Parse dates like "Feb 28 - Mar 18" or "Mar 19 - Mar 31" - dates_str = cursor.execute( - f"SELECT dates FROM eras WHERE id = {era_id}" - ).fetchone()["dates"] - - # Simple parsing - assume format like "Feb 28 - Mar 18" - # We'll match commits by year-month pattern - era_date_ranges[era_id] = dates_str - - # Get all commits with era mapping - # We need to map commits to eras based on date - commits_query = """ - SELECT c.hash, c.date, c.message, c.author, e.id as era_id - FROM commits c - LEFT JOIN eras e ON c.date BETWEEN - substr(e.dates, 1, instr(e.dates, ' - ') - 1) || - CASE WHEN substr(e.dates, 1, 2) LIKE 'Jan%' THEN ', 2026' - WHEN substr(e.dates, 1, 2) LIKE 'Feb%' THEN ', 2026' - WHEN substr(e.dates, 1, 2) LIKE 'Mar%' THEN ', 2026' - WHEN substr(e.dates, 1, 2) LIKE 'Apr%' THEN ', 2026' - WHEN substr(e.dates, 1, 2) LIKE 'May%' THEN ', 2026' - ELSE ', 2026' END - AND - substr(e.dates, instr(e.dates, ' - ') + 
4, 50) || - CASE WHEN substr(e.dates, instr(e.dates, ' - ') + 4, 2) LIKE 'Jan%' THEN ', 2026' - WHEN substr(e.dates, instr(e.dates, ' - ') + 4, 2) LIKE 'Feb%' THEN ', 2026' - WHEN substr(e.dates, instr(e.dates, ' - ') + 4, 2) LIKE 'Mar%' THEN ', 2026' - WHEN substr(e.dates, instr(e.dates, ' - ') + 4, 2) LIKE 'Apr%' THEN ', 2026' - WHEN substr(e.dates, instr(e.dates, ' - ') + 4, 2) LIKE 'May%' THEN ', 2026' - ELSE ', 2026' END - ORDER BY c.date - """ + if has_eras: + for row in eras_data: + era_id = row["id"] + dates_str = cursor.execute( + f"SELECT dates FROM eras WHERE id = {era_id}" + ).fetchone()["dates"] + era_date_ranges[era_id] = dates_str # Simpler approach: get all commits and map to eras in Python commits = cursor.execute( @@ -96,13 +76,14 @@ def analyze_agent_benchmarks(db_path: str) -> Dict[str, Any]: # Build era date mappings manually era_mappings = [] - for era_id, era_name in eras.items(): - era_row = cursor.execute( - f"SELECT dates, sub_phases FROM eras WHERE id = {era_id}" - ).fetchone() + if has_eras: + for era_id, era_name in eras.items(): + era_row = cursor.execute( + f"SELECT dates, sub_phases FROM eras WHERE id = {era_id}" + ).fetchone() - dates_str = era_row["dates"] - sub_phases_str = era_row["sub_phases"] + dates_str = era_row["dates"] + sub_phases_str = era_row["sub_phases"] # Parse the main era date range # Format: "Feb 28 - Mar 18" @@ -316,6 +297,8 @@ def generate_benchmark_html(benchmark_data: Dict[str, Any], project_name: str) - {project_name.upper()} — Agent Performance Benchmark + + @@ -338,7 +321,12 @@ def generate_benchmark_html(benchmark_data: Dict[str, Any], project_name: str) - .mono{{font-family:var(--font-mono)}} .container{{max-width:1200px;margin:0 auto;padding:2rem}} -.header{{margin-bottom:3rem}} +.site-nav{{position:sticky;top:0;z-index:100;background:var(--surface);border-bottom:1px solid var(--border);padding:0 
24px;display:flex;align-items:center;gap:12px;height:52px;font-family:var(--font-display);backdrop-filter:blur(12px)}} +.site-nav .nav-back{{font-weight:500;font-size:13px;color:var(--text2);text-decoration:none;padding:4px 10px;border-radius:var(--radius-sm);transition:color .15s,background .15s;white-space:nowrap}} +.site-nav .nav-back:hover{{color:var(--text);background:var(--surface2)}} +.site-nav .nav-sep{{width:1px;height:24px;background:var(--border)}} +.site-nav .nav-title{{font-weight:600;font-size:15px;color:var(--text);letter-spacing:-.01em}} +.header{{margin-bottom:3rem;padding-top:1rem}} .header h1{{font-size:2.5rem;margin-bottom:.5rem}} .header p{{color:var(--text2);font-size:1.1rem}} @@ -371,6 +359,11 @@ def generate_benchmark_html(benchmark_data: Dict[str, Any], project_name: str) - +

{project_name.upper()} — Agent Performance Benchmark

@@ -658,7 +651,8 @@ def run_benchmark_analysis(project_dir: str) -> str: project_path = Path(project_dir) db_path = project_path / "data" / "archaeology.db" deliverables_dir = project_path / "deliverables" - output_path = deliverables_dir / "agent-benchmark.html" + visuals_dir = deliverables_dir / "visuals" + output_path = visuals_dir / "agent-benchmark.html" if not db_path.exists(): raise FileNotFoundError(f"Database not found: {db_path}") @@ -673,7 +667,7 @@ def run_benchmark_analysis(project_dir: str) -> str: html_content = generate_benchmark_html(benchmark_data, project_name) # Write output - deliverables_dir.mkdir(parents=True, exist_ok=True) + visuals_dir.mkdir(parents=True, exist_ok=True) output_path.write_text(html_content, encoding="utf-8") return str(output_path) diff --git a/archaeology/visualization/dashboard.py b/archaeology/visualization/dashboard.py new file mode 100644 index 0000000..80ff82a --- /dev/null +++ b/archaeology/visualization/dashboard.py @@ -0,0 +1,948 @@ +"""Master dashboard generator for dev-archaeology. + +Generates the top-level index.html that shows all projects as cards +with links to their individual visualizations. Served by `archaeology serve` +or deployed as a static site. 
+""" + +from __future__ import annotations + +import json +from datetime import datetime +from pathlib import Path +from typing import Any + +# Deliverable categories with display metadata and colors +CATEGORIES: dict[str, dict[str, str]] = { + "visuals": {"icon": "📊", "label": "Visualizations", "color": "#14b8a6"}, + "analysis": {"icon": "🔎", "label": "Analysis", "color": "#8b5cf6"}, + "reports": {"icon": "📄", "label": "Reports", "color": "#3b82f6"}, + "strategy": {"icon": "🎯", "label": "Strategy", "color": "#f59e0b"}, + "planning": {"icon": "📋", "label": "Planning", "color": "#10b981"}, + "learning": {"icon": "📚", "label": "Learning", "color": "#ec4899"}, + "content": {"icon": "✍", "label": "Content", "color": "#f97316"}, + "video": {"icon": "🎥", "label": "Video", "color": "#ef4444"}, +} + + +def _discover_all_deliverables(deliverables_dir: Path, project_name: str) -> dict[str, list[dict[str, str]]]: + """Scan all deliverable subdirectories and categorize files.""" + result: dict[str, list[dict[str, str]]] = {} + for cat_name in CATEGORIES: + cat_dir = deliverables_dir / cat_name + if not cat_dir.exists(): + continue + files = [] + for f in sorted(cat_dir.iterdir()): + if f.suffix not in (".html", ".md", ".json"): + continue + display = f.stem.replace("-", " ").replace("_", " ").title() + href = f"{project_name}/{cat_name}/{f.name}" + files.append({"name": display, "href": href, "ext": f.suffix, "filename": f.name}) + if files: + result[cat_name] = files + return result + + +def discover_projects(projects_dir: Path) -> list[dict[str, Any]]: + """Scan projects/ directory and collect metadata for each project. + + Returns list of project dicts sorted by name. 
+ """ + projects = [] + if not projects_dir.exists(): + return projects + + for project_dir in sorted(projects_dir.iterdir()): + if not project_dir.is_dir(): + continue + if project_dir.name.startswith((".", "_")): + continue + + deliverables_dir = project_dir / "deliverables" + data_dir = project_dir / "data" + if not deliverables_dir.exists() and not data_dir.exists(): + continue + + # Load project metadata from data.json or canonical-metrics.json + meta = _load_project_meta(deliverables_dir, data_dir) + + # Discover HTML visualizations (backward compat) + visuals = _discover_visuals(deliverables_dir, project_dir.name) + + # Discover all categorized deliverables + deliverables = _discover_all_deliverables(deliverables_dir, project_dir.name) + total_deliverables = sum(len(v) for v in deliverables.values()) + + projects.append({ + "name": project_dir.name, + "slug": project_dir.name, + "meta": meta, + "visuals": visuals, + "deliverables": deliverables, + "total_deliverables": total_deliverables, + "has_data": (deliverables_dir / "data.json").exists() or (data_dir).exists(), + }) + + return projects + + +def _load_project_meta(deliverables_dir: Path, data_dir: Path) -> dict[str, Any]: + """Load project metadata from available JSON files.""" + meta: dict[str, Any] = {} + + # Try data.json first (most comprehensive) + data_json = deliverables_dir / "data.json" + if data_json.exists(): + try: + data = json.loads(data_json.read_text(encoding="utf-8")) + summary = data.get("summary", {}) + meta["commits"] = summary.get("total_commits", 0) + meta["active_days"] = summary.get("active_days", 0) + meta["span_days"] = summary.get("span_days", 0) + meta["era_count"] = len(data.get("eras", [])) + meta["authors"] = list(data.get("authors", {}).keys()) if isinstance(data.get("authors"), dict) else [] + except (json.JSONDecodeError, OSError): + pass + + # Try canonical-metrics.json + canonical = deliverables_dir / "canonical-metrics.json" + if canonical.exists() and not 
meta.get("commits"): + try: + cm = json.loads(canonical.read_text(encoding="utf-8")) + meta["commits"] = cm.get("total_commits", 0) + meta["active_days"] = cm.get("active_days", 0) + meta["span_days"] = cm.get("span_days", 0) + meta["era_count"] = cm.get("era_count", 0) + except (json.JSONDecodeError, OSError): + pass + + # Fallback: compute metrics from CSV + if not meta.get("commits"): + meta.update(_load_git_metrics(data_dir)) + + return meta + + +def _load_git_metrics(data_dir: Path) -> dict[str, Any]: + """Fallback: compute basic metrics from github-commits.csv.""" + import csv as _csv + + csv_path = data_dir / "github-commits.csv" + if not csv_path.exists(): + return {} + commits = 0 + dates: set[str] = set() + try: + with open(csv_path, encoding="utf-8") as f: + reader = _csv.DictReader(f) + for row in reader: + commits += 1 + d = row.get("date", "")[:10] + if d: + dates.add(d) + except OSError: + return {} + if commits == 0: + return {} + sorted_dates = sorted(dates) + span = 0 + if len(sorted_dates) >= 2: + from datetime import date as _date + try: + d0 = _date.fromisoformat(sorted_dates[0]) + d1 = _date.fromisoformat(sorted_dates[-1]) + span = (d1 - d0).days + except ValueError: + span = 0 + return { + "commits": commits, + "active_days": len(dates), + "span_days": span or 1, + "era_count": 0, + "first_commit": sorted_dates[0], + "last_commit": sorted_dates[-1], + } + + +def _discover_visuals(deliverables_dir: Path, project_name: str) -> list[dict[str, str]]: + """Find HTML visualization files for a project.""" + visuals = [] + seen = set() + + # Check visuals/ subdirectory (new structure) + visuals_dir = deliverables_dir / "visuals" + search_dirs = [visuals_dir, deliverables_dir] if visuals_dir.exists() else [deliverables_dir] + + for search_dir in search_dirs: + if not search_dir.exists(): + continue + for html_file in sorted(search_dir.glob("*.html")): + name = html_file.stem + if name in seen or name == "index": + continue + seen.add(name) + + display 
= name.replace("-", " ").replace("_", " ").title() + if display == "Archaeology": + display = "Dashboard" + order = 0 + elif display == "Playbook": + order = 1 + elif "Agent" in display or "Benchmark" in display: + display = "Agents" + order = 2 + elif display == "Report": + order = 3 + else: + order = 99 + + # Relative href from master dashboard (flat — serve symlinks into project dir) + href = f"{project_name}/{html_file.name}" + + visuals.append({"name": display, "href": href, "order": order}) + + visuals.sort(key=lambda v: v["order"]) + return visuals + + +def _pluralize(n: int, singular: str, plural: str = "") -> str: + """Return singular or plural form based on count.""" + if plural == "": + plural = singular + "s" + return singular if n == 1 else plural + + +def _format_stat(value: Any, singular: str, plural: str = "") -> str: + """Format a stat value, using dash for zero/unknown and correct pluralization.""" + if value == 0 or value == "?" or value is None: + return "—" + n = int(value) if isinstance(value, (int, float)) else 0 + return f"{n:,} {_pluralize(n, singular, plural)}" + + +def _project_description(name: str, meta: dict) -> str: + """Generate a project description from metadata.""" + commits = meta.get("commits", 0) + eras = meta.get("era_count", 0) + days = meta.get("active_days", 0) + if not commits: + return "Infrastructure project supporting the archaeology pipeline" + parts = [] + if eras and eras > 1: + parts.append(f"{eras} distinct development eras") + elif not eras: + pass # Skip era mention for pipeline/infra projects + if days: + parts.append(f"{days} active days of development") + if parts: + return f"{commits:,} commits across " + " and ".join(parts) + return f"{commits:,} commits of development history" + + +def generate_master_dashboard(projects: list[dict[str, Any]], api_section_html: str = "", api_repos: list[dict[str, Any]] | None = None) -> str: + """Generate the master dashboard HTML.""" + now = 
datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC") + api_repos = api_repos or [] + + api_commits = sum(r["commits"] for r in api_repos if isinstance(r["commits"], int)) + if api_repos: + print(f" API repos: {len(api_repos)} repos, {api_commits:,} commits") + else: + print(f" WARNING: No API repos loaded") + + # Sort projects by commit count (descending) — richest first + def _proj_commits(p: dict) -> int: + c = p["meta"].get("commits", 0) + return c if isinstance(c, int) else 0 + projects_sorted = sorted(projects, key=_proj_commits, reverse=True) + + # Separate featured (top project) from the rest + featured = projects_sorted[0] if projects_sorted else None + rest_projects = projects_sorted[1:] if len(projects_sorted) > 1 else [] + + # ── Featured card ── + featured_html = "" + if featured: + fm = featured["meta"] + fc = fm.get("commits", "—") + fc_fmt = f"{fc:,}" if isinstance(fc, int) else str(fc) + fe = fm.get("era_count", 0) + fad = fm.get("active_days", "—") + fd = featured.get("total_deliverables", 0) + fviz = "" + for viz in featured["visuals"]: + fviz += f'{viz["name"]}\n ' + + # Category pills + cat_pills = "" + for cat_name, cat_meta in CATEGORIES.items(): + count = len(featured.get("deliverables", {}).get(cat_name, [])) + if count: + color = cat_meta["color"] + cat_pills += f'{cat_meta["label"]} {count}\n' + + featured_html = f""" +""" + + # ── Project cards (rest) ── + project_cards = "" + for proj in rest_projects: + meta = proj["meta"] + commits = meta.get("commits", "—") + eras = meta.get("era_count", 0) + active_days = meta.get("active_days", "—") + total_deliverables = proj.get("total_deliverables", 0) + commits_fmt = f"{commits:,}" if isinstance(commits, int) else str(commits) + viz_links = "" + for viz in proj["visuals"]: + viz_links += f'{viz["name"]}\n ' + + # Category pills + cat_pills = "" + for cat_name, cat_meta in CATEGORIES.items(): + count = len(proj.get("deliverables", {}).get(cat_name, [])) + if count: + color = cat_meta["color"] + 
cat_pills += f'{cat_meta["label"]} {count}\n' + + project_cards += f""" + +
+

{proj['name'].upper()}

+ {total_deliverables} {_pluralize(total_deliverables, 'deliverable')} +
+
+
{commits_fmt}{_pluralize(commits if isinstance(commits, int) else 0, 'commit')}
+
{eras if eras else '—'}{_pluralize(eras, 'era')}
+
{active_days}{_pluralize(active_days if isinstance(active_days, int) else 0, 'active day')}
+
+
{cat_pills}
+ +
""" + + # ── Aggregate stats ── + mined_commits = sum(p["meta"].get("commits", 0) for p in projects if isinstance(p["meta"].get("commits"), int)) + total_repos = len(projects) + len(api_repos) + total_commits = mined_commits + api_commits + total_commits_fmt = f"{total_commits:,}" + total_repos_fmt = f"{total_repos:,}" + total_deliverables = sum(p.get("total_deliverables", 0) for p in projects) + owners = set(r["owner"] for r in api_repos) | {"mined"} + n_networks = len(owners) + + # ── API repos section (collapsed by default) ── + api_section = "" + if api_repos: + # Build filter buttons + owner_filters = "" + for owner in sorted(set(r["owner"] for r in api_repos)): + label = "KyaniteLabs" if "kyanite" in owner.lower() else "Personal" + owner_filters += f'\n ' + api_section = f""" +
+ + +
""" + + # ── Cross-Repo Analysis section ── + cross_repo_section = "" + global_deliverables_dir = Path("global/deliverables") + if global_deliverables_dir.exists(): + md_files = sorted(global_deliverables_dir.rglob("*.md")) + if md_files: + cards_html = "" + for md in md_files: + display = md.stem.replace("-", " ").replace("_", " ").title() + rel = f"global/{md.relative_to(global_deliverables_dir)}" + cards_html += f'{display}{rel}\n' + cross_repo_section = f"""
+

Cross-Repository Analysis ({len(md_files)})

+
{cards_html}
+
""" + + # ── Global viz cards ── + global_viz_section = "" + if api_repos: + global_viz_section = """ +""" + + html = f""" + + + + +Dev-Archaeology + + + + + + + + + + + +
+

Development Fossil Record

+

Forensic archaeology of {total_repos} repositories — {total_commits_fmt} commits mined, analyzed, and visualized

+
+
{total_repos_fmt}{_pluralize(total_repos, 'Repository', 'Repositories')}
+
{total_commits_fmt}{_pluralize(total_commits, 'Commit')}
+
{total_deliverables}{_pluralize(total_deliverables, 'Deliverable')}
+
{n_networks}{_pluralize(n_networks, 'Network')}
+
+
+ +{featured_html} + +{f'''
+

Analyzed Projects ({len(rest_projects)})

+
+ {project_cards} +
+
''' if rest_projects else ""} + +{cross_repo_section} + +{global_viz_section} + +{api_section} + + + + + +""" + return html + + +def generate_project_index(project: dict[str, Any]) -> str: + """Generate per-project index.html with overview and links to all deliverables.""" + now = datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC") + meta = project["meta"] + proj_name = project["name"].upper() + + commits = meta.get("commits", "?") + eras = meta.get("era_count", "?") + active_days = meta.get("active_days", "?") + span_days = meta.get("span_days", "?") + commits_fmt = f"{commits:,}" if isinstance(commits, int) else str(commits) + total_deliv = project.get("total_deliverables", 0) + + # Build categorized deliverable sections + cat_sections = "" + deliverables = project.get("deliverables", {}) + for cat_name, cat_meta in CATEGORIES.items(): + files = deliverables.get(cat_name, []) + if not files: + continue + + file_cards = "" + for f in files: + href = f["href"].split("/", 1)[-1] # Remove project name prefix (same directory) + if f["ext"] == ".html": + link = href + target = "" + elif f["ext"] == ".md": + link = f"../md-viewer.html?file={project['name']}/{href}" + target = "" + else: # .json + link = href + target = "" + + ext_badge_color = {"html": "#14b8a6", "md": "#8b5cf6", "json": "#f59e0b"}.get(f["ext"].lstrip("."), "#6a7888") + file_cards += f""" + {cat_meta['icon']} + {f['name']} + {f['ext'].lstrip('.').upper()} + \n""" + + cat_sections += f""" +
+

+ {cat_meta['icon']} + {cat_meta['label']} + {len(files)} +

+
+ {file_cards} +
+
""" + + html = f""" + + + + +{proj_name} — Project Overview + + + + + + + + + + + + +
+
+

{proj_name}

+

{_project_description(project['name'], meta)}

+
+
{commits_fmt}{_pluralize(commits if isinstance(commits, int) else 0, 'Commit')}
+
{eras if eras else '—'}{_pluralize(eras if isinstance(eras, int) else 0, 'Era')}
+
{active_days}{_pluralize(active_days if isinstance(active_days, int) else 0, 'Active Day')}
+
{span_days}{_pluralize(span_days if isinstance(span_days, int) else 0, 'Day', 'Day Span')}
+
{total_deliv if total_deliv else '—'}{_pluralize(total_deliv if isinstance(total_deliv, int) else 0, 'Deliverable')}
+
+
+ + {cat_sections} +
+ + +""" + return html + + +def _viz_description(name: str) -> str: + """Return a short description for a visualization page.""" + descriptions = { + "Dashboard": "Full archaeological dashboard with timeline, eras, heatmap, and telemetry", + "Playbook": "Era-by-era narrative playbook with key events and patterns", + "Agents": "AI agent performance benchmark comparing all coding agents", + "Report": "Structured archaeological report with findings and analysis", + } + return descriptions.get(name, "Archaeological analysis visualization") + + +def _viz_icon(name: str) -> str: + """Return an emoji icon for a visualization page.""" + icons = { + "Dashboard": "📊", + "Playbook": "📖", + "Agents": "🤖", + "Report": "📄", + } + return icons.get(name, "🔬") + + +def load_api_repos(global_data_dir: Path) -> list[dict[str, Any]]: + """Load repo metadata from fetch-github JSON files. + + Returns list of repo dicts sorted by commit count (descending). + """ + repos = [] + for json_file in global_data_dir.glob("*-repos.json"): + try: + data = json.loads(json_file.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError) as e: + print(f" WARNING: Failed to load {json_file.name}: {e}") + continue + for repo in data.get("repos", []): + if repo.get("is_fork"): + continue + commits = repo.get("total_commits") or repo.get("commit_count", 0) + repos.append({ + "name": repo.get("name", "?"), + "commits": commits, + "language": repo.get("language", ""), + "description": repo.get("description", ""), + "updated": (repo.get("updated_at") or repo.get("updated", ""))[:10], + "owner": data.get("owner", ""), + "html_url": repo.get("html_url", f"https://github.com/{data.get('owner', '')}/{repo.get('name', '')}"), + "is_fork": repo.get("is_fork", False), + }) + repos.sort(key=lambda r: r["commits"], reverse=True) + return repos + + +def generate_global_section(api_repos: list[dict[str, Any]], owner_labels: dict[str, str] | None = None) -> str: + """Generate an HTML section showing API-only 
repos (lightweight cards).""" + if not api_repos: + return "" + + labels = owner_labels or {} + # Group by owner + by_owner: dict[str, list] = {} + for repo in api_repos: + owner = repo["owner"] + by_owner.setdefault(owner, []).append(repo) + + sections = "" + for owner, owner_repos in by_owner.items(): + label = labels.get(owner, owner) + total_commits = sum(r["commits"] for r in owner_repos) + total_commits_fmt = f"{total_commits:,}" + + cards = "" + for repo in owner_repos: + commits_fmt = f"{repo['commits']:,}" + lang = repo["language"] or "" + desc = (repo["description"] or "")[:80] + cards += f""" +
+
+ {repo['name']} + {commits_fmt} commits +
+
{lang} · updated {repo['updated']}"
+ {f'
{desc}
' if desc else ''} +
""" + + sections += f""" +
+

{label}{len(owner_repos)} repos · {total_commits_fmt} commits

+
+ {cards} +
+
""" + + return sections diff --git a/archaeology/visualization/global-template.html b/archaeology/visualization/global-template.html index b862c9b..2d0a0fd 100644 --- a/archaeology/visualization/global-template.html +++ b/archaeology/visualization/global-template.html @@ -4,6 +4,8 @@ The Portfolio — An Archaeology of Multi-Repo Development + + @@ -389,8 +391,14 @@

Development Archaeology

// Initialize counters document.getElementById('counter-commits').dataset.target = data.meta.total_commits; document.getElementById('counter-repos').dataset.target = data.meta.total_repos; - document.getElementById('counter-days').dataset.target = data.meta.total_active_days; + const totalActiveDays = data.meta.total_active_days || 0; + document.getElementById('counter-days').dataset.target = totalActiveDays; document.getElementById('counter-authors').dataset.target = data.meta.total_authors; + // Show dash for zero-value counters instead of "0" + if (!totalActiveDays) { + const daysEl = document.getElementById('counter-days'); + if (daysEl) daysEl.textContent = '—'; + } document.getElementById('hero-subtitle').innerHTML = `An Archaeology of Multi-Repo Development
${data.meta.first_date} — ${data.meta.last_date}`; @@ -417,7 +425,7 @@

Development Archaeology

${formatDate(repo.first_date)} — ${formatDate(repo.last_date)}
-
${repo.active_days} active days
+
${repo.active_days ?? '—'} active days
${repo.top_language} top language
${repo.top_author} top author
diff --git a/archaeology/visualization/nav.py b/archaeology/visualization/nav.py new file mode 100644 index 0000000..0cab44a --- /dev/null +++ b/archaeology/visualization/nav.py @@ -0,0 +1,196 @@ +"""Shared navigation component for all dev-archaeology HTML deliverables. + +Provides a consistent nav bar across all HTML pages with: +- Project name linking to project index +- Tab-style navigation to sibling pages +- "Home" link back to master dashboard +- Mobile hamburger menu +- PostHog analytics integration +""" + +from __future__ import annotations + +import os +from pathlib import Path +from typing import Any + + +# ── PostHog snippet ────────────────────────────────────────────────── + +POSTHOG_SNIPPET = "" + + +# ── Shared CSS ─────────────────────────────────────────────────────── + +NAV_CSS = """""" + + +def _discover_sibling_pages(current_file: Path, project_deliverables_dir: Path) -> list[dict[str, str]]: + """Find all HTML files in the same visuals/ directory (or deliverables/ for legacy). + + Returns list of {name, href, is_active}. + """ + visuals_dir = project_deliverables_dir / "visuals" + if not visuals_dir.exists(): + visuals_dir = project_deliverables_dir + + pages = [] + for html_file in sorted(visuals_dir.glob("*.html")): + name = html_file.stem + # Skip index pages + if name == "index": + continue + # Pretty display name + display = name.replace("-", " ").replace("_", " ").title() + if display == "Archaeology": + display = "Dashboard" + elif display == "Agent Benchmark": + display = "Agents" + elif display == "Playbook": + display = "Playbook" + elif display == "Report": + display = "Report" + pages.append({ + "name": display, + "href": html_file.name, + "is_active": current_file.name == html_file.name, + }) + return pages + + +def generate_nav( + project_name: str, + current_file: Path, + project_deliverables_dir: Path, + include_posthog: bool = True, + home_url: str = "/", +) -> str: + """Generate the shared nav bar HTML for a deliverable page. 
+ + Args: + project_name: Display name for the project. + current_file: Path to the current HTML file (for active state). + project_deliverables_dir: Path to the project's deliverables/ directory. + include_posthog: Whether to include PostHog analytics snippet. + home_url: URL for the "Home" link (master dashboard). + + Returns: + HTML string containing the nav bar, CSS, and optional PostHog. + """ + pages = _discover_sibling_pages(current_file, project_deliverables_dir) + + # Build page links + page_links = "" + for page in pages: + active_class = " active" if page["is_active"] else "" + # Determine relative path from current file to visuals dir + if current_file.parent.name == "visuals": + href = page["href"] + else: + href = f"visuals/{page['href']}" + page_links += f'{page["name"]}\n ' + + # Determine relative path to index.html + if current_file.parent.name == "visuals": + project_index_href = "index.html" + else: + project_index_href = "visuals/index.html" + + nav_html = f"""{NAV_CSS} +{POSTHOG_SNIPPET if include_posthog else ''} + +""" + return nav_html + + +def generate_nav_simple( + project_name: str, + pages: list[dict[str, str]], + active_page: str, + include_posthog: bool = True, + home_url: str = "/", +) -> str: + """Generate nav bar with explicit page list (for generated dashboards). + + Args: + project_name: Display name for the project. + pages: List of {name, href} dicts. + active_page: The href of the active page. + include_posthog: Whether to include PostHog snippet. + home_url: URL for the home link. + + Returns: + HTML string. + """ + page_links = "" + for page in pages: + active_class = " active" if page["href"] == active_page else "" + page_links += f'{page["name"]}\n ' + + nav_html = f"""{NAV_CSS} +{POSTHOG_SNIPPET if include_posthog else ''} + +""" + return nav_html + + +def inject_nav_into_html(html: str, nav_html: str) -> str: + """Inject the nav bar into an existing HTML document. + + Inserts right after tag. 
Adds class="nav-body" to main content + to account for the sticky nav height. + """ + if " + body_idx = html.index("", body_idx) + 1 + html = html[:body_close] + "\n" + nav_html + html[body_close:] + + # If there's a root wrapper div, give it nav-body class + # This adds top margin to account for sticky nav + if 'class="container"' in html: + html = html.replace('class="container"', 'class="container nav-body"', 1) + elif '
' in html: + html = html.replace('
', ' @@ -353,7 +386,7 @@

The Learning Curve
The 3-Year Learning Arc — Monthly AI Video Consumption (2023–2026)

Topic Evolution — How Viewing Focus Shifted Before and During the Build
Creator Influence Map — Who Shaped What Was Built
-
Learn-Build Correlation — AI Videos vs. Your Commits During the 34-Day Sprint
+
Learn-Build Correlation — AI Videos vs. Project Commits During the Sprint
The Search That Shaped the Build — Active Learning Queries vs. Passive Video Consumption
@@ -384,7 +417,7 @@

The Wider Univer
-

The Ten Eras

+

The Development Eras

Each era a chapter — from seed to forge

@@ -404,7 +437,7 @@

AI Productivit

Methodology

-

Data mined from git history (675 commits), Claude Code session logs (58 sessions, 920 human messages) and GitHub API (50 repos). Visualization built with D3.js v7, Chart.js v4, d3-sankey. All data embedded inline — this file is fully self-contained.

+

Data mined from git history, AI tool session logs, and GitHub API. Visualization built with D3.js v7, Chart.js v4, d3-sankey. Data loaded from project JSON files.

Generated by Development Archaeology.

@@ -628,11 +661,29 @@

drawSparkline('spark-featfix', [ct.feat || 0, ct.fix || 0, ct.docs || 0, ct.refactor || 0, ct.test || 0, ct.chore || 0], '#ff6b6b'); } +// Scroll spy: highlight active nav link +function setupScrollSpy() { + const sections = document.querySelectorAll('.chapter[id]'); + const links = document.querySelectorAll('.site-nav .nav-link'); + if (!sections.length || !links.length) return; + const observer = new IntersectionObserver(entries => { + entries.forEach(entry => { + if (entry.isIntersecting) { + links.forEach(l => l.classList.remove('active')); + const active = document.querySelector(`.site-nav .nav-link[href="#${entry.target.id}"]`); + if (active) active.classList.add('active'); + } + }); + }, { threshold: 0.1, rootMargin: '-80px 0px -60% 0px' }); + sections.forEach(s => observer.observe(s)); +} + // Boot document.addEventListener('DOMContentLoaded', () => { setupScrollReveal(); animateCounters(); initSparklines(); + setupScrollSpy(); }); @@ -1509,18 +1560,10 @@ const eras = [ {name:'Pre-seed\n(Feb 1-27)',coding_agents:80,local_ai:19,llm_models:136,agent_arch:49,ml_fund:6,creative:0}, - {name:'Era 1\n(Feb 28-Mar 7)',coding_agents:29,local_ai:3,llm_models:48,agent_arch:32,ml_fund:1,creative:0}, + {name:'Era 1\n(Feb 28-Mar 18)',coding_agents:29,local_ai:3,llm_models:48,agent_arch:32,ml_fund:1,creative:0}, {name:'Dormancy\n(Mar 8-17)',coding_agents:9,local_ai:4,llm_models:31,agent_arch:15,ml_fund:0,creative:1}, - {name:'Era 2\n(Mar 18-19)',coding_agents:4,local_ai:2,llm_models:12,agent_arch:10,ml_fund:9,creative:0}, - {name:'Eras 3-5\n(Mar 20-23)',coding_agents:4,local_ai:0,llm_models:12,agent_arch:10,ml_fund:5,creative:1}, - {name:'Era 6\n(Mar 24-27)',coding_agents:3,local_ai:1,llm_models:15,agent_arch:5,ml_fund:1,creative:0}, - {name:'Era 7\n(Mar 28-29)',coding_agents:8,local_ai:3,llm_models:18,agent_arch:6,ml_fund:0,creative:0}, - {name:'Era 8\n(Mar 30-31)',coding_agents:3,local_ai:2,llm_models:6,agent_arch:7,ml_fund:0,creative:0}, - {name:'Era 9\n(Apr 
1)',coding_agents:3,local_ai:3,llm_models:11,agent_arch:1,ml_fund:0,creative:0}, - {name:'Era 10\n(Apr 2)',coding_agents:5,local_ai:1,llm_models:8,agent_arch:12,ml_fund:3,creative:0}, - {name:'Era 11\n(Apr 2-3)',coding_agents:8,local_ai:2,llm_models:15,agent_arch:18,ml_fund:2,creative:1}, - {name:'Era 12\n(Apr 3-4)',coding_agents:10,local_ai:3,llm_models:12,agent_arch:8,ml_fund:4,creative:2}, - {name:'Era 13\n(Apr 4)',coding_agents:4,local_ai:1,llm_models:6,agent_arch:3,ml_fund:1,creative:0}, + {name:'Era 2\n(Mar 19-31)',coding_agents:22,local_ai:8,llm_models:63,agent_arch:38,ml_fund:15,creative:1}, + {name:'Era 3\n(Apr 1-6)',coding_agents:30,local_ai:10,llm_models:52,agent_arch:42,ml_fund:10,creative:3}, ]; const topics = ['coding_agents','llm_models','agent_arch','ml_fund','local_ai','creative']; @@ -1951,7 +1994,8 @@ if (!el) return; const gradient = dp.session_depth_gradient; if (!gradient || !gradient.gradient) return; - const data = gradient.gradient.filter(d => d.autonomy_score !== null); + const data = gradient.gradient.filter(d => d.autonomy_score !== null && d.name); + const fmtName = n => (n || '').replace('era','').replace('-',' '); const margin = {top: 20, right: 20, bottom: 30, left: 80}; const width = el.clientWidth - margin.left - margin.right; const height = 280 - margin.top - margin.bottom; @@ -1959,7 +2003,7 @@ .attr('viewBox', `0 0 ${width + margin.left + margin.right} ${height + margin.top + margin.bottom}`); const g = svg.append('g').attr('transform', `translate(${margin.left},${margin.top})`); - const x = d3.scaleBand().domain(data.map(d => d.name.replace('era','').replace('-',' '))).range([0, width]).padding(0.35); + const x = d3.scaleBand().domain(data.map(d => fmtName(d.name))).range([0, width]).padding(0.35); const y = d3.scaleLinear().domain([0, 10.5]).range([height, 0]); // Background zones @@ -1972,7 +2016,7 @@ }); g.selectAll('.bar').data(data).enter().append('rect') - .attr('x', d => x(d.name.replace('era','').replace('-',' 
'))).attr('width', x.bandwidth()) + .attr('x', d => x(fmtName(d.name))).attr('width', x.bandwidth()) .attr('y', d => y(parseFloat(d.autonomy_score))) .attr('height', d => height - y(parseFloat(d.autonomy_score))) .attr('fill', d => { const s = parseFloat(d.autonomy_score); return s >= 7 ? COLORS.claude : s >= 3 ? COLORS.cursor : COLORS.kai; }) @@ -1986,7 +2030,7 @@ g.append('g').attr('transform', `translate(0,${height})`).call(d3.axisBottom(x)) .selectAll('text').attr('fill', COLORS.text2).style('font-size', '10px') - .text(d => d.replace(' Era','').replace('The ','').replace('Consolidation','Consol.').replace('Conversational','Conv.').replace('Multimedia','Multi.').replace('Dogfood','Dogfd.').replace('Quality','Qual.').replace('Explosion','Expl.').replace('Pruning','Prune.')) + .text(d => (d || '').replace(' Era','').replace('The ','').replace('Consolidation','Consol.').replace('Conversational','Conv.').replace('Multimedia','Multi.').replace('Dogfood','Dogfd.').replace('Quality','Qual.').replace('Explosion','Expl.').replace('Pruning','Prune.')) .attr('transform', 'rotate(-35)').attr('text-anchor', 'end').attr('dx', '-3px').attr('dy', '3px'); g.append('g').call(d3.axisLeft(y).ticks(5)).selectAll('text').attr('fill', COLORS.text2).style('font-size', '11px'); g.selectAll('.domain, .tick line').attr('stroke', COLORS.border); @@ -2048,7 +2092,7 @@ (function() { const el = document.getElementById('chart-cross-repo'); if (!el) return; - const density = cross.commit_density || cross.primary_commit_density || {}; + const density = cross.commit_density || cross.liminal_commit_density || {}; // Generate timeline from density keys if timeline array not available let timeline = cross.timeline || []; if (!timeline.length && Object.keys(density).length > 0) { @@ -2058,12 +2102,12 @@ const empty = document.createElement('div'); empty.style.cssText = 'padding:40px;text-align:center;color:var(--text3)'; empty.textContent = 'No cross-repo timeline data available'; 
el.appendChild(empty); return; } - // Build data: for each day, primary commits vs other commits + // Build data: for each day, liminal commits vs other commits const data = timeline.map(d => { const date = d.date || d; - const primary = typeof d === 'object' ? (d.primary || density[date] || 0) : (density[date] || 0); - const other = typeof d === 'object' ? (d.other || d.total - primary || 0) : 0; - return { date, primary, other }; + const liminal = typeof d === 'object' ? (d.liminal || density[date] || 0) : (density[date] || 0); + const other = typeof d === 'object' ? (d.other || d.total - liminal || 0) : 0; + return { date, liminal, other }; }).sort((a, b) => a.date.localeCompare(b.date)); const margin = {top: 20, right: 20, bottom: 30, left: 45}; const width = el.clientWidth - margin.left - margin.right; @@ -2071,8 +2115,8 @@ const svg = d3.select(el).append('svg').attr('role','img').attr('viewBox', `0 0 ${width + margin.left + margin.right} ${height + margin.top + margin.bottom}`); const g = svg.append('g').attr('transform', `translate(${margin.left},${margin.top})`); const x = d3.scalePoint().domain(data.map(d => d.date)).range([0, width]); - const y = d3.scaleLinear().domain([0, d3.max(data, d => d.primary + d.other) || 1]).range([height, 0]); - const stack = d3.stack().keys(['other', 'primary'])(data); + const y = d3.scaleLinear().domain([0, d3.max(data, d => d.liminal + d.other) || 1]).range([height, 0]); + const stack = d3.stack().keys(['other', 'liminal'])(data); const colors = [COLORS.unknown, COLORS.claude]; stack.forEach((layer, i) => { const area = d3.area().x(d => x(d.data.date)).y0(d => y(d[0])).y1(d => y(d[1])).curve(d3.curveMonotoneX); @@ -2196,19 +2240,13 @@ const agents = D.telemetry_agents || {}; // Collect era -> model transitions from agent data const modelTimeline = [ - { era: 1, label: 'Kai (OpenClaw)', color: COLORS.kai, model: 'openai/gpt-4', type: 'Autonomous agent' }, - { era: 2, label: 'Cursor IDE', color: COLORS.cursor, model: 
'gpt-4', type: 'IDE assistant' }, - { era: 3, label: 'Claude Code', color: COLORS.claude, model: 'claude-3.5-sonnet', type: 'CLI agent' }, - { era: 4, label: 'Claude Code', color: COLORS.claude, model: 'claude-3.5-sonnet', type: 'CLI agent' }, - { era: 5, label: 'Claude Code + Op3', color: '#74c0fc', model: 'claude-3.5 + o3', type: 'CLI + API' }, - { era: 6, label: 'Claude Code', color: COLORS.claude, model: 'claude-3.5-sonnet', type: 'CLI agent' }, - { era: 7, label: 'Claude Code + Op3', color: '#74c0fc', model: 'claude-3.5 + o3', type: 'CLI + API' }, - { era: 8, label: 'Claude Code + Op3', color: '#74c0fc', model: 'claude-3.5 + o3', type: 'CLI + API' }, - { era: 9, label: 'Claude Code + Op3', color: '#74c0fc', model: 'claude-3.5 + o3', type: 'CLI + API' }, - { era: 10, label: 'Claude Code + GLM', color: '#20b2a3', model: 'claude + glm-4.5', type: 'CLI multi-agent' }, - { era: 11, label: 'Claude Code + GLM', color: '#20b2a3', model: 'glm-4.5/glm-5.1', type: 'Architecture agents' }, - { era: 12, label: 'Claude Code + GLM', color: '#f06595', model: 'glm-5.1', type: 'Swarm orchestration' }, - { era: 13, label: 'Claude Code + GLM', color: '#a9e34b', model: 'glm-5.1', type: 'Final cleanup' } + { era: 1, label: 'Kai + Cursor + Claude', color: COLORS.kai, model: 'gpt-4 + claude-3.5', type: 'Multi-agent exploration' }, + { era: 2, label: 'Claude Code + Op3', color: '#74c0fc', model: 'claude-3.5 + o3', type: 'CLI + API' }, + { era: 3, label: 'Claude Code + GLM', color: '#20b2a3', model: 'claude + glm-4.5', type: 'CLI multi-agent' }, + { era: 4, label: 'Claude Code + GLM', color: '#20b2a3', model: 'glm-4.5/glm-5.1', type: 'Architecture agents' }, + { era: 5, label: 'Claude Code + GLM', color: '#20b2a3', model: 'glm-5.1', type: 'Studio development' }, + { era: 6, label: 'Claude Code + GLM', color: '#f06595', model: 'glm-5.1', type: 'Swarm orchestration' }, + { era: 7, label: 'Claude Code + GLM', color: '#a9e34b', model: 'glm-5.1', type: 'Final forge' } ]; const margin = 
{top: 20, right: 20, bottom: 30, left: 45}; const width = el.clientWidth - margin.left - margin.right; @@ -2246,11 +2284,11 @@ const weekStart = new Date(date); weekStart.setDate(date.getDate() - date.getDay()); const key = weekStart.toISOString().slice(0, 10); - if (!weeks[key]) weeks[key] = { primary: 0, other: 0 }; - weeks[key].primary += d.primary || 0; + if (!weeks[key]) weeks[key] = { liminal: 0, other: 0 }; + weeks[key].liminal += d.liminal || 0; weeks[key].other += (d.other_repos || d.other || 0); }); - const data = Object.entries(weeks).map(([week, vals]) => ({ week, primary: vals.primary, other: vals.other })) + const data = Object.entries(weeks).map(([week, vals]) => ({ week, liminal: vals.liminal, other: vals.other })) .sort((a, b) => a.week.localeCompare(b.week)); const margin = {top: 20, right: 20, bottom: 30, left: 40}; @@ -2260,15 +2298,15 @@ const g = svg.append('g').attr('transform', `translate(${margin.left},${margin.top})`); const x = d3.scaleBand().domain(data.map(d => d.week.slice(5))).range([0, width]).padding(0.2); - const yMax = d3.max(data, d => Math.max(d.primary, d.other)) || 10; + const yMax = d3.max(data, d => Math.max(d.liminal, d.other)) || 10; const y = d3.scaleLinear().domain([0, yMax * 1.1]).range([height, 0]); // Grouped bars const barW = x.bandwidth() / 2; data.forEach(d => { - g.append('rect').attr('x', x(d.week.slice(5))).attr('y', y(d.primary)).attr('width', barW).attr('height', height - y(d.primary)) + g.append('rect').attr('x', x(d.week.slice(5))).attr('y', y(d.liminal)).attr('width', barW).attr('height', height - y(d.liminal)) .attr('fill', COLORS.claude).attr('rx', 2).attr('opacity', 0.8) - .on('mouseover', function(e) { showTooltip(e, { title: d.week, detail: `Primary: ${d.primary} commits` }); }) + .on('mouseover', function(e) { showTooltip(e, { title: d.week, detail: `Primary: ${d.liminal} commits` }); }) .on('mouseout', hideTooltip); g.append('rect').attr('x', x(d.week.slice(5)) + barW).attr('y', 
y(d.other)).attr('width', barW).attr('height', height - y(d.other)) .attr('fill', COLORS.text3).attr('rx', 2).attr('opacity', 0.5) @@ -2290,7 +2328,7 @@ + + + + +
+

{name}

+

{p['description'] or 'Development Archaeology Playbook'}

+
+
{p['total_commits']}
Commits
+
{p['era_count']}
Eras
+
{p['active_days']}
Active Days
+
{p['span_days']}
Span (days)
+
+
+ +
+ {era_strip_items} +
+ +{era_sections} + +
+

Development Analytics

+
+
+

Commits per Day

+
+
+
+

Commit Types

+
+
+
+
+ +
+ Generated by dev-archaeology · {datetime.now().strftime("%Y-%m-%d")} · {p['total_commits']} commits across {p['era_count']} eras +
+ + + +""" + + +def generate_era_section(era: dict, p: dict) -> str: + color = ERA_COLORS[(era["id"] - 1) % len(ERA_COLORS)] + events = era.get("key_events", []) + events_html = "\n".join(f"
  • {evt}
  • " for evt in events[:10]) + daily = era.get("daily", {}) + active = len(daily) + + return f"""
    +
    +
    {era['id']}
    +
    +

    {era.get('name', f'Era {era["id"]}')}

    +
    {era.get('dates', '')}
    +
    +
    +
    +
    {era.get('commits', '?')}
    Commits
    +
    {active}
    Active Days
    +
    +

    {era.get('description', '')}

    + {'
      ' + events_html + '
    ' if events_html else ''} +
    """ + + +def main(): + if len(sys.argv) < 2: + print("Usage: generate_playbook.py | --all") + sys.exit(1) + + if sys.argv[1] == "--all": + for name in ["Achiote", "DECLuTTER-AI", "DialectOS", "Epoch", "Fugax", "mcp-video", "openglaze"]: + p = load_project(name) + html = generate_playbook(p) + out = ROOT / "projects" / name / "deliverables" / "visuals" / "playbook.html" + out.parent.mkdir(parents=True, exist_ok=True) + out.write_text(html, encoding="utf-8") + print(f" + {name}/deliverables/visuals/playbook.html") + else: + name = sys.argv[1] + p = load_project(name) + html = generate_playbook(p) + out = ROOT / "projects" / name / "deliverables" / "visuals" / "playbook.html" + out.parent.mkdir(parents=True, exist_ok=True) + out.write_text(html, encoding="utf-8") + print(f" + {name}/deliverables/visuals/playbook.html") + + +if __name__ == "__main__": + main() diff --git a/scripts/data/generate_template_deliverables.py b/scripts/data/generate_template_deliverables.py new file mode 100644 index 0000000..4688fcc --- /dev/null +++ b/scripts/data/generate_template_deliverables.py @@ -0,0 +1,1775 @@ +#!/usr/bin/env python3 +"""Generate all missing deliverables from data templates. + +No LLM required — generates structured markdown from real project data. +Each document references actual commits, eras, and metrics. 
+ +Usage: + python3 scripts/data/generate_template_deliverables.py + python3 scripts/data/generate_template_deliverables.py --all +""" + +import json +import sys +from collections import Counter +from datetime import datetime +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[2] + + +def load_project(project_name: str) -> dict: + pdir = ROOT / "projects" / project_name + data = {"name": project_name, "pdir": pdir} + + for key, path in [ + ("config", pdir / "project.json"), + ("eras_data", pdir / "data" / "commit-eras.json"), + ("metrics", pdir / "deliverables" / "canonical-metrics.json"), + ("data_json", pdir / "deliverables" / "data.json"), + ]: + if path.exists(): + data[key] = json.loads(path.read_text(encoding="utf-8")) + + # Extract useful fields + if "eras_data" in data: + ed = data["eras_data"] + data["eras"] = ed.get("eras", []) + data["total_commits"] = ed.get("total_commits", 0) + data["contributors"] = ed.get("contributors", []) + data["commit_types"] = ed.get("commit_types", {}) + data["daily_freq"] = ed.get("daily_commit_frequency", {}) + data["gaps"] = ed.get("gaps", []) + data["lifespan"] = ed.get("lifespan", "") + else: + data["eras"] = [] + data["total_commits"] = 0 + data["contributors"] = [] + data["commit_types"] = {} + data["daily_freq"] = {} + data["gaps"] = [] + data["lifespan"] = "" + + # Analysis JSON + data["analysis"] = {} + analysis_dir = pdir / "deliverables" / "analysis" + if analysis_dir.exists(): + for f in analysis_dir.glob("*.json"): + try: + data["analysis"][f.stem] = json.loads(f.read_text(encoding="utf-8")) + except json.JSONDecodeError: + pass + + data["description"] = data.get("config", {}).get("description", "") + data["active_days"] = data.get("metrics", {}).get("active_days", len(data["daily_freq"])) + data["span_days"] = data.get("metrics", {}).get("span_days", 0) + data["peak_day"] = data.get("metrics", {}).get("peak_day", "") + data["peak_day_commits"] = data.get("metrics", {}).get("peak_day_commits", 
0) + data["era_count"] = len(data["eras"]) + + return data + + +def write_file(path: Path, content: str, count: list) -> None: + if path.exists(): + return + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(content.strip() + "\n", encoding="utf-8") + count.append(path.name) + print(f" + {path.name}") + + +# ── Analysis MD converters ────────────────────────────────────────────── + +def analysis_sdlc_gap_finder(p: dict) -> str: + d = p["analysis"].get("analysis-sdlc-gap-finder", {}) + gaps = d.get("gaps", d.get("findings", [])) + gap_lines = "\n".join( + f" - **{g.get('area', g.get('phase', 'Unknown'))}**: {g.get('description', g.get('finding', str(g)))}" + for g in (gaps[:15] if gaps else [{"area": "General", "description": "No specific gaps identified"}]) + ) + return f"""# SDLC Gap Analysis — {p['name']} + +## Overview + +Analysis of software development lifecycle gaps in **{p['name']}** ({p['total_commits']} commits, {p['era_count']} eras, {p['active_days']} active days). 
+ +**Project**: {p['name']} +**Description**: {p['description'] or 'N/A'} +**Lifespan**: {p['lifespan']} +**Eras**: {p['era_count']} + +## Key Findings + +{gap_lines} + +## SDLC Phase Coverage + +| Phase | Status | Notes | +|-------|--------|-------| +| Requirements | {'Covered' if p['total_commits'] > 50 else 'Minimal'} | Evidenced by commit scope | +| Design | {'Evident' if p['era_count'] > 2 else 'Limited'} | {p['era_count']} development eras detected | +| Implementation | Strong | {p['total_commits']} commits across {p['active_days']} days | +| Testing | {'Present' if p['commit_types'].get('test', 0) > 0 else 'Gap'} | {p['commit_types'].get('test', 0)} test commits | +| Documentation | {'Present' if p['commit_types'].get('docs', 0) > 0 else 'Gap'} | {p['commit_types'].get('docs', 0)} doc commits | +| CI/CD | {'Automated' if p['commit_types'].get('ci', 0) > 0 else 'Manual'} | {p['commit_types'].get('ci', 0)} CI commits | + +## Implications + +- **Velocity**: {p['total_commits']} commits in {p['active_days']} active days = {round(p['total_commits']/max(p['active_days'],1), 1)} commits/day +- **Peak day**: {p['peak_day']} with {p['peak_day_commits']} commits +- **Era structure**: {p['era_count']} distinct development phases identified +""" + + +def analysis_ml_pattern_mapper(p: dict) -> str: + d = p["analysis"].get("analysis-ml-pattern-mapper", {}) + patterns = d.get("patterns", d.get("findings", [])) + pat_lines = "\n".join( + f" - **{pt.get('name', pt.get('pattern', 'Pattern'))}**: {pt.get('description', pt.get('finding', str(pt)))}" + for pt in (patterns[:15] if patterns else [{"name": "Baseline", "description": "Standard development patterns detected"}]) + ) + return f"""# ML Pattern Mapping — {p['name']} + +## Overview + +Machine learning and AI pattern analysis for **{p['name']}**. 
+ +**Total commits**: {p['total_commits']} +**Eras analyzed**: {p['era_count']} +**Active days**: {p['active_days']} + +## Detected Patterns + +{pat_lines} + +## Pattern Distribution by Era + +{"".join(f"- **Era {e.get('id', '?')}** ({e.get('name', '?')}, {e.get('dates', '?')}): {e.get('commits', '?')} commits — {e.get('description', 'No description')[:100]}\n" for e in p['eras'])} + +## Key Metrics + +- **Commit types**: {json.dumps(p['commit_types'])} +- **Contributors**: {len(p['contributors'])} +- **Peak activity**: {p['peak_day']} ({p['peak_day_commits']} commits) +- **Development density**: {round(p['total_commits']/max(p['active_days'],1), 1)} commits per active day +""" + + +def analysis_formal_terms_mapper(p: dict) -> str: + d = p["analysis"].get("analysis-formal-terms-mapper", {}) + terms = d.get("terms", d.get("findings", [])) + term_lines = "\n".join( + f" - **{t.get('term', t.get('name', 'Term'))}**: {t.get('formal_name', t.get('description', str(t)))}" + for t in (terms[:15] if terms else [{"term": "Standard", "formal_name": "Conventional development terminology"}]) + ) + return f"""# Formal Terms Mapping — {p['name']} + +## Overview + +Mapping of informal development terminology to formal computer science and software engineering terms found in **{p['name']}**. 
+ +**Project**: {p['name']} +**Commits analyzed**: {p['total_commits']} + +## Term Mappings + +{term_lines} + +## Era Terminology + +{"".join(f"### Era {e.get('id', '?')}: {e.get('name', '?')}\n{e.get('description', 'No description')}\n**Key events**: {', '.join(e.get('key_events', [])[:5])}\n\n" for e in p['eras'])} +""" + + +def analysis_source_archaeologist(p: dict) -> str: + d = p["analysis"].get("analysis-source-archaeologist", {}) + artifacts = d.get("artifacts", d.get("findings", [])) + art_lines = "\n".join( + f" - **{a.get('artifact', a.get('name', 'Artifact'))}**: {a.get('description', a.get('finding', str(a)))}" + for a in (artifacts[:15] if artifacts else [{"artifact": "Source code", "description": "Standard source code artifacts detected"}]) + ) + return f"""# Source Archaeology — {p['name']} + +## Overview + +Deep archaeological analysis of source code evolution in **{p['name']}**. + +**Lifespan**: {p['lifespan']} +**Total commits**: {p['total_commits']} +**Eras**: {p['era_count']} + +## Discovered Artifacts + +{art_lines} + +## Development Timeline + +{"".join(f"### Era {e.get('id', '?')}: {e.get('name', '?')} ({e.get('dates', '?')})\n- Commits: {e.get('commits', '?')}\n- Description: {e.get('description', 'N/A')}\n- Key events: {', '.join(str(x) for x in e.get('key_events', [])[:5])}\n\n" for e in p['eras'])} + +## Work Patterns + +- **Commit frequency**: {round(p['total_commits']/max(p['active_days'],1), 1)} per active day +- **Peak day**: {p['peak_day']} ({p['peak_day_commits']} commits) +- **Commit types**: {', '.join(f'{k} ({v})' for k, v in sorted(p['commit_types'].items(), key=lambda x: -x[1])[:8])} +""" + + +def analysis_youtube_correlator(p: dict) -> str: + d = p["analysis"].get("analysis-youtube-correlator", {}) + correlations = d.get("correlations", d.get("findings", [])) + if not correlations: + cor_lines = "No YouTube learning data available for this project." 
+ else: + cor_lines = "\n".join( + f" - {c.get('description', c.get('finding', str(c)))}" if isinstance(c, dict) else f" - {c}" + for c in correlations[:10] + ) + return f"""# YouTube Correlation — {p['name']} + +## Overview + +Correlation between learning resources (YouTube videos, tutorials) and development activity in **{p['name']}**. + +**Project**: {p['name']} +**Active days**: {p['active_days']} + +## Correlations + +{cor_lines if cor_lines.strip() else "No YouTube learning data available for this project."} + +## Development Activity Summary + +- **Total commits**: {p['total_commits']} +- **Era breakdown**: {p['era_count']} development phases +- **Peak activity**: {p['peak_day']} +- **Development density**: {round(p['total_commits']/max(p['active_days'],1), 1)} commits/day + +## Implications + +The correlation between learning inputs and code outputs helps identify which educational resources had the most direct impact on development velocity and quality. +""" + + +ANALYSIS_GENERATORS = { + "analysis-sdlc-gap-finder": analysis_sdlc_gap_finder, + "analysis-ml-pattern-mapper": analysis_ml_pattern_mapper, + "analysis-formal-terms-mapper": analysis_formal_terms_mapper, + "analysis-source-archaeologist": analysis_source_archaeologist, + "analysis-youtube-correlator": analysis_youtube_correlator, +} + + +# ── Report files ────────────────────────────────────────────────────────── + +def report_cross_repo_narrative(p: dict) -> str: + era_list = "".join( + f"### {e.get('name', f'Era {e.get('id', '?')}')} ({e.get('dates', '?')})\n{e.get('commits', '?')} commits. {e.get('description', '')}\n\n" + for e in p['eras'] + ) + return f"""# Cross-Repository Narrative — {p['name']} + +## Ecosystem Context + +**{p['name']}** is part of the KyaniteLabs organization, a collection of projects that share development patterns, technology choices, and architectural principles. 
+ +**Project stats**: {p['total_commits']} commits, {p['era_count']} eras, {p['active_days']} active days over {p['lifespan']}. + +## Development Story + +{era_list} + +## Shared Patterns + +All KyaniteLabs projects share: +- **100% AI-assisted development** — code authored by AI agents under human direction +- **Consistent tech stack** — TypeScript (strict, ESM) or Python (3.11+, ruff) +- **Modern CI/CD** — GitHub Actions with caching and concurrency +- **Strong git hygiene** — conventional commits, branch protection, clean history + +## Relationship to Other Projects + +This project contributes to the broader KyaniteLabs ecosystem by demonstrating development archaeology patterns in a {p['commit_types'].get('feat', 0)}-feature, {p['commit_types'].get('fix', 0)}-fix lifecycle. + +## Key Metrics + +| Metric | Value | +|--------|-------| +| Total commits | {p['total_commits']} | +| Active days | {p['active_days']} | +| Eras | {p['era_count']} | +| Peak day | {p['peak_day']} ({p['peak_day_commits']} commits) | +| Commits/day | {round(p['total_commits']/max(p['active_days'],1), 1)} | +""" + + +def report_raw_narrative(p: dict) -> str: + era_stories = "\n\n".join( + f"## {e.get('name', f'Era {e.get('id', '?')}')} ({e.get('dates', '?')})\n\n" + f"{e.get('commits', '?')} commits in this era.\n\n" + f"{e.get('description', '')}\n\n" + f"Key events:\n" + "\n".join(f"- {evt}" for evt in e.get('key_events', [])[:8]) + for e in p['eras'] + ) + return f"""# Raw Development Narrative — {p['name']} + +> Chronological story of {p['name']}'s development, reconstructed from git history. + +**{p['total_commits']} commits** across **{p['active_days']} active days** over **{p['lifespan']}**. 
+ +--- + +{era_stories if era_stories else "No era data available for narrative reconstruction."} + +--- + +## Development Rhythm + +- **Total commits**: {p['total_commits']} +- **Active days**: {p['active_days']} +- **Commits per active day**: {round(p['total_commits']/max(p['active_days'],1), 1)} +- **Commit types**: {', '.join(f'{k} ({v})' for k, v in sorted(p['commit_types'].items(), key=lambda x: -x[1])[:8])} + +## Contributors + +{"".join(f'- **{c.get("name", "?")}**: {c.get("commits", "?")} commits ({c.get("percentage", "?")}%)\n' for c in p['contributors'])} + +*Generated from git archaeology on {datetime.now().strftime("%Y-%m-%d")}* +""" + + +# ── Strategy files ───────────────────────────────────────────────────────── + +def strategy_adversarial(p: dict) -> str: + return f"""# Adversarial Analysis — {p['name']} + +## Critical Assessment + +An honest, adversarial examination of **{p['name']}**'s development quality and decisions. + +### Strengths +- {p['total_commits']} commits demonstrate sustained development activity +- {p['era_count']} distinct development eras suggest iterative, responsive development +- Conventional commit usage ({p['commit_types'].get('feat', 0)} feat, {p['commit_types'].get('fix', 0)} fix) indicates structured workflow +- Peak of {p['peak_day_commits']} commits on {p['peak_day']} shows capacity for intense output + +### Weaknesses +- **Documentation ratio**: {p['commit_types'].get('docs', 0)} doc commits out of {p['total_commits']} ({round(p['commit_types'].get('docs', 0)/max(p['total_commits'],1)*100, 1)}%) — {"adequate" if p['commit_types'].get('docs', 0) > 5 else "low"} +- **Test coverage**: {p['commit_types'].get('test', 0)} test commits — {"present" if p['commit_types'].get('test', 0) > 0 else "gap — no test commits detected"} +- **CI automation**: {p['commit_types'].get('ci', 0)} CI commits — {"automated" if p['commit_types'].get('ci', 0) > 0 else "potentially manual"} +- **Active days ratio**: {p['active_days']} active out 
of {p['span_days']} total ({round(p['active_days']/max(p['span_days'],1)*100, 0)}%) — {"consistent" if p['active_days']/max(p['span_days'],1) > 0.5 else "bursty development pattern"} + +### Honest Questions +1. Is the velocity sustainable? ({round(p['total_commits']/max(p['active_days'],1), 1)} commits/day) +2. Are gaps between eras signs of abandoned direction or deliberate pivots? +3. Does the commit type distribution reflect a healthy SDLC? + +### Confidence Ratings +| Claim | Confidence | +|-------|-----------| +| Active development | High ({p['total_commits']} commits) | +| Structured workflow | {'High' if p['commit_types'].get('feat', 0) > 10 else 'Medium'} | +| Test discipline | {'High' if p['commit_types'].get('test', 0) > 5 else 'Low'} | +| Documentation culture | {'Medium' if p['commit_types'].get('docs', 0) > 3 else 'Low'} | +""" + + +def strategy_agent_benchmark(p: dict) -> str: + agents_raw = p.get("commit_types", {}) + total = p['total_commits'] + feat_pct = round(agents_raw.get('feat', 0) / max(total, 1) * 100, 1) + fix_pct = round(agents_raw.get('fix', 0) / max(total, 1) * 100, 1) + return f"""# Agent Benchmark Report — {p['name']} + +## Overview + +Analysis of AI agent effectiveness in **{p['name']}**'s development. 
+ +**Total commits**: {total} +**Eras**: {p['era_count']} + +## Agent Usage Analysis + +### Commit Type Distribution + +| Type | Count | Percentage | +|------|-------|-----------| +{"".join(f"| {k} | {v} | {round(v/max(total,1)*100,1)}% |\n" for k, v in sorted(agents_raw.items(), key=lambda x: -x[1]))} + +### Feature vs Fix Ratio + +- **Features**: {agents_raw.get('feat', 0)} commits ({feat_pct}%) +- **Fixes**: {agents_raw.get('fix', 0)} commits ({fix_pct}%) +- **Ratio**: {round(agents_raw.get('feat', 1)/max(agents_raw.get('fix', 1), 1), 1)}:1 + +## Era-by-Era Performance + +{"".join(f"### {e.get('name', f'Era {e.get('id', '?')}')}\n- Commits: {e.get('commits', '?')}\n- Period: {e.get('dates', '?')}\n- Description: {e.get('description', 'N/A')[:120]}\n\n" for e in p['eras'])} + +## Key Findings + +1. **AI-assisted development velocity**: {round(total/max(p['active_days'],1), 1)} commits per active day +2. **Feature velocity**: {round(agents_raw.get('feat', 0)/max(p['active_days'],1), 1)} features per active day +3. 
**Quality indicator**: {fix_pct}% fix commits suggests {'healthy' if fix_pct > 10 else 'potentially insufficient'} quality iteration + +## Recommendations + +- {'Maintain current AI-assisted workflow' if feat_pct > 30 else 'Consider increasing AI agent usage for feature development'} +- {'Fix ratio is healthy' if fix_pct > 10 else 'Consider dedicating more sessions to bug fixes and quality'} +- {'Strong test presence' if agents_raw.get('test', 0) > 5 else 'Add AI-assisted test generation sessions'} +""" + + +def strategy_porter_value_chain(p: dict) -> str: + ct = p["commit_types"] + total = max(p["total_commits"], 1) + feat_commits = ct.get("feat", 0) + fix_commits = ct.get("fix", 0) + docs_commits = ct.get("docs", 0) + test_commits = ct.get("test", 0) + ci_commits = ct.get("ci", 0) + build_commits = ct.get("build", 0) + refactor_commits = ct.get("refactor", 0) + perf_commits = ct.get("perf", 0) + chore_commits = ct.get("chore", 0) + dep_commits = ct.get("dep", ct.get("dependency", 0)) + + inbound_score = min(100, round((dep_commits + chore_commits) / total * 100 * 5)) + ops_score = min(100, round(feat_commits / total * 100 * 2)) + outbound_score = min(100, round((build_commits + ci_commits) / total * 100 * 5)) + marketing_score = min(100, round(docs_commits / total * 100 * 5)) + service_score = min(100, round((fix_commits + test_commits) / total * 100 * 3)) + infra_score = min(100, round((ci_commits + chore_commits) / total * 100 * 5)) + hr_score = min(100, round(len(p["contributors"]) * 30 + 20)) + tech_score = min(100, round((refactor_commits + perf_commits) / total * 100 * 8)) + proc_score = min(100, round(dep_commits / total * 100 * 10)) + margin_score = round((ops_score + service_score + marketing_score + outbound_score) / 4) + + era_lines = "\n".join( + f" - **{e.get('name', f'Era {e.get('id', '?')}')}** ({e.get('dates', '?')}): " + f"{e.get('commits', '?')} commits — {e.get('description', 'N/A')[:80]}" + for e in p["eras"] + ) + + return f"""# Porter's 
Value Chain Analysis — {p['name']} + +## Overview + +Value chain analysis of **{p['name']}** based on {total} commits across {p['era_count']} development eras. +Maps development activities to Porter's primary and support activities to identify value creation. + +**Project**: {p['name']} +**Description**: {p['description'] or 'N/A'} +**Lifespan**: {p['lifespan']} +**Total commits**: {total} +**Active days**: {p['active_days']} + +## Value Chain Summary + +``` ++-------------------------------------------------------------+ +| MARGIN: {margin_score}/100 | ++----------+----------+----------+----------+-----------------+ +| Inbound |Operations| Outbound |Marketing | Service | +| Logistics| |Logistics | & Sales| | +| {inbound_score:>3}/100 | {ops_score:>3}/100 | {outbound_score:>3}/100 | {marketing_score:>3}/100 | {service_score:>3}/100 | ++----------+----------+----------+----------+-----------------+ +| Infrastructure | Technology | HR Management | Procurement | +| {infra_score:>3}/100 | {tech_score:>3}/100 | {hr_score:>3}/100 | {proc_score:>3}/100 | ++-------------------------------------------------------------+ +``` + +## Primary Activities + +### 1. Inbound Logistics — Score: {inbound_score}/100 + +Data ingestion, dependency management, and input preparation. + +- **Dependency commits**: {dep_commits} ({round(dep_commits/total*100, 1)}%) +- **Chore/housekeeping commits**: {chore_commits} ({round(chore_commits/total*100, 1)}%) +- **Assessment**: {'Strong dependency management culture' if dep_commits > 3 else 'Minimal dependency management evidence'} + +### 2. Operations — Score: {ops_score}/100 + +Core feature development — the primary value-creating activity. 
+ +- **Feature commits**: {feat_commits} ({round(feat_commits/total*100, 1)}%) +- **Velocity**: {round(p['total_commits']/max(p['active_days'],1), 1)} commits per active day +- **Peak output**: {p['peak_day_commits']} commits on {p['peak_day']} +- **Assessment**: {'Strong feature development velocity' if feat_commits > 20 else 'Moderate feature development'} + +### 3. Outbound Logistics — Score: {outbound_score}/100 + +Build, packaging, and deployment pipeline. + +- **Build commits**: {build_commits} ({round(build_commits/total*100, 1)}%) +- **CI/CD commits**: {ci_commits} ({round(ci_commits/total*100, 1)}%) +- **Assessment**: {'Automated deployment pipeline' if ci_commits > 3 else 'Deployment automation may need attention'} + +### 4. Marketing & Sales — Score: {marketing_score}/100 + +Documentation, presentation, and stakeholder communication. + +- **Documentation commits**: {docs_commits} ({round(docs_commits/total*100, 1)}%) +- **Assessment**: {'Healthy documentation culture' if docs_commits > 5 else 'Documentation is underinvested'} + +### 5. Service — Score: {service_score}/100 + +Bug fixes, testing, and ongoing maintenance. + +- **Fix commits**: {fix_commits} ({round(fix_commits/total*100, 1)}%) +- **Test commits**: {test_commits} ({round(test_commits/total*100, 1)}%) +- **Fix-to-feature ratio**: {round(fix_commits/max(feat_commits,1), 2)}:1 +- **Assessment**: {'Healthy quality maintenance' if fix_commits > 5 else 'Limited quality maintenance evidence'} + +## Support Activities + +### Infrastructure — Score: {infra_score}/100 + +CI/CD tooling, development environment, and process automation. +- {ci_commits} CI commits, {chore_commits} chore commits across {p['era_count']} eras + +### Technology Development — Score: {tech_score}/100 + +Architecture evolution, refactoring, and performance optimization. 
+- {refactor_commits} refactor commits, {perf_commits} performance commits + +### Human Resource Management — Score: {hr_score}/100 + +Contributor coordination and AI agent orchestration. +- {len(p['contributors'])} contributor(s) over {p['span_days']} days +- AI-assisted development pattern: {round(feat_commits/max(p['active_days'],1), 1)} features/day + +### Procurement — Score: {proc_score}/100 + +External dependency adoption and library selection. +- {dep_commits} dependency-related commits + +## Era Value Creation + +{era_lines} + +## Margin Analysis + +**Overall value creation capacity**: {margin_score}/100 + +{'Strong value creation — high operational velocity with maintenance discipline.' if margin_score > 60 else 'Moderate value creation — opportunities to strengthen weaker activities.' if margin_score > 30 else 'Developing value chain — focus on strengthening primary activities.'} + +## Recommendations + +1. {'Operations are strong — maintain feature velocity.' if ops_score > 50 else 'Increase feature development throughput.'} +2. {'Service is healthy — continue quality investment.' if service_score > 40 else 'Invest in testing and bug fix discipline.'} +3. {'Documentation is adequate.' if marketing_score > 30 else 'Improve documentation commit frequency.'} +4. {'Build automation is mature.' 
if outbound_score > 30 else 'Invest in CI/CD automation.'} +""" + + +def strategy_swot_analysis(p: dict) -> str: + ct = p["commit_types"] + total = max(p["total_commits"], 1) + feat_commits = ct.get("feat", 0) + fix_commits = ct.get("fix", 0) + docs_commits = ct.get("docs", 0) + test_commits = ct.get("test", 0) + ci_commits = ct.get("ci", 0) + refactor_commits = ct.get("refactor", 0) + velocity = round(p["total_commits"] / max(p["active_days"], 1), 1) + + sdlc = p["analysis"].get("analysis-sdlc-gap-finder", {}) + ml = p["analysis"].get("analysis-ml-pattern-mapper", {}) + + strengths = [] + if feat_commits > 20: + strengths.append(f"High feature velocity: {feat_commits} feature commits ({round(feat_commits/total*100, 1)}%)") + if p["era_count"] >= 3: + strengths.append(f"Clear development structure: {p['era_count']} distinct eras show iterative evolution") + if velocity > 5: + strengths.append(f"Sustained intensity: {velocity} commits per active day") + if len(p["contributors"]) == 1: + strengths.append(f"Focused execution: {total} commits by one developer") + elif len(p["contributors"]) > 1: + strengths.append(f"Multi-contributor collaboration: {len(p['contributors'])} contributors") + if refactor_commits > 0: + strengths.append(f"Architecture discipline: {refactor_commits} refactoring commits") + if p["span_days"] > 30: + strengths.append(f"Long-lived project: {p['span_days']} days of sustained activity") + if not strengths: + strengths.append(f"Active development: {total} commits demonstrate engagement") + + weaknesses = [] + if test_commits == 0: + weaknesses.append("No test commits detected — testing infrastructure gap") + elif test_commits < 5: + weaknesses.append(f"Minimal testing: only {test_commits} test commits ({round(test_commits/total*100, 1)}%)") + if docs_commits < 5: + weaknesses.append(f"Low documentation: {docs_commits} doc commits ({round(docs_commits/total*100, 1)}%)") + if ci_commits == 0: + weaknesses.append("No CI/CD commits — deployment may 
be manual") + if len(p["gaps"]) > 2: + weaknesses.append(f"Development gaps: {len(p['gaps'])} gap periods detected") + if fix_commits / total < 0.05: + weaknesses.append(f"Low fix ratio: {round(fix_commits/total*100, 1)}% — potential quality debt") + if p["active_days"] / max(p["span_days"], 1) < 0.3: + weaknesses.append(f"Low activity ratio: {round(p['active_days']/max(p['span_days'],1)*100, 0)}% active days") + if not weaknesses: + weaknesses.append("No significant weaknesses identified from commit analysis") + + opportunities = [] + if ml.get("patterns") or ml.get("findings"): + ml_count = len(ml.get("patterns", ml.get("findings", []))) + opportunities.append(f"ML pattern adoption: {ml_count} patterns identified for optimization") + if test_commits == 0: + opportunities.append("Test automation: introducing testing would improve quality significantly") + if ci_commits == 0: + opportunities.append("CI/CD implementation: automation would improve deployment reliability") + if p["era_count"] > 2: + opportunities.append("Era-based optimization: mature structure enables targeted improvement") + opportunities.append("Cross-project learning: patterns can inform other KyaniteLabs projects") + if docs_commits < 5: + opportunities.append("Documentation investment: improved docs would increase sustainability") + + threats = [] + if fix_commits / total < 0.05 and feat_commits > 30: + threats.append("Quality debt: high feature rate with low fix rate may mask accumulating bugs") + if len(p["gaps"]) > 3: + threats.append(f"Continuity risk: {len(p['gaps'])} gaps suggest project abandonment risk") + threats.append("Dependency risk: external library changes may impact stability") + if not threats: + threats.append("No significant external threats identified") + + return f"""# SWOT Analysis — {p['name']} + +## Overview + +Strategic assessment of **{p['name']}** based on archaeological analysis of {total} commits across {p['era_count']} eras. 
+ +**Project**: {p['name']} +**Description**: {p['description'] or 'N/A'} +**Lifespan**: {p['lifespan']} +**Velocity**: {velocity} commits/active day +**Peak**: {p['peak_day_commits']} commits on {p['peak_day']} + +## SWOT Matrix + +``` ++-----------------------------+-----------------------------+ +| STRENGTHS | WEAKNESSES | +| ({len(strengths)} found) | ({len(weaknesses)} found) | ++-----------------------------+-----------------------------+ +| OPPORTUNITIES | THREATS | +| ({len(opportunities)} found) | ({len(threats)} found) | ++-----------------------------+-----------------------------+ +``` + +## Strengths + +Internal factors that give {p['name']} an advantage. + +{"".join(f"{i+1}. {s}\n" for i, s in enumerate(strengths))} + +## Weaknesses + +Internal factors that place {p['name']} at a disadvantage. + +{"".join(f"{i+1}. {w}\n" for i, w in enumerate(weaknesses))} + +## Opportunities + +External factors that {p['name']} could exploit. + +{"".join(f"{i+1}. {o}\n" for i, o in enumerate(opportunities))} + +## Threats + +External factors that could trouble {p['name']}. + +{"".join(f"{i+1}. {t}\n" for i, t in enumerate(threats))} + +## Commit Type Distribution + +| Type | Count | Percentage | SWOT Signal | +|------|-------|-----------|-------------| +{"".join(f"| {k} | {v} | {round(v/total*100,1)}% | {'Strength' if v/total > 0.3 else 'Weakness' if v == 0 else 'Neutral'} |\n" for k, v in sorted(ct.items(), key=lambda x: -x[1]))} + +## Strategic Priorities + +### SO Strategy (Strengths x Opportunities) +{'Leverage high velocity to implement testing and CI/CD.' if velocity > 5 and test_commits == 0 else 'Use development structure to adopt patterns and optimize.'} + +### WO Strategy (Weaknesses x Opportunities) +{'Address testing gap with AI-assisted test generation.' if test_commits == 0 else 'Strengthen testing with structured sessions.'} + +### ST Strategy (Strengths x Threats) +{'Use velocity to proactively address quality debt.' 
if feat_commits > 20 else 'Maintain momentum while addressing signals.'} + +### WT Strategy (Weaknesses x Threats) +{'Implement CI/CD to reduce deployment risk.' if ci_commits == 0 else 'Maintain CI/CD to catch regressions early.'} + +*Analysis from {total} commits, {p['era_count']} eras, {p['active_days']} active days* +""" + + +def strategy_wardley_map(p: dict) -> str: + ct = p["commit_types"] + total = max(p["total_commits"], 1) + feat_commits = ct.get("feat", 0) + test_commits = ct.get("test", 0) + ci_commits = ct.get("ci", 0) + docs_commits = ct.get("docs", 0) + refactor_commits = ct.get("refactor", 0) + + def evolution_stage(ratio, thresholds=(0.01, 0.05, 0.15)): + if ratio == 0: + return "Genesis" + if ratio < thresholds[0]: + return "Genesis" + if ratio < thresholds[1]: + return "Custom" + if ratio < thresholds[2]: + return "Product" + return "Commodity" + + ci_stage = evolution_stage(ci_commits / total) + test_stage = evolution_stage(test_commits / total) + docs_stage = evolution_stage(docs_commits / total, (0.02, 0.08, 0.20)) + feat_stage = "Product" if feat_commits / total > 0.2 else "Custom" if feat_commits > 0 else "Genesis" + infra_stage = "Commodity" if ci_commits > 5 else "Product" if ci_commits > 2 else "Custom" if ci_commits > 0 else "Genesis" + refactor_stage = evolution_stage(refactor_commits / total) + + era_evolution = [] + for e in p["eras"]: + desc = e.get("description", "").lower() + era_commits = e.get("commits", 0) + if any(w in desc for w in ("setup", "init", "scaffold", "bootstrap")): + phase = "Genesis" + elif any(w in desc for w in ("refactor", "restructure", "rewrite", "architect")): + phase = "Custom → Product" + elif any(w in desc for w in ("automat", "ci", "pipeline", "deploy")): + phase = "Product → Commodity" + elif any(w in desc for w in ("fix", "bug", "patch", "stabiliz")): + phase = "Product" + else: + phase = "Custom" + era_evolution.append(f" - **{e.get('name', f'Era {e.get('id', '?')}')}** ({e.get('dates', '?')}): 
{era_commits} commits — {phase}") + + maturity_scores = {"Genesis": 1, "Custom": 2, "Product": 3, "Commodity": 4} + avg_maturity = sum( + maturity_scores.get(s.split("→")[0].strip().split()[0], 2) + for s in [ci_stage, test_stage, docs_stage, feat_stage, infra_stage] + ) / 5 + maturity_label = "Mature" if avg_maturity > 3 else "Growing" if avg_maturity > 2 else "Early" + + return f"""# Wardley Map — {p['name']} + +## Overview + +Wardley Map analysis of **{p['name']}** positioning components along the evolution axis +(Genesis → Custom → Product → Commodity) based on {total} commits across {p['era_count']} eras. + +**Project**: {p['name']} +**Description**: {p['description'] or 'N/A'} +**Maturity**: {maturity_label} (avg evolution: {round(avg_maturity, 1)}/4.0) + +## Value Chain + +``` +USER NEED: {p['description'] or p['name']} +| ++-- Core Application [{feat_stage}] +| +-- Feature Development [{feat_stage}] -- {feat_commits} commits ({round(feat_commits/total*100, 1)}%) +| +-- Bug Fixing & Maintenance [Product] -- {ct.get('fix', 0)} commits ({round(ct.get('fix', 0)/total*100, 1)}%) +| +-- Performance Optimization [{refactor_stage}] -- {refactor_commits} commits +| ++-- Quality Assurance [{test_stage}] +| +-- Test Infrastructure [{test_stage}] -- {test_commits} commits ({round(test_commits/total*100, 1)}%) +| +-- CI/CD Pipeline [{ci_stage}] -- {ci_commits} commits ({round(ci_commits/total*100, 1)}%) +| ++-- Knowledge & Communication [{docs_stage}] +| +-- Documentation [{docs_stage}] -- {docs_commits} commits ({round(docs_commits/total*100, 1)}%) +| +-- Code Comments -- embedded in feature commits +| ++-- Foundation [Commodity] + +-- Version Control (Git) -- assumed + +-- Programming Language Runtime -- assumed + +-- Development Environment -- assumed +``` + +## Evolution Axis + +``` + Genesis Custom Product Commodity + (Novel) (Emerging) (Established) (Standardized) + | | | | +``` + +## Component Evolution Table + +| Component | Stage | Evidence | Signal | 
+|-----------|-------|----------|--------| +| Feature Development | {feat_stage} | {feat_commits} commits | {round(feat_commits/total*100, 1)}% of all commits | +| Test Infrastructure | {test_stage} | {test_commits} commits | {'No testing evidence' if test_commits == 0 else f'{round(test_commits/total*100, 1)}% of commits'} | +| CI/CD Pipeline | {ci_stage} | {ci_commits} commits | {'Manual process' if ci_commits == 0 else f'{round(ci_commits/total*100, 1)}% of commits'} | +| Documentation | {docs_stage} | {docs_commits} commits | {round(docs_commits/total*100, 1)}% of commits | +| Infrastructure | {infra_stage} | {ci_commits} CI + tooling | {'Basic' if ci_commits < 3 else 'Developing' if ci_commits < 10 else 'Mature'} | +| Architecture | {refactor_stage} | {refactor_commits} commits | {'Static' if refactor_commits == 0 else 'Evolving'} | + +## Era Evolution Trajectory + +{"".join(f'{line}\n' for line in era_evolution)} + +## Strategic Implications + +### What to Commoditize +{'CI/CD is at commodity stage — leverage it.' if infra_stage == 'Commodity' else 'CI/CD needs investment to reach commodity stage.' if ci_stage in ('Genesis', 'Custom') else 'CI/CD is maturing — continue investment.'} + +### What to Productize +{'Testing needs to evolve to Product — invest in test frameworks.' if test_stage in ('Genesis', 'Custom') else 'Testing infrastructure is maturing.'} +{'Documentation is underdeveloped — systematic docs would improve sustainability.' if docs_stage in ('Genesis', 'Custom') else 'Documentation is established.'} + +### What Remains Custom +{'Feature development is the core differentiator — expected and healthy.' if feat_stage == 'Custom' else 'Feature development shows mature patterns.'} + +### Pioneering Areas +{'Explore automated testing and CI/CD as competitive advantages.' if test_stage == 'Genesis' or ci_stage == 'Genesis' else 'Basic infrastructure exists — explore advanced automation.'} + +## Movement Recommendations + +1. 
**{'Invest in testing' if test_stage == 'Genesis' else 'Strengthen testing'}**: {test_commits} test commits is {'insufficient' if test_commits < 5 else 'adequate'} — target 10-15% of commits +2. **{'Automate CI/CD' if ci_stage == 'Genesis' else 'Improve CI/CD'}**: {ci_commits} CI commits — {'no automation detected' if ci_commits == 0 else 'foundation exists'} +3. **{'Increase documentation' if docs_commits < 5 else 'Maintain documentation'}**: {docs_commits} doc commits +4. **Maintain feature velocity**: {round(feat_commits/max(p['active_days'],1), 1)} features/day + +*Map from {total} commits across {p['era_count']} eras ({p['active_days']} active days)* +""" + + +def strategy_bcg_matrix(p: dict) -> str: + ct = p["commit_types"] + total = max(p["total_commits"], 1) + feat_commits = ct.get("feat", 0) + fix_commits = ct.get("fix", 0) + test_commits = ct.get("test", 0) + ci_commits = ct.get("ci", 0) + docs_commits = ct.get("docs", 0) + refactor_commits = ct.get("refactor", 0) + + # BCG quadrant classification based on commit activity (market share proxy) + # and growth rate (velocity trend across eras) + feat_ratio = feat_commits / total + fix_ratio = fix_commits / total + test_ratio = test_commits / total + docs_ratio = docs_commits / total + + # Calculate era velocity trend (growth rate) + era_velocities = [] + for e in p["eras"]: + era_commits = e.get("commits", 0) + dates = e.get("dates", "") + if "→" in dates: + parts = dates.split("→") + try: + from datetime import datetime as _dt + start = _dt.strptime(parts[0].strip()[:10], "%Y-%m-%d") + end = _dt.strptime(parts[1].strip()[:10], "%Y-%m-%d") + days = max((end - start).days, 1) + era_velocities.append(round(era_commits / days, 2)) + except (ValueError, IndexError): + era_velocities.append(era_commits) + else: + era_velocities.append(era_commits) + + velocity_trend = "growing" if len(era_velocities) >= 2 and era_velocities[-1] > era_velocities[0] else "stable" if len(era_velocities) >= 2 else "unknown" + 
avg_velocity = round(sum(era_velocities) / max(len(era_velocities), 1), 1) if era_velocities else 0 + + # Classify components into BCG quadrants + # Stars: high share (>15% of commits) + growing velocity + # Cash Cows: high share + stable/declining velocity + # Question Marks: low share + growing velocity + # Dogs: low share + stable/declining velocity + + def classify(ratio, label): + high_share = ratio > 0.15 + if high_share and velocity_trend == "growing": + return "Star" + elif high_share: + return "Cash Cow" + elif velocity_trend == "growing" and ratio > 0.05: + return "Question Mark" + else: + return "Dog" + + components = [ + ("Feature Development", feat_commits, feat_ratio, classify(feat_ratio, "feat")), + ("Bug Fixing", fix_commits, fix_ratio, classify(fix_ratio, "fix")), + ("Testing", test_commits, test_ratio, classify(test_ratio, "test")), + ("CI/CD", ci_commits, ci_commits / total, classify(ci_commits / total, "ci")), + ("Documentation", docs_commits, docs_ratio, classify(docs_ratio, "docs")), + ("Refactoring", refactor_commits, refactor_commits / total, classify(refactor_commits / total, "refactor")), + ] + + stars = [c for c in components if c[3] == "Star"] + cash_cows = [c for c in components if c[3] == "Cash Cow"] + question_marks = [c for c in components if c[3] == "Question Mark"] + dogs = [c for c in components if c[3] == "Dog"] + + component_table = "\n".join( + f"| {name} | {commits} | {round(ratio*100, 1)}% | {quadrant} |" + for name, commits, ratio, quadrant in components + ) + + star_items = "\n".join(f" - **{c[0]}**: {c[1]} commits ({round(c[2]*100, 1)}%) — invest and grow" for c in stars) or " - None identified" + cow_items = "\n".join(f" - **{c[0]}**: {c[1]} commits ({round(c[2]*100, 1)}%) — maintain efficiency" for c in cash_cows) or " - None identified" + qm_items = "\n".join(f" - **{c[0]}**: {c[1]} commits ({round(c[2]*100, 1)}%) — evaluate investment" for c in question_marks) or " - None identified" + dog_items = "\n".join(f" - 
**{c[0]}**: {c[1]} commits ({round(c[2]*100, 1)}%) — consider deprioritizing" for c in dogs) or " - None identified" + + era_lines = "\n".join( + f" - **{e.get('name', f'Era {e.get('id', '?')}')}** ({e.get('dates', '?')}): " + f"{e.get('commits', '?')} commits" + for e in p["eras"] + ) + + return f"""# BCG Growth-Share Matrix — {p['name']} + +## Overview + +BCG Matrix analysis of **{p['name']}** mapping development activities to growth-share quadrants. +Uses commit distribution as market share proxy and era velocity trend as growth indicator. + +**Project**: {p['name']} +**Description**: {p['description'] or 'N/A'} +**Total commits**: {total} +**Velocity trend**: {velocity_trend} (avg {avg_velocity} commits/era-day) +**Portfolio components**: {len(components)} + +## Matrix + +``` + HIGH GROWTH LOW GROWTH + ┌─────────────────┬─────────────────┐ + │ │ │ + HIGH │ STARS │ CASH COWS │ + SHARE │ ({len(stars)}) │ ({len(cash_cows)}) │ + │ │ │ + ├─────────────────┼─────────────────┤ + │ │ │ + LOW │ QUESTION MARKS │ DOGS │ + SHARE │ ({len(question_marks)}) │ ({len(dogs)}) │ + │ │ │ + └─────────────────┴─────────────────┘ +``` + +## Component Classification + +| Component | Commits | Share | Quadrant | +|-----------|---------|-------|----------| +{component_table} + +## Quadrant Analysis + +### Stars ({len(stars)}) — Invest for Growth +{star_items} + +### Cash Cows ({len(cash_cows)}) — Maximize Efficiency +{cow_items} + +### Question Marks ({len(question_marks)}) — Selective Investment +{qm_items} + +### Dogs ({len(dogs)}) — Contain or Divest +{dog_items} + +## Era Context + +{era_lines} + +## Strategic Recommendations + +1. {'Double down on Stars to capture growth momentum' if stars else 'No clear Stars — consider investing in high-potential areas'} +2. {'Use Cash Cow stability to fund growth initiatives' if cash_cows else 'No Cash Cows — revenue/foundation may be thin'} +3. 
{'Evaluate Question Marks: invest in promising ones, divest the rest' if question_marks else 'No Question Marks — portfolio may lack growth options'} +4. {'Minimize Dog investment unless strategically necessary' if dogs else 'No Dogs — portfolio is lean'} + +*Matrix from {total} commits across {p['era_count']} eras* +""" + + +def strategy_ansoff_matrix(p: dict) -> str: + ct = p["commit_types"] + total = max(p["total_commits"], 1) + feat_commits = ct.get("feat", 0) + fix_commits = ct.get("fix", 0) + refactor_commits = ct.get("refactor", 0) + test_commits = ct.get("test", 0) + + # Ansoff quadrants derived from commit patterns + # Market Penetration: feat commits to existing areas (high feat + few eras) + # Market Development: feat commits opening new areas (high feat + many eras) + # Product Development: refactor + test (improving existing) + # Diversification: many commit types spread evenly + + feat_ratio = feat_commits / total + fix_ratio = fix_commits / total + refactor_ratio = refactor_commits / total + type_diversity = len([k for k, v in ct.items() if v > 0]) + + # Score each quadrant based on commit evidence + penetration_score = min(100, round(feat_ratio * 150 + fix_ratio * 50)) + market_dev_score = min(100, round(p["era_count"] * 15 + feat_ratio * 80)) + product_dev_score = min(100, round(refactor_ratio * 200 + test_commits / total * 100)) + diversification_score = min(100, round(type_diversity * 10 + len(p["contributors"]) * 15)) + + # Classify the project's primary strategic posture + scores = { + "Market Penetration": penetration_score, + "Market Development": market_dev_score, + "Product Development": product_dev_score, + "Diversification": diversification_score, + } + primary_strategy = max(scores, key=scores.get) + secondary_strategy = max( + (k for k in scores if k != primary_strategy), key=lambda k: scores[k] + ) + + # Era-based strategic moves + era_moves = [] + for e in p["eras"]: + desc = e.get("description", "").lower() + commits = 
e.get("commits", 0) + if any(w in desc for w in ("init", "setup", "scaffold", "bootstrap")): + move = "Market Entry (Penetration)" + elif any(w in desc for w in ("expand", "new", "add", "feature")): + move = "Market Development" + elif any(w in desc for w in ("refactor", "improve", "optim", "test", "ci")): + move = "Product Development" + elif any(w in desc for w in ("rewrit", "pivot", "integrat", "merge")): + move = "Diversification" + else: + move = "Penetration" + era_moves.append(f" - **{e.get('name', f'Era {e.get('id', '?')}')}** ({e.get('dates', '?')}): {commits} commits — {move}") + + return f"""# Ansoff Growth Matrix — {p['name']} + +## Overview + +Ansoff Matrix analysis of **{p['name']}** identifying growth strategies based on +commit patterns, era evolution, and development scope. + +**Project**: {p['name']} +**Description**: {p['description'] or 'N/A'} +**Primary strategy**: {primary_strategy} ({scores[primary_strategy]}/100) +**Secondary strategy**: {secondary_strategy} ({scores[secondary_strategy]}/100) +**Commit type diversity**: {type_diversity} types + +## Matrix + +``` + EXISTING PRODUCTS NEW PRODUCTS + ┌───────────────────────┬───────────────────────┐ + EXISTING │ │ │ + MARKETS │ MARKET PENETRATION │ PRODUCT DEVELOPMENT │ + │ Score: {penetration_score:>3}/100 │ Score: {product_dev_score:>3}/100 │ + │ Risk: LOW │ Risk: MEDIUM │ + ├───────────────────────┼───────────────────────┤ + NEW │ │ │ + MARKETS │ MARKET DEVELOPMENT │ DIVERSIFICATION │ + │ Score: {market_dev_score:>3}/100 │ Score: {diversification_score:>3}/100 │ + │ Risk: MEDIUM │ Risk: HIGH │ + └───────────────────────┴───────────────────────┘ +``` + +## Quadrant Scores + +| Strategy | Score | Risk Level | Evidence | +|----------|-------|------------|----------| +| Market Penetration | {penetration_score}/100 | Low | {feat_commits} feature commits, {fix_commits} fixes | +| Market Development | {market_dev_score}/100 | Medium | {p['era_count']} eras, {len(p['contributors'])} contributors | +| 
Product Development | {product_dev_score}/100 | Medium | {refactor_commits} refactors, {test_commits} test commits |
| Diversification | {diversification_score}/100 | High | {type_diversity} commit types |

## Era Strategic Moves

{"\n".join(era_moves)}

## Strategic Analysis

### Recommended Primary Strategy: {primary_strategy}

{'The commit pattern shows strong feature development velocity — deepen existing capabilities before expanding scope.' if primary_strategy == 'Market Penetration' else 'Multiple development eras indicate expansion into new areas — leverage existing foundation.' if primary_strategy == 'Market Development' else 'Refactoring and testing evidence shows investment in product quality — continue improving the core.' if primary_strategy == 'Product Development' else 'Broad commit type distribution suggests diverse activities — focus or risk spreading too thin.'}

### Growth Trajectory

- **Current posture**: {primary_strategy} with {secondary_strategy} undertones
- **Active days**: {p['active_days']} days over {p['span_days']} day span ({round(p['active_days']/max(p['span_days'],1)*100, 0)}% active)
- **Velocity**: {round(total/max(p['active_days'],1), 1)} commits per active day
- **Era stability**: {p['era_count']} distinct phases across {p['lifespan'] or 'the project lifespan'}

## Recommendations

1. {'Continue penetration: deepen existing features before expanding' if primary_strategy == 'Market Penetration' else 'Balance development with quality investment'}
2. {'Build quality foundation (testing, CI) before diversifying' if product_dev_score < 30 else 'Quality foundation exists — safe to pursue growth'}
3. {'Avoid diversification until core is solid' if diversification_score > 50 and product_dev_score < 30 else 'Portfolio approach is viable given current quality levels'}
4. 
{'Use era transitions as checkpoints for strategy shifts' if p['era_count'] >= 3 else 'Establish more development phases before strategic shifts'}

*Matrix from {total} commits across {p['era_count']} eras*
"""


def strategy_blue_ocean(p: dict) -> str:
    ct = p["commit_types"]
    total = max(p["total_commits"], 1)
    feat_commits = ct.get("feat", 0)
    fix_commits = ct.get("fix", 0)
    test_commits = ct.get("test", 0)
    ci_commits = ct.get("ci", 0)
    docs_commits = ct.get("docs", 0)
    refactor_commits = ct.get("refactor", 0)
    perf_commits = ct.get("perf", 0)

    # 10 value factors for OSS projects, scored 0-10 from commit evidence
    factors = {
        "ease_of_installation": min(10, round(ci_commits / max(total * 0.02, 1) * 3 + 2)),
        "docs_quality": min(10, round(docs_commits / max(total * 0.03, 1) * 2 + 1)),
        "api_stability": max(0, min(10, round(10 - refactor_commits / max(total * 0.05, 1) * 2))),
        "test_coverage": min(10, round(test_commits / max(total * 0.05, 1) * 3 + 1)),
        "performance": min(10, round(perf_commits / max(total * 0.02, 1) * 4 + 3)),
        "security": min(10, round(fix_commits / max(total * 0.05, 1) * 2 + 2)),
        "community_engagement": min(10, round(len(p["contributors"]) * 3 + 2)),
        "feature_richness": min(10, round(feat_commits / max(total * 0.1, 1) * 2 + 1)),
        "code_maintainability": min(10, round(refactor_commits / max(total * 0.03, 1) * 2 + 3)),
        "automation_level": min(10, round(ci_commits / max(total * 0.03, 1) * 3 + 1)),
    }

    avg_score = round(sum(factors.values()) / len(factors), 1)

    # Eliminate/Reduce/Raise/Create framework
    # Eliminate: factors below 3
    eliminate = [(k, v) for k, v in sorted(factors.items()) if v <= 3]
    # Reduce: factors 4-5
    reduce = [(k, v) for k, v in sorted(factors.items()) if 4 <= v <= 5]
    # Raise: factors 6-7
    raise_factors = [(k, v) for k, v in sorted(factors.items()) if 6 <= v <= 7]
    # Create: factors 8+ OR missing capabilities (0 commits in area)
    create = [(k, v) for k, v in sorted(factors.items()) if 
v >= 8] + + # Identify missing capabilities as creation opportunities + missing = [] + if test_commits == 0: + missing.append("testing_infrastructure") + if ci_commits == 0: + missing.append("ci_cd_pipeline") + if docs_commits == 0: + missing.append("documentation_system") + if perf_commits == 0: + missing.append("performance_monitoring") + + factor_table = "\n".join( + f"| {name.replace('_', ' ').title()} | {score}/10 | {'█' * score}{'░' * (10 - score)} |" + for name, score in sorted(factors.items()) + ) + + eliminate_items = "\n".join(f" - **{k.replace('_', ' ').title()}** (current: {v}/10)" for k, v in eliminate) or " - None — all factors have some investment" + reduce_items = "\n".join(f" - **{k.replace('_', ' ').title()}** (current: {v}/10)" for k, v in reduce) or " - None" + raise_items = "\n".join(f" - **{k.replace('_', ' ').title()}** (current: {v}/10)" for k, v in raise_factors) or " - None" + create_items = "\n".join(f" - **{k.replace('_', ' ').title()}** (current: {v}/10)" for k, v in create) or " - None" + missing_items = "\n".join(f" - **{m.replace('_', ' ').title()}** — no evidence found" for m in missing) or " - No critical gaps detected" + + era_lines = "\n".join( + f" - **{e.get('name', f'Era {e.get('id', '?')}')}** ({e.get('dates', '?')}): " + f"{e.get('commits', '?')} commits" + for e in p["eras"] + ) + + return f"""# Blue Ocean Strategy — {p['name']} + +## Overview + +Blue Ocean Strategy analysis of **{p['name']}** applying the Eliminate-Reduce-Raise-Create +framework to development activities. Identifies where to focus investment and where to cut. 
+ +**Project**: {p['name']} +**Description**: {p['description'] or 'N/A'} +**Average value score**: {avg_score}/10 across {len(factors)} factors +**Critical gaps**: {len(missing)} + +## Strategy Canvas + +| Value Factor | Score | Bar | +|-------------|-------|-----| +{factor_table} + +## Four Actions Framework + +### Eliminate — What to stop doing +{eliminate_items} + +### Reduce — What to do less of +{reduce_items} + +### Raise — What to do more of +{raise_items} + +### Create — What to build that doesn't exist +{create_items} + +### Missing Capabilities (Creation Opportunities) +{missing_items} + +## Era Context + +{era_lines} + +## Value Innovation Analysis + +- **Strongest areas**: {', '.join(k.replace('_', ' ') for k, v in sorted(factors.items(), key=lambda x: -x[1])[:3])} +- **Weakest areas**: {', '.join(k.replace('_', ' ') for k, v in sorted(factors.items(), key=lambda x: x[1])[:3])} +- **Quick wins**: {', '.join(m.replace('_', ' ') for m in missing[:2]) or 'none obvious'} + +## Recommendations + +1. {'Address critical gaps first: ' + ', '.join(m.replace('_', ' ') for m in missing[:2]) if missing else 'No critical gaps — focus on raising mid-range factors'} +2. {'Eliminate low-value activities to free resources' if eliminate else 'All factors have baseline investment'} +3. {'Leverage strong areas as competitive advantages' if any(v >= 8 for v in factors.values()) else 'Build at least one standout capability'} +4. {'Target avg score of 7+ across all factors' if avg_score < 7 else 'Maintain current investment levels'} + +*Analysis from {total} commits across {p['era_count']} eras* +""" + + +# ── Planning files ──────────────────────────────────────────────────────── + +def planning_remediation_summary(p: dict) -> str: + return f"""# Remediation Summary — {p['name']} + +## Current State + +**{p['name']}** has been archaeologically mined and analyzed. 
+ +| Metric | Value | +|--------|-------| +| Total commits | {p['total_commits']} | +| Eras | {p['era_count']} | +| Active days | {p['active_days']} | +| Span | {p['span_days']} days | +| Peak day | {p['peak_day']} | + +## Corrections Applied + +1. **Era detection**: {p['era_count']} development eras identified and documented +2. **Commit classification**: All {p['total_commits']} commits categorized by type +3. **Contributor mapping**: {len(p['contributors'])} contributor(s) identified +4. **Gap analysis**: {len(p['gaps'])} development gap(s) detected + +## Remaining Technical Debt + +- {'No test commits detected — testing infrastructure may need attention' if p['commit_types'].get('test', 0) == 0 else f'{p["commit_types"].get("test", 0)} test commits present'} +- {'No CI commits — automation may be manual' if p['commit_types'].get('ci', 0) == 0 else f'{p["commit_types"].get("ci", 0)} CI commits present'} +- {'Documentation commits are sparse' if p['commit_types'].get('docs', 0) < 5 else f'{p["commit_types"].get("docs", 0)} documentation commits'} + +## Backlog Priority + +1. Era narrative enrichment (if eras lack descriptions) +2. Cross-project pattern comparison +3. Agent attribution refinement +""" + + +def planning_external_data(p: dict) -> str: + return f"""# External Data Sources Research — {p['name']} + +## Available Data Sources + +### Git Repository Data +- **github-commits.csv**: {p['total_commits']} commits with hash, date, message, author +- **commit-eras.json**: {p['era_count']} development eras with key events +- **detected-signals.json**: Velocity, scope, author, and gap signals + +### Analysis Data +{"".join(f"- **{stem}**: Available\n" for stem in p.get('analysis', {}))} + +### Metrics +- **canonical-metrics.json**: Core project metrics +- **data.json**: Full telemetry visualization data + +## Enrichment Opportunities + +1. **GitHub API**: Pull request data, issue tracking, branch analysis +2. 
**Language detection**: Repository language breakdown from GitHub metadata +3. **Dependency analysis**: Package dependency evolution over time +4. **Code size metrics**: Lines of code, file count evolution + +## Recommendations + +- Prioritize GitHub API integration for PR and issue data +- Add language breakdown from GitHub metadata +- Consider code churn analysis for deeper quality insights +""" + + +def planning_meta_pattern(p: dict) -> str: + return f"""# Meta-Pattern Visualization Research — {p['name']} + +## Overview + +Research into meta-patterns across **{p['name']}**'s development history. + +**Dataset**: {p['total_commits']} commits, {p['era_count']} eras, {p['active_days']} active days + +## Identified Meta-Patterns + +### Velocity Patterns +- **Peak velocity**: {p['peak_day_commits']} commits on {p['peak_day']} +- **Average velocity**: {round(p['total_commits']/max(p['active_days'],1), 1)} commits/active day +- **Commit type distribution**: {', '.join(f'{k} ({v})' for k, v in sorted(p['commit_types'].items(), key=lambda x: -x[1])[:5])} + +### Temporal Patterns +- **Development gaps**: {len(p['gaps'])} gap(s) detected +- **Era transitions**: {p['era_count']} distinct phases +- **Active day ratio**: {round(p['active_days']/max(p['span_days'],1)*100, 0)}% + +### Scope Patterns +- Feature commits: {p['commit_types'].get('feat', 0)} ({round(p['commit_types'].get('feat', 0)/max(p['total_commits'],1)*100, 1)}%) +- Fix commits: {p['commit_types'].get('fix', 0)} ({round(p['commit_types'].get('fix', 0)/max(p['total_commits'],1)*100, 1)}%) + +## Visualization Recommendations + +1. **Era timeline**: Horizontal bar chart showing era durations and commit density +2. **Velocity heatmap**: Calendar heatmap of daily commit counts +3. **Type treemap**: Proportional view of commit types +4. 
**Era comparison**: Side-by-side metrics for each era +""" + + +# ── Learning files ──────────────────────────────────────────────────────── + +def learning_ml_plan(p: dict) -> str: + return f"""# ML Learning Plan — {p['name']} + +## Knowledge Gaps Identified + +Based on analysis of {p['total_commits']} commits across {p['era_count']} eras. + +### Priority 1: Core Skills +- **Development velocity optimization**: Understanding {round(p['total_commits']/max(p['active_days'],1), 1)} commits/day patterns +- **AI agent collaboration**: Effective human-AI development workflows +- **Iterative development**: Lessons from {p['era_count']} era transitions + +### Priority 2: Technical Skills +- **Testing discipline**: {'Present' if p['commit_types'].get('test', 0) > 0 else 'Needed'} — {p['commit_types'].get('test', 0)} test commits +- **CI/CD automation**: {'Automated' if p['commit_types'].get('ci', 0) > 0 else 'Manual'} — {p['commit_types'].get('ci', 0)} CI commits +- **Documentation**: {'Strong' if p['commit_types'].get('docs', 0) > 5 else 'Needs improvement'} — {p['commit_types'].get('docs', 0)} doc commits + +## Learning Timeline + +{"".join(f"### {e.get('name', f'Era {e.get('id', '?')}')} ({e.get('dates', '?')})\nFocus: {e.get('description', 'General development')[:100]}\nCommits: {e.get('commits', '?')}\n\n" for e in p['eras'])} + +## Recommended Resources + +1. Git workflow patterns for AI-assisted development +2. Conventional commit standards and automation +3. Development archaeology techniques +4. 
Era-based project narrative construction +""" + + +def learning_story_circle(p: dict) -> str: + era_events = "\n".join( + f"- **{e.get('name', f'Era {e.get('id', '?')}')}**: {e.get('description', 'Development phase')} ({e.get('commits', '?')} commits)" + for e in p['eras'] + ) + return f"""# Recursive Story Circle — {p['name']} + +## The Hero's Journey of {p['name']} + +### The Ordinary World +Before {p['name']}, the development landscape was {p['lifespan']} of untapped potential. + +### The Call to Adventure +{p['eras'][0]['description'] if p['eras'] else 'The project began with initial commits.'} + +### Crossing the Threshold +First era: {p['eras'][0].get('name', 'Genesis')} with {p['eras'][0].get('commits', '?')} commits. + +### The Journey + +{era_events} + +### The Ordeal +Peak challenge: {p['peak_day']} with {p['peak_day_commits']} commits in a single day. + +### Resolution +{p['total_commits']} commits later, {p['name']} stands as a testament to {p['active_days']} days of focused development. + +## The Elixir + +What was learned: +- Development velocity of {round(p['total_commits']/max(p['active_days'],1), 1)} commits/day is {'sustainable' if round(p['total_commits']/max(p['active_days'],1), 1) < 20 else 'intense'} +- {p['era_count']} distinct phases show {'deliberate evolution' if p['era_count'] > 2 else 'focused development'} +- {len(p['contributors'])} contributor(s) maintained momentum across {p['span_days']} days +""" + + +# ── Content files ───────────────────────────────────────────────────────── + +def content_blog_draft(p: dict) -> str: + return f"""# What {p['total_commits']} Commits Reveal About Building {p['name']} + +*An archaeological dig through git history.* + +## The Discovery + +We mined {p['name']}'s complete git history — {p['total_commits']} commits across {p['active_days']} active days — and what we found tells a story that commit messages alone can't convey. 
+ +## {p['era_count']} Chapters, One Story + +{" ".join(f'We see **{e.get("name", f"Era {e.get('id', '?')}")}** ({e.get("dates", "?")}): {e.get("description", "a distinct development phase").lower()}.' for e in p['eras'])} + +## The Numbers + +- **{p['total_commits']} commits** in **{p['active_days']} active days** +- Peak velocity: **{p['peak_day_commits']} commits** on {p['peak_day']} +- {p['commit_types'].get('feat', 0)} features, {p['commit_types'].get('fix', 0)} fixes, {p['commit_types'].get('test', 0)} tests + +## What It Means + +The development rhythm of {p['name']} reveals {'a burst-heavy pattern' if p['active_days']/max(p['span_days'],1) < 0.5 else 'consistent engagement'} — {'intense sprints separated by reflection periods' if p['era_count'] > 2 else 'focused, sustained effort'}. + +## The Takeaway + +Every git repository tells a story. {p['name']}'s story is one of {'rapid iteration' if round(p['total_commits']/max(p['active_days'],1), 1) > 10 else 'careful craftsmanship'}, {'bold pivots' if p['era_count'] > 3 else 'steady direction'}, and the kind of development velocity that {'only AI-assisted workflows can achieve' if round(p['total_commits']/max(p['active_days'],1), 1) > 15 else 'comes from focused, intentional work'}. +""" + + +def content_excavation_report(p: dict) -> str: + return f"""# Excavation Report — {p['name']} + +*Date: {datetime.now().strftime("%Y-%m-%d")}* + +## Executive Summary + +Archaeological mining of **{p['name']}** reveals {p['total_commits']} commits across {p['era_count']} development eras. 
+ +## Findings by Era + +{"".join(f"### Era {e.get('id', '?')}: {e.get('name', '?')}\n- **Period**: {e.get('dates', '?')}\n- **Commits**: {e.get('commits', '?')}\n- **Summary**: {e.get('description', 'N/A')}\n- **Key events**: {', '.join(str(x) for x in e.get('key_events', [])[:5])}\n\n" for e in p['eras'])} + +## Statistical Summary + +| Metric | Value | +|--------|-------| +| Total commits | {p['total_commits']} | +| Active days | {p['active_days']} | +| Development span | {p['span_days']} days | +| Eras identified | {p['era_count']} | +| Peak day | {p['peak_day']} | +| Peak day commits | {p['peak_day_commits']} | +| Commits/active day | {round(p['total_commits']/max(p['active_days'],1), 1)} | + +## Artifacts Cataloged + +{"".join(f"- **{k}**: {v} commits ({round(v/max(p['total_commits'],1)*100, 1)}%)\n" for k, v in sorted(p['commit_types'].items(), key=lambda x: -x[1]))} +""" + + +def content_story_circle_sample(p: dict) -> str: + return f"""# The Story of {p['name']} + +{p['eras'][0]['description'] if p['eras'] else 'A project begins.'} + +That's how it started. Not with a grand plan, but with a commit message and the conviction that something needed to exist. + +--- + +{" ".join(f'**{e.get("name", f"Era {e.get('id', '?')}")}** — {e.get("commits", "?")} commits in {e.get("dates", "?")}. {e.get("description", "The work continued.")}' for e in p['eras'])} + +--- + +{p['peak_day_commits']} commits in a single day. That was the peak — {p['peak_day']}. When everything clicked and the code just flowed. + +In the end, it was {p['total_commits']} commits. {p['active_days']} active days. A lifespan of {p['lifespan']}. And {'a story still being written' if p['eras'] else 'a complete archaeological record'}. + +The repository remembers everything. +""" + + +def content_twitter_thread(p: dict) -> str: + return f"""# Twitter Thread — {p['name']} + +1/ I just mined the complete git history of {p['name']} — {p['total_commits']} commits across {p['active_days']} days. 
What I found was fascinating. + +2/ The project went through {p['era_count']} distinct phases. {"Here's each one:" if p['era_count'] > 1 else "Here's what happened:"} + +{" 3/" + chr(10).join(f" {i+4}/ **{e.get('name', f'Era {e.get('id', '?')}')}** ({e.get('dates', '?')}): {e.get('commits', '?')} commits. {e.get('description', '')[:80]}" for i, e in enumerate(p['eras']))} + +{len(p['eras'])+4}/ Peak velocity: {p['peak_day_commits']} commits on {p['peak_day']}. That's {round(p['peak_day_commits']/24, 1)} commits per hour. + +{len(p['eras'])+5}/ The commit type breakdown: {p['commit_types'].get('feat', 0)} features, {p['commit_types'].get('fix', 0)} fixes, {p['commit_types'].get('test', 0)} tests. {'Healthy ratio.' if p['commit_types'].get('feat', 0) > p['commit_types'].get('fix', 0) else 'Lots of iteration.'} + +{len(p['eras'])+6}/ The most interesting finding? {round(p['active_days']/max(p['span_days'],1)*100, 0)}% of days were active. {'Consistent development.' if round(p['active_days']/max(p['span_days'],1)*100, 0) > 50 else 'Burst-heavy development pattern.'} + +{len(p['eras'])+7}/ Every git repo tells a story. {p['name']}'s story is {'one of rapid, AI-assisted creation' if round(p['total_commits']/max(p['active_days'],1), 1) > 10 else 'one of careful, intentional development'}. + +{len(p['eras'])+8}/ Want to see your project's archaeological record? Check out dev-archaeology. +""" + + +def content_project_narrative(p: dict) -> str: + return f"""# Project Narrative — {p['name']} + +## The Beginning + +{p['eras'][0]['description'] if p['eras'] else 'The project began.'} Over the course of {p['lifespan']}, {p['name']} would accumulate {p['total_commits']} commits from {len(p['contributors'])} developer(s). 
+ +## The Eras + +{"".join(f"## {e.get('name', f'Era {e.get('id', '?')}')} ({e.get('dates', '?')})\n\n{e.get('commits', '?')} commits shaped this era.\n\n{e.get('description', '')}\n\nKey milestones:\n" + "".join(f"- {evt}\n" for evt in e.get('key_events', [])[:6]) + "\n" for e in p['eras'])} + +## The Numbers Tell the Story + +{p['total_commits']} commits. {p['active_days']} active days. A development density of {round(p['total_commits']/max(p['active_days'],1), 1)} commits per active day. + +The peak came on {p['peak_day']} with {p['peak_day_commits']} commits — a day when everything aligned. + +## What This Project Says About Growth + +{'The multiple era transitions show a developer learning, adapting, and refining their approach.' if p['era_count'] > 2 else 'The focused development shows clarity of purpose from the start.'} {'The commit types reveal a project that values features and fixing in roughly equal measure.' if abs(p['commit_types'].get('feat', 0) - p['commit_types'].get('fix', 0)) < p['total_commits'] * 0.3 else 'The feature-heavy commit pattern shows a project in active creation mode.'} +""" + + +def content_ai_collaboration(p: dict) -> str: + return f"""# AI Collaboration Analysis — {p['name']} + +## Overview + +Analysis of human-AI collaboration patterns in **{p['name']}**. + +**Total commits**: {p['total_commits']} +**Eras**: {p['era_count']} + +## Evidence of AI-Assisted Development + +All KyaniteLabs projects use AI-assisted development. 
Key indicators: +- **Velocity**: {round(p['total_commits']/max(p['active_days'],1), 1)} commits/day is {'consistent with AI-assisted workflows' if round(p['total_commits']/max(p['active_days'],1), 1) > 5 else 'typical for mixed workflows'} +- **Commit patterns**: {'High feature count suggests AI pair programming' if p['commit_types'].get('feat', 0) > 20 else 'Moderate feature count'} +- **Peak intensity**: {p['peak_day_commits']} commits in one day requires {'AI assistance' if p['peak_day_commits'] > 20 else 'focused work'} + +## Collaboration Patterns by Era + +{"".join(f"### {e.get('name', f'Era {e.get('id', '?')}')}\n- Commits: {e.get('commits', '?')}\n- Intensity: {'High' if e.get('commits', 0) > 30 else 'Medium' if e.get('commits', 0) > 10 else 'Low'}\n\n" for e in p['eras'])} + +## Lessons + +1. **Velocity scales with AI assistance** — peak days show what's possible +2. **Quality requires intention** — fix and test commits need deliberate focus +3. **Era transitions are natural** — they represent learning and adaptation +""" + + +def content_dev_rhythm(p: dict) -> str: + return f"""# Development Rhythm Analysis — {p['name']} + +## Rhythm Profile + +**{p['name']}** shows a {'burst-heavy' if p['active_days']/max(p['span_days'],1) < 0.5 else 'consistent'} development rhythm. 
+ +| Metric | Value | +|--------|-------| +| Total commits | {p['total_commits']} | +| Active days | {p['active_days']} | +| Total span | {p['span_days']} days | +| Active ratio | {round(p['active_days']/max(p['span_days'],1)*100, 0)}% | +| Avg commits/active day | {round(p['total_commits']/max(p['active_days'],1), 1)} | +| Peak day | {p['peak_day']} ({p['peak_day_commits']} commits) | + +## Era Velocity + +{"".join(f"- **{e.get('name', f'Era {e.get('id', '?')}')}**: {e.get('commits', '?')} commits in {e.get('dates', '?')}\n" for e in p['eras'])} + +## Pattern Analysis + +- **Development style**: {'Sprint-oriented' if p['era_count'] > 3 else 'Steady-paced'} +- **Gaps**: {len(p['gaps'])} gap(s) detected — {'common in side projects' if len(p['gaps']) > 2 else 'minimal interruptions'} +- **Sustainability**: {'High velocity — watch for burnout' if round(p['total_commits']/max(p['active_days'],1), 1) > 20 else 'Sustainable pace'} +""" + + +def content_tech_decisions(p: dict) -> str: + return f"""# Technical Decisions Log — {p['name']} + +## Overview + +Key technical decisions visible in **{p['name']}**'s commit history. + +**Total commits**: {p['total_commits']} +**Commit types**: {', '.join(f'{k} ({v})' for k, v in sorted(p['commit_types'].items(), key=lambda x: -x[1])[:6])} + +## Era-by-Era Decisions + +{"".join(f"### {e.get('name', f'Era {e.get('id', '?')}')} ({e.get('dates', '?')})\n\n{e.get('description', 'Development phase.')}\n\n**Key decisions:**\n" + "".join(f"- {evt}\n" for evt in e.get('key_events', [])[:5]) + "\n" for e in p['eras'])} + +## Architecture Evolution + +The project evolved through {p['era_count']} distinct phases, each representing a shift in development focus and technical direction. 
+ +## Quality Indicators + +- Test commits: {p['commit_types'].get('test', 0)} ({round(p['commit_types'].get('test', 0)/max(p['total_commits'],1)*100, 1)}%) +- Refactor commits: {p['commit_types'].get('refactor', 0)} ({round(p['commit_types'].get('refactor', 0)/max(p['total_commits'],1)*100, 1)}%) +- CI commits: {p['commit_types'].get('ci', 0)} ({round(p['commit_types'].get('ci', 0)/max(p['total_commits'],1)*100, 1)}%) +""" + + +def content_era_deep_dive(p: dict) -> str: + return f"""# Era Deep-Dive — {p['name']} + +## Overview + +Detailed analysis of each development era in **{p['name']}**. + +**{p['total_commits']} commits** across **{p['era_count']} eras** over **{p['lifespan']}**. + +--- + +{"".join(f"""## Era {e.get('id', '?')}: {e.get('name', '?')} + +**Dates**: {e.get('dates', '?')} +**Commits**: {e.get('commits', '?')} +**Active days**: {len(e.get('daily', {}))} +**Description**: {e.get('description', 'N/A')} + +### Key Events +{"".join(f'- {evt}\n' for evt in e.get('key_events', [])[:8])} + +### Daily Commit Distribution +{', '.join(f'{k}: {v}' for k, v in list(e.get('daily', {}).items())[:10])} + +--- + +""" for e in p['eras'])} + +## Era Transitions + +{"".join(f"- Era {p['eras'][i].get('id', '?')} → Era {p['eras'][i+1].get('id', '?')}: {p['eras'][i].get('name', '?')} to {p['eras'][i+1].get('name', '?')}\n" for i in range(len(p['eras'])-1))} + +## Summary + +The {p['era_count']} eras of {p['name']} represent {'a clear evolution in development focus and approach' if p['era_count'] > 2 else 'focused, consistent development'}. +""" + + +# ── Video files ─────────────────────────────────────────────────────────── + +def video_script_outline(p: dict) -> str: + return f"""# Video Script Outline — {p['name']} + +## Opening Hook (30 seconds) + +"What if you could read the entire story of a software project — not from documentation, but from its git history?" + +**{p['name']}**: {p['total_commits']} commits. {p['era_count']} eras. One story. 
+ +## Section 1: The Setup (60 seconds) + +- Show the project stats: {p['total_commits']} commits, {p['active_days']} active days +- Introduce the concept of development archaeology +- Visual: commit timeline chart + +## Section 2: The Eras (90 seconds each) + +{"".join(f"### {e.get('name', f'Era {e.get('id', '?')}')} ({e.get('dates', '?')})\n- {e.get('commits', '?')} commits\n- {e.get('description', '')[:100]}\n- Visual: era strip with highlight\n\n" for e in p['eras'])} + +## Section 3: Key Findings (60 seconds) + +- Peak day: {p['peak_day']} with {p['peak_day_commits']} commits +- Commit type breakdown: {', '.join(f'{k} ({v})' for k, v in sorted(p['commit_types'].items(), key=lambda x: -x[1])[:4])} +- What the data reveals about development patterns + +## Closing (30 seconds) + +"The code remembers everything. We just have to look." + +CTA: Try dev-archaeology on your own projects + +*Estimated total runtime: 5-8 minutes* +""" + + +# ── Master generator ────────────────────────────────────────────────────── + +FILE_GENERATORS = { + # Analysis MD + "deliverables/analysis/analysis-sdlc-gap-finder.md": analysis_sdlc_gap_finder, + "deliverables/analysis/analysis-ml-pattern-mapper.md": analysis_ml_pattern_mapper, + "deliverables/analysis/analysis-formal-terms-mapper.md": analysis_formal_terms_mapper, + "deliverables/analysis/analysis-source-archaeologist.md": analysis_source_archaeologist, + "deliverables/analysis/analysis-youtube-correlator.md": analysis_youtube_correlator, + # Reports + "deliverables/reports/CROSS-REPO-NARRATIVE.md": report_cross_repo_narrative, + "deliverables/reports/raw-narrative.md": report_raw_narrative, + # Strategy + "deliverables/strategy/ADVERSARIAL-ANALYSIS.md": strategy_adversarial, + "deliverables/strategy/AGENT-BENCHMARK-REPORT.md": strategy_agent_benchmark, + "deliverables/strategy/VALUE-CHAIN-ANALYSIS.md": strategy_porter_value_chain, + "deliverables/strategy/SWOT-ANALYSIS.md": strategy_swot_analysis, + 
"deliverables/strategy/WARDLEY-MAP.md": strategy_wardley_map, + "deliverables/strategy/BCG-MATRIX.md": strategy_bcg_matrix, + "deliverables/strategy/ANSOFF-MATRIX.md": strategy_ansoff_matrix, + "deliverables/strategy/BLUE-OCEAN.md": strategy_blue_ocean, + # Planning + "deliverables/planning/REMEDIATION_SUMMARY.md": planning_remediation_summary, + "deliverables/planning/external-data-sources-research.md": planning_external_data, + "deliverables/planning/META-PATTERN-VISUALIZATION-RESEARCH.md": planning_meta_pattern, + # Learning + "deliverables/learning/ML-LEARNING-PLAN.md": learning_ml_plan, + "deliverables/learning/RECURSIVE-STORY-CIRCLE.md": learning_story_circle, + # Content + "deliverables/content/blog-draft.md": content_blog_draft, + "deliverables/content/excavation-report.md": content_excavation_report, + "deliverables/content/STORY-CIRCLE-SAMPLE.md": content_story_circle_sample, + "deliverables/content/twitter-thread.md": content_twitter_thread, + "deliverables/content/project-narrative.md": content_project_narrative, + "deliverables/content/ai-collaboration-analysis.md": content_ai_collaboration, + "deliverables/content/development-rhythm-analysis.md": content_dev_rhythm, + "deliverables/content/technical-decisions-log.md": content_tech_decisions, + "deliverables/content/era-deep-dive.md": content_era_deep_dive, + # Video + "deliverables/video/video-script-outline.md": video_script_outline, +} + + +def generate_for_project(project_name: str) -> int: + print(f"\n{'='*50}") + print(f"Generating: {project_name}") + print(f"{'='*50}") + + proj = load_project(project_name) + pdir = proj["pdir"] + generated = [] + + for rel_path, gen_func in FILE_GENERATORS.items(): + target = pdir / rel_path + if target.exists(): + continue + content = gen_func(proj) + write_file(target, content, generated) + + print(f" Generated {len(generated)} new files") + return len(generated) + + +def main(): + if len(sys.argv) < 2: + print("Usage: generate_template_deliverables.py | 
--all") + sys.exit(1) + + total = 0 + if sys.argv[1] == "--all": + for name in ["Achiote", "DECLuTTER-AI", "DialectOS", "Epoch", "Fugax", "mcp-video", "openglaze", "liminal"]: + total += generate_for_project(name) + else: + total += generate_for_project(sys.argv[1]) + + print(f"\nTotal: {total} new files generated") + + +if __name__ == "__main__": + main() diff --git a/scripts/data/refresh_data.py b/scripts/data/refresh_data.py index f39f217..be3ecbb 100644 --- a/scripts/data/refresh_data.py +++ b/scripts/data/refresh_data.py @@ -2,15 +2,14 @@ """ Dev-Archaeology: Incremental Data Refresh ========================================== -Mines a git repo and updates data.json incrementally. +Mines the Liminal git repo and updates data.json incrementally. Adds new dates without destroying historical analysis. Usage: - python3 refresh_data.py # Full refresh (uses DEFAULT_PRIMARY_PROJECT) - python3 refresh_data.py --primary-project myproject # Use specific project as primary + python3 refresh_data.py # Full refresh python3 refresh_data.py --sections meta,commits,hourly # Partial refresh - python3 refresh_data.py --dry-run # Show what would change - python3 refresh_data.py --repo /path/to/repo # Custom repo path + python3 refresh_data.py --dry-run # Show what would change + python3 refresh_data.py --repo /path/to/repo # Custom repo path Design principles: - Existing data.json dates are PRESERVED (only appended to) @@ -30,10 +29,9 @@ # ─── Configuration ────────────────────────────────────────────────────────────── -DEFAULT_PRIMARY_PROJECT = "liminal" # Override with --primary-project DEFAULT_REPO = Path("/Users/simongonzalezdecruz/Desktop/OMC/liminal") -DEFAULT_DATA_JSON = Path(__file__).parent / "projects" / DEFAULT_PRIMARY_PROJECT / "deliverables" / "data.json" -DEFAULT_ERAS_JSON = Path(__file__).parent / "projects" / DEFAULT_PRIMARY_PROJECT / "data" / "commit-eras.json" +DEFAULT_DATA_JSON = Path(__file__).parent / "projects" / "liminal" / "deliverables" / "data.json" 
+DEFAULT_ERAS_JSON = Path(__file__).parent / "projects" / "liminal" / "data" / "commit-eras.json" ALL_SECTIONS = [ "meta", "commits", "hourly", "types", "authors", @@ -1173,15 +1171,15 @@ def update_sentiment(data: dict, repo: Path, dry_run: bool) -> list[str]: return changes -def update_cross_repo(data: dict, repo: Path, dry_run: bool, primary_project: str = "primary") -> list[str]: +def update_cross_repo(data: dict, repo: Path, dry_run: bool) -> list[str]: """Update cross_repo_velocity_correlation section.""" changes = [] crc = data.get("derived_patterns", {}).get("cross_repo_velocity_correlation", {}) - # Get daily commits for primary project + # Get daily commits for liminal daily = extract_daily_commits(repo) - # Update daily_data (it's a list of dicts with date, primary, other_repos, total) + # Update daily_data (it's a list of dicts with date, liminal, other_repos, total) if "daily_data" in crc and isinstance(crc["daily_data"], list): daily_data = crc["daily_data"] @@ -1189,20 +1187,20 @@ def update_cross_repo(data: dict, repo: Path, dry_run: bool, primary_project: st existing_by_date = {entry.get("date"): entry for entry in daily_data if isinstance(entry, dict)} # Update or add entries for each date - for date, primary_count in daily.items(): + for date, liminal_count in daily.items(): if date in existing_by_date: entry = existing_by_date[date] - if entry.get("primary") != primary_count: - changes.append(f" cross_repo_velocity_correlation.daily_data[{date}].primary: {entry.get('primary')} → {primary_count}") + if entry.get("liminal") != liminal_count: + changes.append(f" cross_repo_velocity_correlation.daily_data[{date}].liminal: {entry.get('liminal')} → {liminal_count}") if not dry_run: - entry["primary"] = primary_count + entry["liminal"] = liminal_count # Update total other = entry.get("other_repos", 0) - entry["total"] = primary_count + other + entry["total"] = liminal_count + other else: # Add new entry - new_entry = {"date": date, "primary": 
primary_count, "other_repos": 0, "total": primary_count} - changes.append(f" + cross_repo_velocity_correlation.daily_data[{date}]: primary={primary_count}") + new_entry = {"date": date, "liminal": liminal_count, "other_repos": 0, "total": liminal_count} + changes.append(f" + cross_repo_velocity_correlation.daily_data[{date}]: liminal={liminal_count}") if not dry_run: daily_data.append(new_entry) @@ -1437,7 +1435,6 @@ def main(): parser.add_argument("--sections", help="Comma-separated sections to update (default: all)") parser.add_argument("--dry-run", action="store_true", help="Show changes without writing") parser.add_argument("--list", action="store_true", help="List available sections") - parser.add_argument("--primary-project", default=DEFAULT_PRIMARY_PROJECT, help="Primary project name (used in cross-repo data)") args = parser.parse_args() if args.list: @@ -1480,11 +1477,7 @@ def main(): continue try: - # Cross-repo section needs primary_project parameter - if section == "cross_repo": - changes = fn(data, args.repo, args.dry_run, args.primary_project) - else: - changes = fn(data, args.repo, args.dry_run) + changes = fn(data, args.repo, args.dry_run) if changes: print(f"[{section}] {len(changes)} changes:") for c in changes: diff --git a/scripts/dev-archaeology-dashboard.plist.template b/scripts/dev-archaeology-dashboard.plist.template new file mode 100644 index 0000000..8e89402 --- /dev/null +++ b/scripts/dev-archaeology-dashboard.plist.template @@ -0,0 +1,28 @@ + + + + + Label + com.kyanitelabs.dev-archaeology.dashboard + ProgramArguments + + /opt/homebrew/bin/python3 + -m + archaeology.cli + serve + --no-open + --port + 8099 + + WorkingDirectory + /Users/simongonzalezdecruz/workspaces/dev-archaeology + RunAtLoad + + KeepAlive + + StandardOutPath + /tmp/dev-archaeology-dashboard.log + StandardErrorPath + /tmp/dev-archaeology-dashboard.err + + diff --git a/scripts/hooks/install.sh b/scripts/hooks/install.sh index dc403de..f4081cd 100755 --- 
a/scripts/hooks/install.sh +++ b/scripts/hooks/install.sh @@ -1,24 +1,46 @@ -#!/bin/bash -# Install git hooks by symlinking them into .git/hooks/ +#!/usr/bin/env bash +# +# Install git hooks for dev-archaeology +# Symlinks hooks from scripts/hooks/ to .git/hooks/ +# set -e SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" -HOOKS_DIR="$SCRIPT_DIR" -GIT_HOOKS_DIR="$REPO_ROOT/.git/hooks" - -echo "Installing git hooks..." - -# Create symlinks for each hook -for hook in pre-commit pre-push; do - if [ -f "$HOOKS_DIR/$hook" ]; then - chmod +x "$HOOKS_DIR/$hook" - ln -sf "$HOOKS_DIR/$hook" "$GIT_HOOKS_DIR/$hook" - echo "✓ Linked $hook" - else - echo "⚠ Hook $hook not found, skipping" +HOOKS_SRC="$SCRIPT_DIR" +HOOKS_DST="$REPO_ROOT/.git/hooks" + +echo "📦 Installing git hooks for dev-archaeology..." +echo "" + +# Ensure hooks directory exists +mkdir -p "$HOOKS_DST" + +# List of hooks to install +HOOKS=("pre-commit" "pre-push") + +for hook in "${HOOKS[@]}"; do + src="$HOOKS_SRC/$hook" + dst="$HOOKS_DST/$hook" + + # Remove existing hook if present (idempotent) + if [ -L "$dst" ]; then + echo "🔄 Removing existing symlink: $hook" + rm "$dst" + elif [ -f "$dst" ]; then + echo "⚠️ Backing up existing hook: $hook → $hook.bak" + mv "$dst" "$dst.bak" fi + + # Create symlink + ln -s "$src" "$dst" + echo "✓ Installed: $hook" done -echo "Git hooks installed successfully" +echo "" +echo "✅ Git hooks installed successfully" +echo "" +echo "Active hooks:" +echo " • pre-commit – Era scanner + framework sync reminder" +echo " • pre-push – Audit + parity check (blocks on failure)" diff --git a/scripts/hooks/pre-commit b/scripts/hooks/pre-commit index 368260f..5e13429 100755 --- a/scripts/hooks/pre-commit +++ b/scripts/hooks/pre-commit @@ -1,39 +1,67 @@ -#!/bin/bash -# Pre-commit hook: syntax check Python files and verify archaeology package imports +#!/usr/bin/env bash +# +# Pre-commit hook for dev-archaeology +# Runs era scanner on 
deliverable changes and reminds about framework sync +# set -e -RED='\033[0;31m' -GREEN='\033[0;32m' -NC='\033[0m' # No Color +# Get list of staged files +STAGED_FILES=$(git diff --cached --name-only --diff-filter=ACM) -# Get list of staged .py files -PY_FILES=$(git diff --cached --name-only --diff-filter=ACM | grep '\.py$' || true) +# Check if any deliverable files are staged +DELIVERABLES_CHANGED=$(echo "$STAGED_FILES" | grep -E '^projects/[^/]+/deliverables/' || true) -if [ -z "$PY_FILES" ]; then - exit 0 -fi +if [ -n "$DELIVERABLES_CHANGED" ]; then + echo "📜 Deliverable files staged, running era scanner..." -echo "Checking Python syntax..." + # Determine project directory from first staged deliverable + FIRST_FILE=$(echo "$DELIVERABLES_CHANGED" | head -1) + PROJECT_DIR=$(echo "$FIRST_FILE" | sed -E 's|^(projects/[^/]+).*|\1|') -# Check syntax of all staged Python files -for file in $PY_FILES; do - if ! python3 -m py_compile "$file" 2>/dev/null; then - echo -e "${RED}✗ Syntax error in $file${NC}" - exit 1 - fi -done + # Locate commit-eras.json (could be in project dir or project/data/) + ERAS_FILE="" + for candidate in "$PROJECT_DIR/commit-eras.json" "$PROJECT_DIR/data/commit-eras.json"; do + if [ -f "$candidate" ]; then + ERAS_FILE="$candidate" + break + fi + done + + if [ -z "$ERAS_FILE" ]; then + echo "⚠️ No commit-eras.json found for $PROJECT_DIR — skipping era scan" + else + # Run era scanner via Python module + FINDING_COUNT=$(python3 -c " +import sys +sys.path.insert(0, '.') +from pathlib import Path +from archaeology.era_scanner import scan_deliverables +from archaeology.era_mapper import load_eras +eras = load_eras(Path('$ERAS_FILE')) +result = scan_deliverables(Path('$PROJECT_DIR'), eras) +print(len(result.refs)) +" 2>&1) || { + echo "⚠️ Era scanner error. Output: $FINDING_COUNT" + exit 1 + } -echo -e "${GREEN}✓ All Python files pass syntax check${NC}" + if [ "$FINDING_COUNT" != "0" ]; then + echo "⚠️ Era scanner found $FINDING_COUNT issues. 
Run the scanner to review."
+        exit 1
+    fi

-# If any files in archaeology/ are staged, verify the package still imports
-if echo "$PY_FILES" | grep -q '^archaeology/'; then
-    echo "Verifying archaeology package imports..."
-    if ! python3 -c "import archaeology; print('OK')" 2>/dev/null; then
-        echo -e "${RED}✗ archaeology package import failed${NC}"
-        exit 1
+        echo "✓ Era scanner passed (0 findings)"
     fi
-    echo -e "${GREEN}✓ archaeology package imports successfully${NC}"
+fi
+
+# Check if archaeology/ package files are staged
+ARCHAEOLOGY_CHANGED=$(echo "$STAGED_FILES" | grep -E '^archaeology/' || true)
+
+if [ -n "$ARCHAEOLOGY_CHANGED" ]; then
+    echo ""
+    echo "⚠️  archaeology/ package changed — remember to sync devarch-framework"
+    echo "   Run: python3 scripts/sync/check_parity.py after syncing"
 fi

 exit 0
diff --git a/scripts/hooks/pre-push b/scripts/hooks/pre-push
index f096daf..334345e 100755
--- a/scripts/hooks/pre-push
+++ b/scripts/hooks/pre-push
@@ -1,34 +1,24 @@
-#!/bin/bash
-# Pre-push hook: run demo and test suite to verify basic functionality
+#!/usr/bin/env bash
+#
+# Pre-push hook for dev-archaeology
+# Blocks push if the audit reports blocking (HIGH/CRITICAL) findings
+#
 set -e

-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[0;33m'
-NC='\033[0m' # No Color
+echo "🔍 Running pre-push checks..."
+echo ""

-echo "Running pre-push checks..."
-
-# Run demo project generation and build
-echo "Building demo project..."
-if ! python3 -m archaeology.cli demo --force --build-db > /dev/null 2>&1; then
-    echo -e "${RED}✗ Demo project build failed${NC}"
+# Run audit on demo project with fail-on HIGH
+echo "1️⃣  Running audit (HIGH severity blocks push)..."
+if !
python3 -m archaeology.cli audit demo-archaeology --fail-on HIGH; then + echo "" + echo "❌ Audit failed with HIGH or CRITICAL findings" + echo " Please fix all blocking issues before pushing" exit 1 fi -echo -e "${GREEN}✓ Demo project builds successfully${NC}" - -# Run test suite if tests exist -if [ -d "tests" ] && [ "$(ls -A tests/*.py 2>/dev/null)" ]; then - echo "Running test suite..." - if ! python3 -m pytest tests/ -x -q; then - echo -e "${RED}✗ Tests failed${NC}" - exit 1 - fi - echo -e "${GREEN}✓ All tests passed${NC}" -else - echo -e "${YELLOW}⚠ No tests found, skipping test suite${NC}" -fi +echo "✓ Audit passed" +echo "" -echo -e "${GREEN}✓ Pre-push checks passed${NC}" +echo "✅ All pre-push checks passed" exit 0 diff --git a/scripts/integrations/README.md b/scripts/integrations/README.md new file mode 100644 index 0000000..5e67079 --- /dev/null +++ b/scripts/integrations/README.md @@ -0,0 +1,355 @@ +# Dev-Archaeology Integration Hooks + +This directory contains integration hooks for external tools to trigger dev-archaeology analysis. + +## Scout Hook + +The `scout_hook.py` script allows external tools (research-scout, CI/CD, etc.) to automatically trigger archaeological analysis on discovered repositories. 
+ +### Features + +- **Multiple input modes**: CLI arguments, JSON stdin, or programmatic Python calls +- **Automatic cloning**: Clones repositories from URLs to temporary directories +- **Full pipeline execution**: init → mine → build-db → signals → analyze +- **Structured JSON output**: Returns status, metrics, and artifact paths +- **Graceful error handling**: Continues through non-critical failures (signals, analysis) +- **Automatic cleanup**: Removes temporary clones by default + +### Usage + +#### CLI Mode (Repository URL) + +```bash +python3 scripts/integrations/scout_hook.py \ + --repo-url https://github.com/user/repo \ + --project-name my-project +``` + +#### CLI Mode (Local Repository) + +```bash +python3 scripts/integrations/scout_hook.py \ + --repo-path /path/to/local/repo \ + --project-name my-project +``` + +#### Stdin Mode (JSON Input) + +```bash +echo '{"url": "https://github.com/user/repo", "name": "my-project"}' | \ + python3 scripts/integrations/scout_hook.py --stdin +``` + +#### Keep Cloned Repository + +```bash +python3 scripts/integrations/scout_hook.py \ + --repo-url https://github.com/user/repo \ + --project-name my-project \ + --keep +``` + +#### Custom Clone Directory + +```bash +python3 scripts/integrations/scout_hook.py \ + --repo-url https://github.com/user/repo \ + --project-name my-project \ + --clone-dir /tmp/archaeology-clones +``` + +### Input Format + +#### CLI Arguments + +- `--repo-url`: Repository URL to clone and analyze +- `--repo-path`: Local repository path (skips cloning) +- `--project-name`: Name for the archaeology project (required) +- `--clone-dir`: Directory for cloned repos (default: temp dir) +- `--keep`: Keep cloned repository after analysis +- `--stdin`: Read input as JSON from stdin + +#### JSON Stdin Format + +```json +{ + "url": "https://github.com/user/repo", + "path": "/path/to/local/repo", + "name": "my-project", + "keep": false, + "clone_dir": "/tmp/archaeology-clones" +} +``` + +Either `url` or `path` must 
be provided. `name` is required. + +### Output Format + +The script outputs JSON to stdout with the following structure: + +```json +{ + "project_name": "my-project", + "repo_path": "/path/to/repo", + "repo_url": "https://github.com/user/repo", + "status": "complete", + "steps": { + "init": { + "status": "success", + "message": "Created project 'my-project' at projects/my-project/" + }, + "mine": { + "status": "success", + "message": "Extracted 123 commits to projects/my-project/data/github-commits.csv" + }, + "build_db": { + "status": "success", + "message": "Database built at projects/my-project/data/archaeology.db" + }, + "signals": { + "status": "success", + "message": "Detected 5 signals across 3 clusters." + }, + "analyze": { + "status": "success", + "message": " sdlc-gap-finder: projects/my-project/deliverables/analysis-sdlc-gap-finder.json\n ..." + } + }, + "metrics": { + "commit_count": 123, + "db_built": true, + "signal_count": 5, + "analysis_count": 6 + }, + "artifacts": { + "project_dir": "projects/my-project", + "db_path": "projects/my-project/data/archaeology.db", + "analysis_files": [ + "projects/my-project/deliverables/analysis-sdlc-gap-finder.json", + "projects/my-project/deliverables/analysis-ml-pattern-mapper.json", + "projects/my-project/deliverables/analysis-agentic-workflow.json", + "projects/my-project/deliverables/analysis-formal-terms-mapper.json", + "projects/my-project/deliverables/analysis-source-archaeologist.json", + "projects/my-project/deliverables/analysis-youtube-correlator.json" + ] + } +} +``` + +#### Status Values + +- `complete`: All critical steps succeeded +- `failed`: One or more critical steps failed (init, mine, build-db) +- `error`: Unexpected error occurred +- `running`: Pipeline is still executing (should not appear in final output) + +### Integration with research-scout + +#### Example Configuration + +If research-scout supports webhook or script execution, configure it to call the scout hook: + +```yaml +# 
research-scout config example +on_repo_discovered: + trigger_archaeology: + script: "/path/to/dev-archaeology/scripts/integrations/scout_hook.py" + args: + - "--repo-url" + - "{{repo_url}}" + - "--project-name" + - "{{repo_name}}" + parse_output: json + on_success: + log: "Archaeology analysis complete: {{output.metrics.commit_count}} commits" + on_failure: + log: "Archaeology analysis failed: {{output.error}}" +``` + +#### Programmatic Integration + +```python +import json +import subprocess + +def analyze_repo(repo_url: str, project_name: str) -> dict: + """Trigger archaeology analysis from research-scout.""" + cmd = [ + "python3", "scripts/integrations/scout_hook.py", + "--repo-url", repo_url, + "--project-name", project_name, + ] + result = subprocess.run( + cmd, + capture_output=True, + text=True, + cwd="/path/to/dev-archaeology", + ) + return json.loads(result.stdout) + +# Usage +result = analyze_repo( + "https://github.com/user/repo", + "my-project" +) +if result["status"] == "complete": + print(f"Analysis complete: {result['metrics']['commit_count']} commits") +else: + print(f"Analysis failed: {result.get('error')}") +``` + +### CI/CD Integration + +#### GitHub Actions Example + +```yaml +name: Archaeology Analysis + +on: + push: + branches: [main] + +jobs: + archaeology: + runs-on: ubuntu-latest + steps: + - name: Checkout dev-archaeology + uses: actions/checkout@v3 + with: + path: dev-archaeology + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + cd dev-archaeology + pip install -e . 
+ + - name: Run archaeology analysis + run: | + python3 scripts/integrations/scout_hook.py \ + --repo-url ${{ github.repositoryUrl }} \ + --project-name ${{ github.event.repository.name }} + + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + name: archaeology-results + path: dev-archaeology/projects/*/deliverables/ +``` + +#### GitLab CI Example + +```yaml +archaeology: + script: + - pip install -e . + - python3 scripts/integrations/scout_hook.py + --repo-url $CI_REPOSITORY_URL + --project-name $CI_PROJECT_NAME + artifacts: + paths: + - projects/*/deliverables/ + reports: + archaeology: archaeology-report.json +``` + +### Error Handling + +The script handles errors gracefully: + +- **Critical failures** (init, mine, build-db): Set `status: failed` and exit with code 1 +- **Partial failures** (signals, analyze): Set step status to `partial` but continue +- **Clone failures**: Return error message with details +- **Timeouts**: Each step has a timeout (mine: 10min, build-db: 10min, analyze: 10min) + +### Exit Codes + +- `0`: Success (complete or partial success) +- `1`: Failure (critical step failed or error occurred) + +### Troubleshooting + +#### Repository Not Found + +```json +{ + "status": "failed", + "error": "Repository not found: /path/to/repo" +} +``` + +**Solution**: Verify the repository path or URL is correct. + +#### Clone Timeout + +```json +{ + "status": "failed", + "error": "git clone timed out" +} +``` + +**Solution**: Large repositories may take longer to clone. Consider using a local path or increasing the timeout in the script. + +#### Project Already Exists + +If a project with the same name exists, the init step will fail. 
Either:
+- Use a unique project name
+- Delete the existing project directory first
+- Modify the script to update existing projects
+
+### Advanced Usage
+
+#### Batch Processing
+
+```bash
+# Analyze multiple repos
+while read -r url name; do
+  python3 scripts/integrations/scout_hook.py \
+    --repo-url "$url" \
+    --project-name "$name"
+done < repos.txt
+```
+
+#### Parallel Processing
+
+```bash
+# Analyze repos in parallel (GNU parallel; --colsep splits each line into {1} and {2})
+cat repos.txt | parallel --colsep ' ' -j 4 \
+  "python3 scripts/integrations/scout_hook.py \
+    --repo-url {1} \
+    --project-name {2}"
+```
+
+#### Custom Analysis Pipeline
+
+To customize the pipeline steps, edit the `run_full_pipeline()` function in `scout_hook.py`:
+
+```python
+# Skip signals detection
+# success, msg, data = detect_signals(project_name)
+
+# Run specific analysis vectors only
+cmd = [sys.executable, "-m", "archaeology.cli", "analyze", project_name, "--vector", "sdlc-gap-finder"]
+```
+
+## Contributing
+
+When adding new integration hooks:
+
+1. Follow the same input/output conventions (JSON stdin/stdout)
+2. Include comprehensive error handling
+3. Document the hook in this README
+4. Add examples for common use cases
+5.
Test with both URLs and local paths + +## Support + +For issues or questions: +- Open an issue on the dev-archaeology repository +- Check the main dev-archaeology documentation +- Review the CLI help: `archaeology --help` diff --git a/scripts/integrations/research-scout-config.example.yaml b/scripts/integrations/research-scout-config.example.yaml new file mode 100644 index 0000000..beaf3d0 --- /dev/null +++ b/scripts/integrations/research-scout-config.example.yaml @@ -0,0 +1,227 @@ +# Example configuration for wiring research-scout to dev-archaeology +# This shows how research-scout could trigger archaeological analysis on discovered repos + +# research-scout configuration example (pseudo-config) +# Note: research-scout's actual configuration format may differ + +on_repository_discovered: + # Trigger archaeology analysis for repos with significant commit history + if: + - commits_count > 50 + - not: fork + - language_in: [Python, TypeScript, JavaScript, Go, Rust] + + then: + archaeology_analysis: + enabled: true + script_path: "/path/to/dev-archaeology/scripts/integrations/scout_hook.py" + + # Input mapping from research-scout context to scout_hook args + input: + # Use repo URL for cloning + repo_url: "{{repository.url}}" + + # Generate project name from repo owner/name + project_name: "{{repository.owner}}-{{repository.name}}" + + # Optional: keep cloned repos for debugging + keep: false + + # Optional: custom clone directory + clone_dir: "/tmp/archaeology-clones" + + # Output parsing + parse_output: json + + # Success criteria + success_criteria: + status: + - complete + - partial # Accept partial success (some analysis vectors may fail) + + # Actions based on results + on_success: + - log: "✓ Archaeology analysis complete: {{output.metrics.commit_count}} commits analyzed" + - log: " - Signals detected: {{output.metrics.signal_count}}" + - log: " - Analysis vectors: {{output.metrics.analysis_count}}/{{output.metrics.total_vectors}}" + - store_artifacts: + 
database: "{{output.artifacts.db_path}}" + analysis_files: "{{output.artifacts.analysis_files}}" + - label: + add: "archaeology:analyzed" + + on_failure: + - log: "✗ Archaeology analysis failed: {{output.error}}" + - label: + add: "archaeology:failed" + + # Conditional actions based on findings + on_signals_detected: + if: "{{output.metrics.signal_count}} > 10" + then: + - log: "⚠ High signal activity detected ({{output.metrics.signal_count}} signals)" + - label: + add: "archaeology:high-activity" + + on_quality_gaps: + if: "{{output.analysis.sdlc_gap_finder.gaps}}" + then: + - log: "📋 SDLC gaps found: {{output.analysis.sdlc_gap_finder.gaps}}" + - create_issue: + title: "SDLC Gaps Detected in {{repository.name}}" + body: | + Archaeology analysis found {{output.analysis.sdlc_gap_finder.gaps.length}} SDLC gaps: + + {% for gap in output.analysis.sdlc_gap_finder.gaps %} + - **{{gap.practice}}**: {{gap.status}} (severity: {{gap.severity}}) + Recommendation: {{gap.recommendation}} + {% endfor %} + + Full analysis: {{output.artifacts.analysis_files}} + +# Alternative: Batch processing mode +batch_analysis: + enabled: true + schedule: "0 0 * * 0" # Weekly + + # Find repos analyzed in the last week + repos: + source: "github" + query: "pushed:>7d language:python" + limit: 20 + + # Run archaeology analysis on each + pipeline: + - step: "trigger_archaeology" + script: "/path/to/dev-archaeology/scripts/integrations/scout_hook.py" + parallel: true # Run up to 4 analyses in parallel + max_workers: 4 + + - step: "aggregate_results" + output: "weekly-archaeology-report.json" + + - step: "notify" + slack: + channel: "#archaeology-updates" + message: | + Weekly archaeology analysis complete: + - Repos analyzed: {{aggregate_results.total_repos}} + - Total commits: {{aggregate_results.total_commits}} + - High-signal repos: {{aggregate_results.high_signal_repos}} + - Report: {{aggregate_results.report_url}} + +# Webhook mode: Receive triggers from external systems +webhook: + endpoint: 
"/webhook/archaeology" + method: POST + + # Expected payload format + payload_schema: + type: object + properties: + repo_url: + type: string + format: uri + project_name: + type: string + keep: + type: boolean + default: false + + # Process webhook + handler: | + import subprocess + import json + + payload = json.loads(request.body) + cmd = [ + "python3", "/path/to/dev-archaeology/scripts/integrations/scout_hook.py", + "--repo-url", payload["repo_url"], + "--project-name", payload["project_name"], + ] + if payload.get("keep"): + cmd.append("--keep") + + result = subprocess.run(cmd, capture_output=True, text=True) + return json.loads(result.stdout) + +# CI/CD integration: GitHub Actions workflow example +github_actions: + workflow: | + name: Archaeology Analysis + + on: + push: + branches: [main] + + jobs: + archaeology: + runs-on: ubuntu-latest + steps: + - name: Checkout dev-archaeology + uses: actions/checkout@v3 + with: + path: dev-archaeology + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + cd dev-archaeology + pip install -e . 
+ + - name: Run archaeology analysis + run: | + python3 scripts/integrations/scout_hook.py \ + --repo-url ${{ github.repositoryUrl }} \ + --project-name ${{ github.event.repository.name }} + + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + name: archaeology-results + path: dev-archaeology/projects/*/deliverables/ + +# Example: Python programmatic integration +python_example: | + import json + import subprocess + from pathlib import Path + + def analyze_repo(repo_url: str, project_name: str) -> dict: + \"\"\"Trigger archaeology analysis from research-scout.\"\"\" + cmd = [ + "python3", "scripts/integrations/scout_hook.py", + "--repo-url", repo_url, + "--project-name", project_name, + ] + result = subprocess.run( + cmd, + capture_output=True, + text=True, + cwd="/path/to/dev-archaeology", + ) + return json.loads(result.stdout) + + # Usage + result = analyze_repo( + "https://github.com/user/repo", + "my-project" + ) + + if result["status"] == "complete": + print(f\"✓ Analysis complete: {result['metrics']['commit_count']} commits\") + print(f\" Signals: {result['metrics']['signal_count']}\") + print(f\" Analysis files: {result['metrics']['analysis_count']}\") + + # Check for quality gaps + with open(Path(result["artifacts"]["analysis_files"][0])) as f: + sdlc_data = json.load(f) + for gap in sdlc_data.get("gaps", []): + if gap["severity"] == "HIGH": + print(f\" ⚠ {gap['practice']}: {gap['status']}\") + else: + print(f\"✗ Analysis failed: {result.get('error')}\") diff --git a/scripts/integrations/scout_hook.py b/scripts/integrations/scout_hook.py new file mode 100755 index 0000000..0f8c83c --- /dev/null +++ b/scripts/integrations/scout_hook.py @@ -0,0 +1,498 @@ +#!/usr/bin/env python3 +"""Integration hook for research-scout to trigger dev-archaeology analysis. + +This script allows external tools (research-scout, CI/CD, etc.) to trigger +archaeological analysis on discovered repositories. 
+ +Usage: + # CLI mode + python3 scout_hook.py --repo-url https://github.com/user/repo --project-name my-project + + # Stdin mode + echo '{"url": "https://github.com/user/repo", "name": "my-project"}' | python3 scout_hook.py --stdin + + # Local repo + python3 scout_hook.py --repo-path /path/to/repo --project-name my-project + +Output: + JSON to stdout with status, metrics, and artifact paths +""" + +from __future__ import annotations + +import argparse +import json +import os +import shutil +import subprocess +import sys +import tempfile +from pathlib import Path +from typing import Any + + +def log_error(msg: str) -> None: + """Write error to stderr for non-JSON logging.""" + print(f"[ERROR] {msg}", file=sys.stderr) + + +def log_info(msg: str) -> None: + """Write info to stderr for progress logging.""" + print(f"[INFO] {msg}", file=sys.stderr) + + +def run_command(cmd: list[str], check: bool = True, timeout: int = 300) -> subprocess.CompletedProcess[str]: + """Run a command and return the result.""" + log_info(f"Running: {' '.join(cmd)}") + result = subprocess.run( + cmd, + capture_output=True, + text=True, + check=check, + timeout=timeout, + ) + if result.stderr: + log_info(f"stderr: {result.stderr.strip()}") + return result + + +def clone_repo(url: str, clone_dir: str) -> tuple[bool, str]: + """Clone a repository to a temporary directory. + + Returns: + (success, path_or_error) + """ + try: + log_info(f"Cloning {url} to {clone_dir}") + result = run_command( + ["git", "clone", "--depth", "1", url, clone_dir], + check=False, + timeout=600, + ) + if result.returncode != 0: + return False, f"git clone failed: {result.stderr}" + return True, clone_dir + except subprocess.TimeoutExpired: + return False, "git clone timed out" + except Exception as e: + return False, f"clone error: {e}" + + +def init_project(project_name: str, description: str, repo_url: str) -> tuple[bool, str, dict[str, Any]]: + """Initialize a new archaeology project. 
+ + Returns: + (success, message, result_dict) + """ + try: + log_info(f"Initializing project '{project_name}'") + cmd = [ + sys.executable, "-m", "archaeology.cli", + "init", project_name, + "--description", description, + "--repo-url", repo_url, + ] + result = run_command(cmd, check=False) + if result.returncode != 0: + return False, f"init failed: {result.stderr}", {} + + project_dir = os.path.join("projects", project_name) + return True, project_dir, {"project_dir": project_dir} + except Exception as e: + return False, f"init error: {e}", {} + + +def mine_repo(repo_path: str, project_name: str) -> tuple[bool, str, dict[str, Any]]: + """Extract git data from repository. + + Returns: + (success, message, result_dict) + """ + try: + log_info(f"Mining git data from {repo_path}") + cmd = [ + sys.executable, "-m", "archaeology.cli", + "mine", repo_path, + "--project", project_name, + ] + result = run_command(cmd, check=False, timeout=600) + if result.returncode != 0: + return False, f"mine failed: {result.stderr}", {} + + # Parse commit count from output + commit_count = 0 + for line in result.stdout.split("\n"): + if "Extracted" in line and "commits" in line: + try: + commit_count = int(line.split()[1]) + except (ValueError, IndexError): + pass + + return True, result.stdout, {"commit_count": commit_count} + except subprocess.TimeoutExpired: + return False, "mine timed out", {} + except Exception as e: + return False, f"mine error: {e}", {} + + +def build_database(project_name: str) -> tuple[bool, str, dict[str, Any]]: + """Build SQLite database from extracted data. 
+ + Returns: + (success, message, result_dict) + """ + try: + log_info(f"Building database for '{project_name}'") + cmd = [ + sys.executable, "-m", "archaeology.cli", + "build-db", project_name, + ] + result = run_command(cmd, check=False, timeout=600) + if result.returncode != 0: + return False, f"build-db failed: {result.stderr}", {} + + db_path = os.path.join("projects", project_name, "data", "archaeology.db") + exists = os.path.exists(db_path) + return True, result.stdout, {"db_path": db_path, "db_exists": exists} + except subprocess.TimeoutExpired: + return False, "build-db timed out", {} + except Exception as e: + return False, f"build-db error: {e}", {} + + +def detect_signals(project_name: str) -> tuple[bool, str, dict[str, Any]]: + """Detect development signals. + + Returns: + (success, message, result_dict) + """ + try: + log_info(f"Detecting signals for '{project_name}'") + cmd = [ + sys.executable, "-m", "archaeology.cli", + "signals", project_name, + ] + result = run_command(cmd, check=False, timeout=300) + if result.returncode != 0: + # Signals might fail if no patterns found - not critical + log_info(f"Signals detection returned non-zero: {result.stderr}") + + # Try to parse signal count + signal_count = 0 + for line in result.stdout.split("\n"): + if "Detected" in line and "signals" in line: + try: + signal_count = int(line.split()[1]) + except (ValueError, IndexError): + pass + + return True, result.stdout, {"signal_count": signal_count} + except subprocess.TimeoutExpired: + return False, "signals timed out", {} + except Exception as e: + return False, f"signals error: {e}", {} + + +def run_analysis(project_name: str) -> tuple[bool, str, dict[str, Any]]: + """Run analysis vectors. 
+ + Returns: + (success, message, result_dict) + """ + try: + log_info(f"Running analysis vectors for '{project_name}'") + cmd = [ + sys.executable, "-m", "archaeology.cli", + "analyze", project_name, + ] + result = run_command(cmd, check=False, timeout=600) + + # Parse analysis outputs from files created (more reliable than parsing stdout) + deliverables_dir = os.path.join("projects", project_name, "deliverables") + analysis_files = [] + if os.path.exists(deliverables_dir): + for f in os.listdir(deliverables_dir): + if f.startswith("analysis-") and f.endswith(".json"): + file_path = os.path.join(deliverables_dir, f) + # Only count files created recently (within last minute) + if os.path.exists(file_path): + import time + mtime = os.path.getmtime(file_path) + if time.time() - mtime < 120: # Created within last 2 minutes + analysis_files.append(file_path) + + # Determine success: at least one analysis file created = partial success + success_count = len(analysis_files) + if success_count == 0: + return False, f"analyze failed: {result.stderr}", {} + + # Check if any vectors failed from stdout + failed_count = result.stdout.count("ERROR:") + total_vectors = 6 # Known vector count + status = "success" if failed_count == 0 else "partial" + + return True, result.stdout, { + "analysis_count": success_count, + "analysis_files": analysis_files, + "failed_vectors": failed_count, + "total_vectors": total_vectors, + } + except subprocess.TimeoutExpired: + return False, "analyze timed out", {} + except Exception as e: + return False, f"analyze error: {e}", {} + + +def run_full_pipeline( + repo_path: str, + project_name: str, + repo_url: str | None = None, + keep_clone: bool = False, +) -> dict[str, Any]: + """Run the complete archaeological pipeline. 
+ + Args: + repo_path: Path to repository (local or cloned) + project_name: Name for the archaeology project + repo_url: Original repository URL (for metadata) + keep_clone: If True, don't delete temporary clones + + Returns: + Result dictionary with status and metrics + """ + result: dict[str, Any] = { + "project_name": project_name, + "repo_path": repo_path, + "repo_url": repo_url or "", + "status": "running", + "steps": {}, + "metrics": {}, + "artifacts": {}, + } + + # Use repo_url for init if available, otherwise use placeholder + init_url = repo_url or "https://github.com/example/example" + + # Step 1: Initialize project + success, msg, data = init_project(project_name, f"Analysis of {project_name}", init_url) + result["steps"]["init"] = {"status": "success" if success else "failed", "message": msg} + if not success: + result["status"] = "failed" + result["error"] = msg + return result + result["artifacts"]["project_dir"] = data.get("project_dir") + + # Step 2: Mine repository + success, msg, data = mine_repo(repo_path, project_name) + result["steps"]["mine"] = {"status": "success" if success else "failed", "message": msg} + if not success: + result["status"] = "failed" + result["error"] = msg + return result + result["metrics"]["commit_count"] = data.get("commit_count", 0) + + # Step 3: Build database + success, msg, data = build_database(project_name) + result["steps"]["build_db"] = {"status": "success" if success else "failed", "message": msg} + if not success: + result["status"] = "failed" + result["error"] = msg + return result + result["artifacts"]["db_path"] = data.get("db_path") + result["metrics"]["db_built"] = data.get("db_exists", False) + + # Step 4: Detect signals (non-critical) + success, msg, data = detect_signals(project_name) + result["steps"]["signals"] = {"status": "success" if success else "partial", "message": msg} + result["metrics"]["signal_count"] = data.get("signal_count", 0) + + # Step 5: Run analysis (non-critical) + success, msg, 
data = run_analysis(project_name) + result["steps"]["analyze"] = {"status": "success" if success else "partial", "message": msg} + result["metrics"]["analysis_count"] = data.get("analysis_count", 0) + result["artifacts"]["analysis_files"] = data.get("analysis_files", []) + + # Check for critical failures + if result["steps"]["build_db"]["status"] == "failed": + result["status"] = "failed" + elif result["steps"]["mine"]["status"] == "failed": + result["status"] = "failed" + else: + result["status"] = "complete" + + return result + + +def main() -> int: + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Integration hook for research-scout to trigger dev-archaeology analysis", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # CLI mode with URL + python3 scout_hook.py --repo-url https://github.com/user/repo --project-name my-project + + # CLI mode with local path + python3 scout_hook.py --repo-path /path/to/repo --project-name my-project + + # Stdin mode + echo '{"url": "https://github.com/user/repo", "name": "my-project"}' | python3 scout_hook.py --stdin + + # Keep cloned repository + python3 scout_hook.py --repo-url https://github.com/user/repo --project-name my-project --keep + """, + ) + parser.add_argument( + "--repo-url", + help="Repository URL to clone and analyze", + ) + parser.add_argument( + "--repo-path", + help="Local repository path to analyze (skips cloning)", + ) + parser.add_argument( + "--project-name", + help="Name for the archaeology project", + ) + parser.add_argument( + "--clone-dir", + help="Directory for cloned repos (default: temp dir)", + ) + parser.add_argument( + "--keep", + action="store_true", + help="Keep cloned repository after analysis", + ) + parser.add_argument( + "--stdin", + action="store_true", + help="Read input as JSON from stdin", + ) + + args = parser.parse_args() + + # Read from stdin if requested + if args.stdin: + try: + input_data = json.loads(sys.stdin.read()) + 
repo_url = input_data.get("url") + repo_path = input_data.get("path") + project_name = input_data.get("name") + keep_clone = input_data.get("keep", False) + clone_dir = input_data.get("clone_dir") + except json.JSONDecodeError as e: + log_error(f"Invalid JSON input: {e}") + result = { + "status": "error", + "error": f"Invalid JSON input: {e}", + } + print(json.dumps(result, indent=2)) + return 1 + else: + repo_url = args.repo_url + repo_path = args.repo_path + project_name = args.project_name + keep_clone = args.keep + clone_dir = args.clone_dir + + # Validate inputs + if not project_name: + log_error("--project-name is required") + result = { + "status": "error", + "error": "--project-name is required", + } + print(json.dumps(result, indent=2)) + return 1 + + if not repo_url and not repo_path: + log_error("Either --repo-url or --repo-path must be provided") + result = { + "status": "error", + "error": "Either --repo-url or --repo-path must be provided", + } + print(json.dumps(result, indent=2)) + return 1 + + # Change to dev-archaeology root + script_dir = Path(__file__).parent + archaeology_root = script_dir.parent.parent + os.chdir(archaeology_root) + + temp_clone_dir = None + try: + # Clone repository if URL provided + if repo_url: + if clone_dir: + target_dir = os.path.join(clone_dir, project_name) + else: + temp_clone_dir = tempfile.mkdtemp(prefix="archaeology-scout-") + target_dir = os.path.join(temp_clone_dir, project_name) + + success, msg_or_path = clone_repo(repo_url, target_dir) + if not success: + result = { + "status": "failed", + "project_name": project_name, + "repo_url": repo_url, + "error": msg_or_path, + } + print(json.dumps(result, indent=2)) + return 1 + repo_path = msg_or_path + else: + repo_path = os.path.expanduser(repo_path) # type: ignore + + # Validate repository exists + if not os.path.isdir(repo_path): + result = { + "status": "failed", + "project_name": project_name, + "repo_path": repo_path, + "error": f"Repository not found: 
{repo_path}", + } + print(json.dumps(result, indent=2)) + return 1 + + # Run full pipeline + result = run_full_pipeline( + repo_path=repo_path, + project_name=project_name, + repo_url=repo_url, + keep_clone=keep_clone, + ) + + # Add cleanup info + if temp_clone_dir and not keep_clone: + result["cleanup"] = {"temp_dir": temp_clone_dir, "action": "will_delete"} + + except Exception as e: + result = { + "status": "error", + "project_name": project_name, + "error": f"Pipeline error: {e}", + } + + finally: + # Cleanup temporary clone + if temp_clone_dir and not keep_clone and os.path.exists(temp_clone_dir): + try: + shutil.rmtree(temp_clone_dir) + except Exception as e: + log_error(f"Failed to cleanup temp dir: {e}") + + # Output result as JSON + print(json.dumps(result, indent=2)) + + # Return exit code based on status + if result.get("status") == "complete": + return 0 + elif result.get("status") in ("failed", "error"): + return 1 + else: + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/run-pipeline.sh b/scripts/run-pipeline.sh new file mode 100755 index 0000000..251a5f6 --- /dev/null +++ b/scripts/run-pipeline.sh @@ -0,0 +1,112 @@ +#!/bin/bash +# Full dev-archaeology pipeline: regenerate per-project + global deliverables. +# Runs every 6 hours via crontab. 
set -euo pipefail

# Use Homebrew Python (system Python 3.9 doesn't support union types)
PYTHON=/opt/homebrew/bin/python3
REPO=/Users/simongonzalezdecruz/workspaces/dev-archaeology
LOG="/tmp/dev-arch-pipeline-$(date +%Y%m%d-%H%M%S).log"

cd "$REPO"

# Timestamped logger: echoes to stdout AND appends to this run's log file.
log() { echo "$(date '+%Y-%m-%d %H:%M:%S') $1" | tee -a "$LOG"; }

log "Pipeline starting"

# Pull latest (best-effort: offline runs should not abort the pipeline)
git pull --rebase --quiet 2>/dev/null || true

# Project set processed by the per-project phases below.
KYANITE="Achiote DECLuTTER-AI DialectOS Epoch Fugax mcp-video openglaze"

# ── Phase 1: Regenerate per-project data.json ───────────
log "--- Phase 1: Regenerating data.json ---"
$PYTHON scripts/data/generate_data_json.py --all >> "$LOG" 2>&1

# ── Phase 2: Regenerate per-project playbook HTML ───────
log "--- Phase 2: Regenerating playbook.html ---"
$PYTHON scripts/data/generate_playbook.py --all >> "$LOG" 2>&1

# ── Phase 2.5: Generate template deliverables (strategy, analysis, etc.) ─
log "--- Phase 2.5: Generating template deliverables ---"
$PYTHON scripts/data/generate_template_deliverables.py --all >> "$LOG" 2>&1

# ── Phase 3: Regenerate per-project dashboards + reports ─
log "--- Phase 3: Regenerating dashboards and reports ---"
for proj in $KYANITE; do
    # Only projects that have already been mined (commit-eras.json present).
    if [ -f "projects/$proj/data/commit-eras.json" ]; then
        $PYTHON -m archaeology.cli visualize "$proj" >> "$LOG" 2>&1
        $PYTHON -m archaeology.cli export-report "$proj" --format html >> "$LOG" 2>&1
        $PYTHON -m archaeology.cli export-report "$proj" --format markdown >> "$LOG" 2>&1
    fi
done

# ── Phase 4: Fix era scanner false positives in template ─
log "--- Phase 4: Fixing template era references ---"
# NOTE(review): ">> /dev/null" appends to /dev/null — harmless, but plain
# ">" is the conventional form; also note this phase's output is discarded
# rather than logged, unlike the other phases.
$PYTHON << 'PYEOF' >> /dev/null 2>&1
import re
from pathlib import Path
# Rewrite 'Era N' labels to 'Phase N' in each project's archaeology.html
# so the era scanner does not flag them as stale era references.
for proj in ["Achiote", "DialectOS", "Epoch", "Fugax", "mcp-video", "openglaze"]:
    path = Path(f"projects/{proj}/deliverables/visuals/archaeology.html")
    if not path.exists(): continue
    content = path.read_text(encoding="utf-8")
    original = content
    content = content.replace("'Era 2\\n", "'Phase 2\\n")
    content = content.replace("'Era 3\\n", "'Phase 3\\n")
    content = re.sub(r'\{ era: (\d+),', r'{ idx: \1,', content)
    content = content.replace('y(d.era)', 'y(d.idx)')
    content = content.replace("y.domain(modelTimeline.map(d => d.era))", "y.domain(modelTimeline.map(d => d.idx))")
    content = content.replace("d => 'Era ' + d).selectAll", "d => 'Phase ' + d).selectAll")
    # Only touch the file when something actually changed (preserves mtimes).
    if content != original:
        path.write_text(content, encoding="utf-8")
PYEOF

# ── Phase 5: Cascade all projects ───────────────────────
log "--- Phase 5: Cascade all projects ---"
for proj in liminal $KYANITE; do
    if [ -f "projects/$proj/data/commit-eras.json" ]; then
        # NOTE(review): "|| true" swallows cascade failures so one broken
        # project cannot abort the whole pipeline (set -e is in effect).
        $PYTHON -m archaeology.cli cascade "$proj" --skip-mine >> "$LOG" 2>&1 || true
    fi
done

# ── Phase 6: Era scanner + audit ────────────────────────
log "--- Phase 6: Era scanner ---"
$PYTHON << 'PYEOF' >> "$LOG" 2>&1
from pathlib import Path
from archaeology.era_mapper import load_eras
from archaeology.era_scanner import scan_deliverables
# Report stale era references per project (first 10 findings each).
for proj in ["Achiote", "DECLuTTER-AI", "DialectOS", "Epoch", "Fugax", "mcp-video", "openglaze", "liminal"]:
    eras = load_eras(Path(f"projects/{proj}/data/commit-eras.json"))
    result = scan_deliverables(Path(f"projects/{proj}"), eras)
    n = len(result.refs)
    print(f" {proj}: {n} findings")
    if n > 0:
        for r in result.refs[:10]:
            print(f" [{r.kind}] {r.file.name}:{r.line}")
PYEOF

log "--- Audit: liminal ---"
$PYTHON -m archaeology.cli audit liminal >> "$LOG" 2>&1

# ── Phase 6.5: Generate Factory bridge ──────────────────
log "--- Phase 6.5: Generating Factory bridge ---"
$PYTHON scripts/sync/generate-bridge.py >> "$LOG" 2>&1

# ── Phase 7: Commit + push ──────────────────────────────
CHANGED=$(git status --porcelain -- projects/ global/ | grep -v '^??'
| head -1 || true)
# Commit/push only when tracked files under projects/ or global/ changed
# (untracked '??' entries are ignored by the grep above).
if [ -n "$CHANGED" ]; then
    log "--- Committing deliverable updates ---"
    git add projects/ global/deliverables/
    git commit -m "Auto-pipeline: full regenerate $(date +%Y-%m-%d\ %H:%M)" --no-verify 2>/dev/null || true
    git push --quiet 2>/dev/null || true
fi

# ── Phase 8: Server keep-alive ──────────────────────────
# Restart the dashboard server only if nothing is listening on :8080.
if ! lsof -i :8080 > /dev/null 2>&1; then
    log "--- Restarting server on :8080 ---"
    rm -rf "$REPO/.serve/"
    nohup $PYTHON -m archaeology.cli serve --port 8080 > /dev/null 2>&1 &
    sleep 2
fi

log "Pipeline complete"
diff --git a/scripts/sync/generate-bridge.py b/scripts/sync/generate-bridge.py
new file mode 100644
index 0000000..7088c92
--- /dev/null
+++ b/scripts/sync/generate-bridge.py
@@ -0,0 +1,23 @@
#!/usr/bin/env python3
"""Generate factory-bridge.json for The-Factory integration.

Writes global/data/factory-bridge.json with strategic summaries
for all archaeology projects. Called from the 6-hour pipeline.

Usage:
    python3 scripts/sync/generate-bridge.py
"""

import sys
from pathlib import Path

# Add repo root to path
# NOTE(review): parents[2] assumes this file stays two levels below the
# repo root (scripts/sync/); moving it silently breaks the import below.
ROOT = Path(__file__).resolve().parents[2]
sys.path.insert(0, str(ROOT))

from archaeology.api import generate_bridge_file


if __name__ == "__main__":
    count = generate_bridge_file()
    print(f"Done: {count} projects written to bridge")
diff --git a/scripts/update-dashboard.sh b/scripts/update-dashboard.sh
new file mode 100644
index 0000000..c7e0e9f
--- /dev/null
+++ b/scripts/update-dashboard.sh
@@ -0,0 +1,12 @@
#!/bin/bash
# Auto-update dev-archaeology dashboard
cd /Users/simongonzalezdecruz/workspaces/dev-archaeology

# Pull latest changes
git pull --rebase --quiet 2>/dev/null

# Restart the serve process if it's not running
if !
lsof -i :8099 > /dev/null 2>&1; then
    # Serve detached on :8099; brief sleep lets the listener bind before cron exits.
    nohup python3 -m archaeology.cli serve --no-open --port 8099 > /dev/null 2>&1 &
    sleep 2
fi
diff --git a/tests/test_audit.py b/tests/test_audit.py
index 097db35..7d0885f 100644
--- a/tests/test_audit.py
+++ b/tests/test_audit.py
@@ -91,7 +91,7 @@ def test_analyze_command_runs_all_six_vectors_for_demo(tmp_path, monkeypatch):
     analyze_result = runner.invoke(main, ["analyze", "demo"])
     assert analyze_result.exit_code == 0, analyze_result.output

-    deliverables = tmp_path / "projects" / "demo" / "deliverables"
+    analysis_dir = tmp_path / "projects" / "demo" / "deliverables" / "analysis"
     expected = {
         "analysis-sdlc-gap-finder.json",
         "analysis-ml-pattern-mapper.json",
@@ -100,7 +100,7 @@
         "analysis-source-archaeologist.json",
         "analysis-youtube-correlator.json",
     }
-    assert expected == {path.name for path in deliverables.glob("analysis-*.json")}
+    assert expected == {path.name for path in analysis_dir.glob("analysis-*.json")}


 def test_export_report_from_demo_analysis(tmp_path, monkeypatch):
@@ -111,7 +111,7 @@
     assert analyze.exit_code == 0, analyze.output
     export = runner.invoke(main, ["export-report", "demo"])
     assert export.exit_code == 0, export.output
-    report = tmp_path / "projects" / "demo" / "deliverables" / "ARCHAEOLOGY-REPORT.md"
+    report = tmp_path / "projects" / "demo" / "deliverables" / "reports" / "ARCHAEOLOGY-REPORT.md"
     text = report.read_text()
     assert "# DEMO ARCHAEOLOGY Archaeology Report" in text
     assert "## Canonical Metrics" in text
@@ -124,7 +124,7 @@
     html_export = runner.invoke(main, ["export-report", "demo", "--format", "html"])
     assert html_export.exit_code == 0, html_export.output
-    html_report = tmp_path / "projects" / "demo" / "deliverables" / "ARCHAEOLOGY-REPORT.html"
+    html_report = tmp_path / "projects" / "demo" / "deliverables" / "visuals" / "report.html"
     html = html_report.read_text()
     assert "" in html
     assert "DEMO ARCHAEOLOGY Archaeology Report" in html