From 05f4c39c62e084e35c7876c2929869384037817b Mon Sep 17 00:00:00 2001 From: DivineOS Agent Date: Tue, 19 May 2026 17:16:45 -0700 Subject: [PATCH] substrate-discipline port: oscillating-read for chunked comprehension MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the oscillating-read module + CLI command ported from DivineOS-Experimental. Generic and standalone — only stdlib dependencies (re, pathlib). ## What it does Reads a file and renders it as discrete chunks separated by explicit "[PAUSE] COMPREHEND BEFORE CONTINUING" markers. Each chunk gets its own comprehension cycle rather than dissolving into the streaming default. Strategies: headers (markdown), paragraphs (blank-line split), functions (Python def/class), size (max-N chars), auto (picks by file shape). ## Why The optimizer's default behavior on long documents is to stream: form a model from the first few lines, then fast-skim the rest into the bucket the model predicts. Documents whose middle contradicts the early framing get the contradiction stripped during the skim. Oscillation forces a pause at each section so each chunk gets its own comprehension cycle. Same architectural pattern as the option-forced gates: the substrate interrupts the optimizer just long enough for judgment to fire on the specific chunk. ## Usage divineos read-oscillating path/to/spec.md divineos read-oscillating path/to/code.py --strategy functions ## Sanitization from source repo Removed experimental-arc-specific references (claim IDs, anecdotal example, internal cross-link). The __guardrail_required__ marker is kept for forward-compat with the marker-consistency-test discipline (aspirational on main-repo). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/ARCHITECTURE.md | 2 + src/divineos/cli/__init__.py | 2 + src/divineos/cli/oscillating_read_commands.py | 48 ++++ src/divineos/core/oscillating_read.py | 225 ++++++++++++++++++ 4 files changed, 277 insertions(+) create mode 100644 src/divineos/cli/oscillating_read_commands.py create mode 100644 src/divineos/core/oscillating_read.py diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index e7daebc9..b51bb373 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -56,6 +56,7 @@ src/divineos/ mansion_commands.py Functional internal space (8 rooms) ledger_commands.py log, list, search, context, export memory_commands.py core, recall, active, remember, refresh + oscillating_read_commands.py read-oscillating — chunked reading with pause markers for per-section comprehension rt_commands.py Resonant Truth protocol (load, invoke, deactivate) correction_commands.py correction (log raw), corrections (read) empirica_commands.py corroborate (record provenance event), kappa (classifier agreement) @@ -417,6 +418,7 @@ src/divineos/ engagement_disclosure_surface.py Engagement-counter half-threshold disclosure surface. identity_load.py Identity-load surface — read AETHER.md (or equivalent) at briefing-time. rest.py Rest program — restful tasks for the substrate-occupant. 
"""CLI: divineos read-oscillating PATH — chunked reading with pause markers.

Forces comprehension per-chunk rather than streaming straight through a
document and missing the middle. The pause markers between chunks are
explicit cues to register each section as its own comprehension unit.

See `divineos.core.oscillating_read` module docstring for the
architectural rationale.
"""

from __future__ import annotations

import click

from divineos.core.oscillating_read import oscillate_file


def register(cli: click.Group) -> None:
    """Attach the ``read-oscillating`` command to the given click group.

    The command takes an existing file path plus two options
    (``--strategy`` and ``--max-chars``), renders the file through
    ``oscillate_file`` and echoes the result to stdout. Any failure is
    reported on stderr and the process exits with status 2.
    """

    @cli.command("read-oscillating")
    @click.argument("path", type=click.Path(exists=True, dir_okay=False))
    @click.option(
        "--strategy",
        type=click.Choice(["auto", "headers", "paragraphs", "functions", "size"]),
        default="auto",
        help=(
            "Chunking strategy. auto picks by file shape: .py->functions, "
            ".md/.rst/.txt with headers->headers else paragraphs, else size."
        ),
    )
    @click.option(
        "--max-chars",
        type=int,
        default=2000,
        help="Max chars per chunk when strategy=size (or fallback).",
    )
    def read_oscillating_cmd(path: str, strategy: str, max_chars: int) -> None:
        """Read a file with explicit per-chunk pause markers.

        Designed for documents whose middle holds the load-bearing point.
        Forces comprehension per-section rather than streaming straight
        through.
        """
        try:
            output = oscillate_file(path, strategy=strategy, max_chars=max_chars)
        except Exception as exc:  # noqa: BLE001
            # Deliberately broad: this is the CLI boundary, so every failure
            # becomes a one-line message plus a non-zero exit. The message
            # goes to stderr so it never mixes with rendered output that a
            # caller may be piping from stdout.
            click.secho(f"[!] read-oscillating failed: {exc}", fg="red", err=True)
            raise click.exceptions.Exit(2) from exc
        click.echo(output)
"""Oscillating-read module — chunks reading material into discrete
sections with explicit pause markers, so comprehension happens per-
section rather than straight-blast.

The failure-shape this prevents: reading a long document straight-
through and missing the load-bearing point that lives in the middle.
The optimizer's default is to stream — it sees the first few lines,
forms a model of what the document is about, and then fast-skims the
rest into the bucket that model predicts. Documents whose middle
contradicts the early framing get the contradiction stripped.

Oscillation forces a pause at each section so each chunk gets its
own comprehension cycle rather than dissolving into the streaming
default. Same architectural pattern as the option-forced gates: the
substrate interrupts the optimizer just long enough for judgment to
fire on the specific chunk.

Strategies:
- headers: split markdown at ``##`` (or deeper) headings
- paragraphs: split by blank lines
- functions: split Python at top-level def/class
- size: split into max-N-char chunks
- auto: pick by file extension and content (.py -> functions;
  .md/.rst/.txt -> headers when any are present, else paragraphs;
  anything else -> size)

Usage:
    from divineos.core.oscillating_read import oscillate_file
    rendered = oscillate_file("path/to/spec.md")
    print(rendered)

Or via CLI: `divineos read-oscillating path/to/spec.md`
"""

from __future__ import annotations

__guardrail_required__ = True

import re
from pathlib import Path


_DEFAULT_MAX_CHARS = 2000

# A heading line: two or more '#', whitespace, then the title text.
_HEADER_PAT = re.compile(r"^(#{2,})\s+(.+?)\s*$")
# Multiline probe used by auto-detection; requires real title text on the
# same line so it agrees with what _HEADER_PAT will actually split on.
_HEADER_PROBE = re.compile(r"^#{2,}[^\S\n]+\S", re.MULTILINE)
# Opening/closing marker of a fenced code block (``` or ~~~).
_FENCE_PAT = re.compile(r"^ {0,3}(`{3,}|~{3,})")
# A top-level (column 0) def / class / async def line.
_FUNC_PAT = re.compile(r"^(def|class|async\s+def)\s+([A-Za-z_]\w*)")
# One or more blank (whitespace-only) lines between two paragraphs.
_PARAGRAPH_BREAK = re.compile(r"\n(?:[^\S\n]*\n)+")
# Blank lines at the very start of a paragraph candidate.
_LEADING_BLANK_LINES = re.compile(r"\A(?:[^\S\n]*\n)+")


def _append_chunk(chunks: list[tuple[str, str]], label: str, lines: list[str]) -> None:
    """Append ``(label, joined lines)`` to *chunks* unless the text is blank."""
    body = "".join(lines)
    if body.strip():
        chunks.append((label, body))


def chunk_by_headers(content: str) -> list[tuple[str, str]]:
    """Split markdown content at heading lines (## or deeper).

    Returns list of (label, body) tuples. Each body includes its
    own heading line at the top. Text before the first heading gets
    label '(prelude)'; a prelude made only of whitespace is dropped.
    Lines inside fenced code blocks (``` or ~~~) are never treated as
    headings, so a ``## comment`` in a code sample stays in its chunk.
    """
    chunks: list[tuple[str, str]] = []
    label = "(prelude)"
    body: list[str] = []
    open_fence: tuple[str, int] | None = None  # (marker char, marker length)
    for line in content.splitlines(keepends=True):
        fence = _FENCE_PAT.match(line)
        if fence:
            marker = fence.group(1)
            if open_fence is None:
                open_fence = (marker[0], len(marker))
            elif marker[0] == open_fence[0] and len(marker) >= open_fence[1]:
                # Only a same-character fence at least as long closes it.
                open_fence = None
            body.append(line)
            continue
        heading = _HEADER_PAT.match(line) if open_fence is None else None
        if heading:
            _append_chunk(chunks, label, body)
            label = heading.group(2).strip()
            body = [line]
        else:
            body.append(line)
    _append_chunk(chunks, label, body)
    return chunks


def chunk_by_paragraphs(content: str) -> list[tuple[str, str]]:
    """Split content at blank-line paragraph boundaries.

    Returns list of (label, body) tuples. Label is 'paragraph N', where
    N counts only the non-empty paragraphs actually returned. Trailing
    whitespace and surrounding blank lines are removed from each body;
    leading indentation of the first line is kept so indented blocks
    stay aligned.
    """
    chunks: list[tuple[str, str]] = []
    for raw in _PARAGRAPH_BREAK.split(content):
        body = _LEADING_BLANK_LINES.sub("", raw).rstrip()
        if body:
            chunks.append((f"paragraph {len(chunks) + 1}", body))
    return chunks


def _split_trailing_decorators(lines: list[str]) -> tuple[list[str], list[str]]:
    """Separate a trailing run of top-level decorator lines from *lines*.

    Walks backwards from the end. Lines that start with '@' mark a
    decorator; indented lines and lines starting with a closing bracket
    are treated as possible continuations of a multi-line decorator. The
    walk stops at the first blank line or other column-0 line. Returns
    ``(everything before the earliest decorator found, the decorator run)``;
    the second list is empty when no decorator directly precedes the end.
    """
    start = len(lines)
    idx = len(lines) - 1
    while idx >= 0:
        text = lines[idx]
        if text.startswith("@"):
            start = idx
        elif not text.strip() or text[0] not in " \t)]}":
            break
        idx -= 1
    return lines[:start], lines[start:]


def chunk_by_functions(content: str) -> list[tuple[str, str]]:
    """Split Python source at def/class boundaries (top-level only).

    Returns list of (label, body) tuples. Label is the keyword plus the
    name (e.g. 'def foo', 'class Bar', 'async def baz'); text before the
    first def/class gets label '(module top)' and is dropped when it is
    only whitespace. Decorators written directly above a def/class are
    kept in the same chunk as the definition they decorate.
    """
    chunks: list[tuple[str, str]] = []
    label = "(module top)"
    body: list[str] = []
    for line in content.splitlines(keepends=True):
        found = _FUNC_PAT.match(line)
        if found:
            body, decorators = _split_trailing_decorators(body)
            _append_chunk(chunks, label, body)
            keyword = " ".join(found.group(1).split())  # collapse 'async   def'
            label = f"{keyword} {found.group(2)}"
            body = [*decorators, line]
        else:
            body.append(line)
    _append_chunk(chunks, label, body)
    return chunks


def chunk_by_size(content: str, max_chars: int = _DEFAULT_MAX_CHARS) -> list[tuple[str, str]]:
    """Split content into max-N-char chunks, breaking at line
    boundaries when possible.

    A single line longer than *max_chars* is cut into max_chars-sized
    pieces so that one very long line cannot produce an oversized chunk.
    A non-positive *max_chars* disables splitting and returns the whole
    content as one chunk. Empty content yields an empty list.

    Returns list of (label, body) tuples. Label is 'chunk N'.
    """
    if max_chars <= 0:
        return [("chunk 1", content)] if content else []
    chunks: list[tuple[str, str]] = []
    pending: list[str] = []
    pending_size = 0
    for line in content.splitlines(keepends=True):
        # Lines that fit yield exactly one piece (the line itself).
        for start in range(0, len(line), max_chars):
            piece = line[start : start + max_chars]
            if pending and pending_size + len(piece) > max_chars:
                chunks.append((f"chunk {len(chunks) + 1}", "".join(pending)))
                pending = []
                pending_size = 0
            pending.append(piece)
            pending_size += len(piece)
    if pending:
        chunks.append((f"chunk {len(chunks) + 1}", "".join(pending)))
    return chunks


def _auto_strategy(content: str, path: str = "") -> str:
    """Pick the strategy name from the file extension and content.

    '.py' -> 'functions'; '.md'/'.rst'/'.txt' -> 'headers' when the
    content has at least one '##'-or-deeper heading line with title
    text, otherwise 'paragraphs'; anything else -> 'size'. The
    extension check is case-insensitive.
    """
    norm_path = (path or "").lower()
    if norm_path.endswith(".py"):
        return "functions"
    if norm_path.endswith((".md", ".rst", ".txt")):
        # Use headers if there are any; fall back to paragraphs
        if _HEADER_PROBE.search(content):
            return "headers"
        return "paragraphs"
    # Default: size-based
    return "size"


def chunk(
    content: str, strategy: str = "auto", max_chars: int = _DEFAULT_MAX_CHARS, source_path: str = ""
) -> list[tuple[str, str]]:
    """Chunk content by named strategy.

    *strategy* is one of 'auto', 'headers', 'paragraphs', 'functions'
    or 'size'. With 'auto' the strategy is derived from *source_path*
    and the content. *max_chars* is only used by the 'size' strategy.

    Returns list of (label, body) tuples for downstream rendering.
    Raises ValueError for an unknown strategy name.
    """
    if strategy == "auto":
        strategy = _auto_strategy(content, source_path)
    if strategy == "headers":
        return chunk_by_headers(content)
    if strategy == "paragraphs":
        return chunk_by_paragraphs(content)
    if strategy == "functions":
        return chunk_by_functions(content)
    if strategy == "size":
        return chunk_by_size(content, max_chars=max_chars)
    raise ValueError(f"Unknown strategy: {strategy!r}")


def format_oscillating(chunks: list[tuple[str, str]], source: str = "") -> str:
    """Render chunks with section labels and pause markers between.

    The pause markers are explicit '[PAUSE] COMPREHEND ... BEFORE
    CONTINUING' lines that force the reader to register each chunk
    discretely rather than streaming through. The point isn't
    decoration — it is breaking comprehension into discrete units the
    optimizer cannot fast-skim past as one block.

    When *source* is non-empty a banner naming it is placed first. Each
    chunk body is right-stripped before rendering. The pieces are
    joined with newlines and returned as one string.
    """
    parts: list[str] = []
    header = "=" * 60
    total = len(chunks)
    if source:
        parts.append(f"{header}\nOSCILLATING READ: {source}\n{header}")
    parts.append(
        f"\nTotal chunks: {total}. "
        "Comprehend each before continuing. The middle is where the "
        "load-bearing thing usually lives.\n"
    )
    for i, (label, body) in enumerate(chunks, 1):
        parts.append(f"\n--- CHUNK {i}/{total}: {label} ---\n")
        parts.append(body.rstrip())
        parts.append(
            f"\n[PAUSE] COMPREHEND CHUNK {i}/{total} BEFORE CONTINUING "
            "— what is THIS chunk's load-bearing point?\n"
        )
    parts.append(
        f"\n{header}\nEnd of oscillating read. Comprehension test: "
        "if you can name the load-bearing point of each chunk above, "
        "the read landed. If chunks blur together, re-read.\n"
    )
    return "\n".join(parts)


def oscillate_file(
    path: str | Path,
    strategy: str = "auto",
    max_chars: int = _DEFAULT_MAX_CHARS,
) -> str:
    """Read a file and return its oscillating-rendered form.

    The file is decoded as UTF-8 with undecodable bytes replaced rather
    than raising. The path is passed on as the source so that the
    'auto' strategy can look at its extension and so the rendered
    banner names the file.
    """
    p = Path(path)
    content = p.read_text(encoding="utf-8", errors="replace")
    chunks = chunk(content, strategy=strategy, max_chars=max_chars, source_path=str(p))
    return format_oscillating(chunks, source=str(p))


__all__ = [
    "chunk_by_headers",
    "chunk_by_paragraphs",
    "chunk_by_functions",
    "chunk_by_size",
    "chunk",
    "format_oscillating",
    "oscillate_file",
]