From 29903233b5978b8136b5af8a72501f05d0c806b1 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 10 Jun 2026 14:16:11 +0000
Subject: [PATCH 1/6] Add ROM gitignore protection, CI workflow, unit tests;
 fix Space.write overflow corruption

Priority 1 items from the v1.4.3 code review:

- .gitignore: exclude *.smc/*.sfc ROM files (legal requirement) and
  generated test artifacts in tests/
- CI (.github/workflows/ci.yml): syntax check, unit tests, and wc.py -h
  smoke test on Python 3.9-3.13, plus a guard job that fails if any
  ROM-like or oversized file is ever committed
- tests/: new unit test suite (65 tests) covering the ROM-independent
  core: heap allocation/free/reserve, label and branch encoding,
  Space write/clear/conflict handling, compression round-trips,
  flatten/intersection/weighted_random/shuffle_if, seeding, ROM
  validation, and CLI parsing
- memory/space.py: validate bounds BEFORE writing in Space.write() so
  an overflowing write can no longer corrupt bytes beyond the end of
  its space; replace bare except clauses with except TypeError
- memory/errors.py: new RomSpaceError (subclasses MemoryError for
  backward compatibility) raised by Space.write() and Heap.allocate()
  instead of the builtin MemoryError
- agents.md / llms.md: updated error signature and test documentation

No RNG calls are added, removed, or reordered: seed output is unchanged.

https://claude.ai/code/session_01QnF26DB2TYi1hpJE8L6EJr
---
 .github/workflows/ci.yml     |  47 +++++++++++
 .gitignore                   |  12 ++-
 agents.md                    |  14 +++-
 llms.md                      |   2 +
 memory/errors.py             |   8 ++
 memory/heap.py               |   4 +-
 memory/space.py              |  16 ++--
 tests/__init__.py            |   0
 tests/test_cli.py            |  23 ++++++
 tests/test_heap.py           |  94 ++++++++++++++++++++++
 tests/test_label.py          |  71 ++++++++++++++++
 tests/test_seed.py           |  51 ++++++++++++
 tests/test_space.py          | 152 +++++++++++++++++++++++++++++++++++
 tests/test_utils.py          | 101 +++++++++++++++++++++++
 tests/test_valid_rom_file.py |  25 ++++++
 15 files changed, 610 insertions(+), 10 deletions(-)
 create mode 100644 .github/workflows/ci.yml
 create mode 100644 memory/errors.py
 create mode 100644 tests/__init__.py
 create mode 100644 tests/test_cli.py
 create mode 100644 tests/test_heap.py
 create mode 100644 tests/test_label.py
 create mode 100644 tests/test_seed.py
 create mode 100644 tests/test_space.py
 create mode 100644 tests/test_utils.py
 create mode 100644 tests/test_valid_rom_file.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 00000000..0b8f7080
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,47 @@
+name: CI
+
+on:
+  push:
+  pull_request:
+
+jobs:
+  rom-guard:
+    name: ROM safety guard
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Fail if ROM or oversized files are tracked
+        run: |
+          set -e
+          rom_files=$(git ls-files | grep -iE '\.(smc|sfc)$' || true)
+          if [ -n "$rom_files" ]; then
+            echo "::error::ROM files must never be committed (legal requirement):"
+            echo "$rom_files"
+            exit 1
+          fi
+          # largest legitimate source file is ~56KB; anything over 512KB is suspect
+          big_files=$(git ls-files -z | xargs -0 du -b 2>/dev/null | awk '$1 > 524288 {print $2 " (" $1 " bytes)"}')
+          if [ -n "$big_files" ]; then
+            echo "::error::Unexpectedly large files tracked (possible ROM or binary data):"
+            echo "$big_files"
+            exit 1
+          fi
+
+  test:
+    name: Tests (Python ${{ matrix.python-version }})
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Syntax check all modules
+        run: python -m compileall -q .
+      - name: Run unit tests
+        run: python -m unittest discover -s tests -v
+      - name: CLI smoke test
+        run: python wc.py -h
diff --git a/.gitignore b/.gitignore
index eaa59db1..191b6831 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,14 @@ __pycache__/
 $py.class
 *-objective.json
 *-flag.json
-wip/
\ No newline at end of file
+wip/
+
+# ROM files must never be committed (legal requirement)
+*.smc
+*.sfc
+*.SMC
+*.SFC
+
+# generated test artifacts (seed logs, manifests) - see agents.md "Test Data Isolation"
+tests/*.txt
+tests/*.json
diff --git a/agents.md b/agents.md
index 76e4e727..6d60035b 100644
--- a/agents.md
+++ b/agents.md
@@ -24,9 +24,16 @@ To verify ROM modifications and successfully run a seed generation test, a valid
 
 ### 1.3 Test Data Isolation
 > [!IMPORTANT]
-> **TEST DATA DIRECTORY RULE**: All test output data (including modified test ROMs, debug log text files, or API metadata manifests generated during your test runs) **MUST** be isolated and placed inside a `tests` directory in the workspace root (e.g., `tests/test_output.smc`, `tests/test_output.txt`).
+> **TEST DATA DIRECTORY RULE**: All test output data (including modified test ROMs, debug log text files, or API metadata manifests generated during your test runs) **MUST** be isolated and placed inside the `tests` directory in the workspace root (e.g., `tests/test_output.smc`, `tests/test_output.txt`).
 > - Do not write test output files directly to the root directory.
-> - Ensure the `tests` directory is created before running commands (e.g. `mkdir -Force tests`).
+> - The `tests` directory also contains the committed unit test suite (`tests/test_*.py`); generated artifacts there are excluded from git via `.gitignore` (`*.smc`, `*.sfc`, `tests/*.txt`, `tests/*.json`).
+
+### 1.4 Unit Tests
+The `tests/` directory contains a unit test suite for the ROM-independent logic (memory allocation, label/branch encoding, compression, seeding, CLI parsing). It requires no ROM file:
+```powershell
+python3 -m unittest discover -s tests -v
+```
+Run this after any change to `memory/`, `utils/`, `seed.py`, or `valid_rom_file.py`, and add tests for new pure-Python logic. The same suite runs in CI (`.github/workflows/ci.yml`) on every push and pull request, along with `python -m compileall`, `wc.py -h`, and a guard that fails if any ROM-like or oversized file is ever committed.
 
 ---
 
@@ -64,8 +71,9 @@ During development, agents commonly trigger three specific errors. Here is how t
 ### 3.1 Memory Overflow / Bank Exhaustion
 **Error Signature**:
 ```text
-MemoryError: Not enough room in space "custom event toggle": Next (0xc0f124) > End (0xc0f100). Diff: 36
+memory.errors.RomSpaceError: Not enough room in space "custom event toggle": Next (0xc0f124) > End (0xc0f100). Diff: 36
 ```
+(`RomSpaceError` subclasses `MemoryError`, so older code or docs referring to `MemoryError` still apply. The overflow is detected *before* any bytes are written, so the ROM buffer is never corrupted by an overflowing write.)
 **Cause**: You have written more bytes of assembly instructions or static data than the target `Reserve` range fits, or have exceeded the size of a dynamically requested `Allocate` block in that bank.
 **Resolution**:
 1. Check the size parameters in your dynamic allocation: `Allocate(Bank.C0, size, "description")`. Increase `size` to support your assembly array length.
diff --git a/llms.md b/llms.md
index c03eea55..24ab193f 100644
--- a/llms.md
+++ b/llms.md
@@ -183,6 +183,8 @@ All generated test data, output ROMs, log text files, and metadata manifests gen
   - Output test ROM: `tests/test_output.smc`
   - Output test Log: `tests/test_output.txt`
   - Manifest file: `tests/test_manifest.json`
+- **Unit Tests**: `tests/` also contains the committed unit test suite (`tests/test_*.py`) covering ROM-independent logic (memory allocation, label/branch encoding, compression, seeding, CLI parsing). Run it with `python3 -m unittest discover -s tests -v`; it requires no ROM file. CI (`.github/workflows/ci.yml`) runs it on every push/PR, plus a guard that rejects committed ROM-like files. Generated artifacts in `tests/` are gitignored (`*.smc`, `*.sfc`, `tests/*.txt`, `tests/*.json`); test source files are tracked.
+- **Memory Overflow Errors**: Space overflows raise `memory.errors.RomSpaceError` (a `MemoryError` subclass). Bounds are validated *before* writing, so an overflowing `space.write(...)` never corrupts adjacent ROM bytes.
 
 ---
 
diff --git a/memory/errors.py b/memory/errors.py
new file mode 100644
index 00000000..c98b0515
--- /dev/null
+++ b/memory/errors.py
@@ -0,0 +1,8 @@
+# RomSpaceError subclasses MemoryError because these failures were historically
+# raised as MemoryError; existing callers/tools catching MemoryError still work
+class RomSpaceError(MemoryError):
+    """Raised when data does not fit in the requested/reserved ROM space.
+
+    Common causes and resolutions are documented in agents.md under
+    "Memory Overflow / Bank Exhaustion".
+    """
diff --git a/memory/heap.py b/memory/heap.py
index 55460e52..2f56ab6a 100644
--- a/memory/heap.py
+++ b/memory/heap.py
@@ -1,3 +1,5 @@
+from memory.errors import RomSpaceError
+
 class Block:
     def __init__(self, start, end):
         if start > end:
@@ -53,7 +55,7 @@ def find_best_fit(size):
 
         block = find_best_fit(size)
         if block is None:
-            raise MemoryError(f"Unable to allocate block of size {size}")
+            raise RomSpaceError(f"Unable to allocate block of size {size}")
 
         start = block.start
         block.start += size
diff --git a/memory/space.py b/memory/space.py
index 5cc6c945..39eb51eb 100644
--- a/memory/space.py
+++ b/memory/space.py
@@ -1,6 +1,7 @@
 from memory.rom import ROM
 from memory.heap import Heap
 from memory.label import Label, LabelPointer
+from memory.errors import RomSpaceError
 
 from enum import IntEnum
 BANK_SIZE = 0x10000
@@ -80,16 +81,20 @@ def write(self, *values):
         values = self._invoke_callables(values)
         values = self._parse_labels(values)
 
-        self._next_address = Space.rom.set_bytes(self.next_address, values)
-        if(self.next_address - 1 > self.end_address):
-            raise MemoryError(f"Not enough room in space \"{self.description}\": Next (0x{self.next_address -1:x}) > End (0x{self.end_address:x}). Diff: {(self.next_address - 1) - (self.end_address)}")
+        # validate bounds before writing so an overflow cannot corrupt bytes
+        # beyond the end of this space
+        last_address = self.next_address + len(values) - 1
+        if last_address > self.end_address:
+            raise RomSpaceError(f"Not enough room in space \"{self.description}\": Next (0x{last_address:x}) > End (0x{self.end_address:x}). Diff: {last_address - self.end_address}")
 
+        self._next_address = Space.rom.set_bytes(self.next_address, values)
         self._update_label_pointers()
 
     def clear(self, value):
         try:
             values = [value] * (len(self) // len(value))
-        except:
+        except TypeError:
+            # value is a single int (no len()), not a sequence
             values = [value] * len(self)
 
         values = self._invoke_callables(values)
@@ -177,7 +182,8 @@ def _parse_labels(self, values):
                 new_values.append(value)
                 try:
                     index += len(value)
-                except:
+                except TypeError:
+                    # value is a single byte/int (no len())
                     index += 1
         return new_values
 
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_cli.py b/tests/test_cli.py
new file mode 100644
index 00000000..bf8e0d5d
--- /dev/null
+++ b/tests/test_cli.py
@@ -0,0 +1,23 @@
+import os
+import subprocess
+import sys
+import unittest
+
+REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+class TestCli(unittest.TestCase):
+    def test_help_exits_successfully(self):
+        # smoke test: importing args parses all 30+ flag modules, so -h
+        # exercises the entire argument interface without needing a ROM
+        result = subprocess.run(
+            [sys.executable, "wc.py", "-h"],
+            cwd = REPO_ROOT,
+            capture_output = True,
+            text = True,
+            timeout = 60,
+        )
+        self.assertEqual(result.returncode, 0, msg = result.stderr)
+        self.assertIn("usage", result.stdout)
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_heap.py b/tests/test_heap.py
new file mode 100644
index 00000000..374d1021
--- /dev/null
+++ b/tests/test_heap.py
@@ -0,0 +1,94 @@
+import unittest
+
+from memory.errors import RomSpaceError
+from memory.heap import Block, Heap
+
+class TestBlock(unittest.TestCase):
+    def test_size_is_inclusive_of_both_ends(self):
+        self.assertEqual(Block(0, 0).size, 1)
+        self.assertEqual(Block(0, 9).size, 10)
+
+    def test_swapped_bounds_are_normalized(self):
+        block = Block(9, 0)
+        self.assertEqual(block.start, 0)
+        self.assertEqual(block.end, 9)
+        self.assertEqual(block.size, 10)
+
+class TestHeap(unittest.TestCase):
+    def setUp(self):
+        self.heap = Heap()
+
+    def test_allocate_from_empty_heap_raises(self):
+        with self.assertRaises(RomSpaceError):
+            self.heap.allocate(1)
+
+    def test_allocate_raises_memory_error_for_backward_compatibility(self):
+        with self.assertRaises(MemoryError):
+            self.heap.allocate(1)
+
+    def test_free_then_allocate(self):
+        self.heap.free(0x100, 0x1ff)
+        self.assertEqual(self.heap.available, 0x100)
+
+        start = self.heap.allocate(0x10)
+        self.assertEqual(start, 0x100)
+        self.assertEqual(self.heap.available, 0xf0)
+
+    def test_allocate_uses_best_fit_block(self):
+        self.heap.free(0, 99)       # 100 byte block
+        self.heap.free(200, 219)    # 20 byte block
+
+        start = self.heap.allocate(20)
+        self.assertEqual(start, 200) # exact fit preferred over larger block
+        self.assertEqual(self.heap.available, 100)
+
+    def test_allocate_more_than_largest_block_raises(self):
+        self.heap.free(0, 99)
+        self.heap.free(200, 219)
+        with self.assertRaises(RomSpaceError):
+            self.heap.allocate(101) # 120 bytes available but largest block is 100
+
+    def test_free_merges_adjacent_blocks(self):
+        self.heap.free(0, 9)
+        self.heap.free(10, 19)
+        self.assertEqual(self.heap.available, 20)
+        self.assertEqual(len(self.heap.blocks), 1)
+        self.assertEqual(self.heap.allocate(20), 0)
+
+    def test_free_merges_overlapping_blocks(self):
+        self.heap.free(0, 14)
+        self.heap.free(10, 19)
+        self.assertEqual(self.heap.available, 20)
+        self.assertEqual(len(self.heap.blocks), 1)
+
+    def test_free_inside_existing_block_changes_nothing(self):
+        self.heap.free(0, 99)
+        self.heap.free(40, 59)
+        self.assertEqual(self.heap.available, 100)
+        self.assertEqual(len(self.heap.blocks), 1)
+
+    def test_reserve_splits_free_block(self):
+        self.heap.free(0, 99)
+        self.heap.reserve(40, 59)
+        self.assertEqual(self.heap.available, 80)
+
+        # neither remaining block can hold more than 40 bytes
+        with self.assertRaises(RomSpaceError):
+            self.heap.allocate(41)
+        self.assertEqual(self.heap.allocate(40), 0)
+        self.assertEqual(self.heap.allocate(40), 60)
+
+    def test_reserve_trims_overlapping_block(self):
+        self.heap.free(0, 99)
+        self.heap.reserve(50, 120)
+        self.assertEqual(self.heap.available, 50)
+        self.assertEqual(self.heap.allocate(50), 0)
+
+    def test_reserve_consumes_fully_covered_block(self):
+        self.heap.free(10, 19)
+        self.heap.reserve(0, 29)
+        self.assertEqual(self.heap.available, 0)
+        self.assertEqual(len(self.heap.blocks), 0)
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_label.py b/tests/test_label.py
new file mode 100644
index 00000000..53ec76b9
--- /dev/null
+++ b/tests/test_label.py
@@ -0,0 +1,71 @@
+import unittest
+
+from memory.label import Label, LabelPointer
+
+class TestLabelPointer(unittest.TestCase):
+    def _pointer(self, label_address, pointer_address, mode, offset = 0):
+        label = Label("TEST")
+        label.address = label_address
+        pointer = LabelPointer(label, pointer_address, mode)
+        pointer.offset = offset
+        return pointer
+
+    def test_absolute(self):
+        pointer = self._pointer(0x1234, 0x100, LabelPointer.ABSOLUTE)
+        self.assertEqual(int(pointer), 0x1234)
+
+    def test_absolute_with_offset(self):
+        pointer = self._pointer(0x1234, 0x100, LabelPointer.ABSOLUTE)
+        pointer = pointer + 4
+        self.assertEqual(int(pointer), 0x1238)
+        pointer = pointer - 8
+        self.assertEqual(int(pointer), 0x1230)
+
+    def test_relative(self):
+        pointer = self._pointer(0x110, 0x100, LabelPointer.RELATIVE)
+        self.assertEqual(int(pointer), 0x10)
+
+        pointer = self._pointer(0x100, 0x110, LabelPointer.RELATIVE)
+        self.assertEqual(int(pointer), -0x10)
+
+    def test_absolute_relative(self):
+        pointer = self._pointer(0x100, 0x110, LabelPointer.ABSOLUTE_RELATIVE)
+        self.assertEqual(int(pointer), 0x10)
+
+    # 65c816 branch offsets are relative to the program counter after the
+    # one-byte operand, so the encoded value is (distance - 1) mod 256
+    def test_branch_relative_forward(self):
+        pointer = self._pointer(0x110, 0x100, LabelPointer.BRANCH_RELATIVE)
+        self.assertEqual(int(pointer), 0x0f)
+
+    def test_branch_relative_backward(self):
+        pointer = self._pointer(0x100, 0x110, LabelPointer.BRANCH_RELATIVE)
+        self.assertEqual(int(pointer), 0xef) # two's complement of -0x11
+
+    def test_branch_relative_backward_minimal(self):
+        # branching to the previous byte: distance -1, encoded offset -2 (0xfe)
+        pointer = self._pointer(0x0ff, 0x100, LabelPointer.BRANCH_RELATIVE)
+        self.assertEqual(int(pointer), 0xfe)
+
+    def test_branch_relative_out_of_range_raises(self):
+        pointer = self._pointer(0x200, 0x100, LabelPointer.BRANCH_RELATIVE)
+        with self.assertRaises(ValueError):
+            int(pointer)
+
+        pointer = self._pointer(0x100, 0x200, LabelPointer.BRANCH_RELATIVE)
+        with self.assertRaises(ValueError):
+            int(pointer)
+
+    def test_to_bytes_little_endian(self):
+        pointer = self._pointer(0x1234, 0x100, LabelPointer.ABSOLUTE)
+        self.assertEqual(pointer.to_bytes(2, "little"), b"\x34\x12")
+
+    def test_comparisons_use_pointed_to_address(self):
+        pointer = self._pointer(0x1234, 0x100, LabelPointer.ABSOLUTE)
+        self.assertTrue(pointer < 0x1235)
+        self.assertTrue(pointer <= 0x1234)
+        self.assertTrue(pointer > 0x1233)
+        self.assertTrue(pointer >= 0x1234)
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_seed.py b/tests/test_seed.py
new file mode 100644
index 00000000..fdda35f0
--- /dev/null
+++ b/tests/test_seed.py
@@ -0,0 +1,51 @@
+import random
+import string
+import unittest
+
+from seed import SEED_LENGTH, generate_seed, seed_rng
+
+class TestGenerateSeed(unittest.TestCase):
+    def test_length(self):
+        self.assertEqual(len(generate_seed()), SEED_LENGTH)
+
+    def test_charset(self):
+        alpha_digits = set(string.ascii_lowercase + string.digits)
+        for _ in range(20):
+            self.assertTrue(set(generate_seed()) <= alpha_digits)
+
+class TestSeedRng(unittest.TestCase):
+    def setUp(self):
+        self._rng_state = random.getstate()
+
+    def tearDown(self):
+        random.setstate(self._rng_state)
+
+    def test_returns_given_seed(self):
+        self.assertEqual(seed_rng("abc123", "-i -o"), "abc123")
+
+    def test_generates_seed_when_none_given(self):
+        seed = seed_rng(None, "")
+        self.assertEqual(len(seed), SEED_LENGTH)
+
+    def test_deterministic_for_same_seed_and_flags(self):
+        # seed reproducibility is a core requirement: the same seed + flags
+        # must always produce the same random stream
+        seed_rng("abc123", "-flags")
+        first = [random.random() for _ in range(10)]
+
+        seed_rng("abc123", "-flags")
+        second = [random.random() for _ in range(10)]
+
+        self.assertEqual(first, second)
+
+    def test_different_flags_produce_different_stream(self):
+        seed_rng("abc123", "-flags one")
+        first = random.random()
+
+        seed_rng("abc123", "-flags two")
+        second = random.random()
+
+        self.assertNotEqual(first, second)
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_space.py b/tests/test_space.py
new file mode 100644
index 00000000..8421017b
--- /dev/null
+++ b/tests/test_space.py
@@ -0,0 +1,152 @@
+import unittest
+
+import memory.space
+from memory.errors import RomSpaceError
+from memory.heap import Heap
+from memory.space import Allocate, Bank, Free, Reserve, Space, Write
+
+class FakeRom:
+    """Stand-in for memory.rom.ROM backed by a plain list (no ROM file needed)."""
+    def __init__(self, size = 0x20000):
+        self.data = [0] * size
+
+    def set_bytes(self, address, values):
+        self.data[address : address + len(values)] = values
+        return address + len(values)
+
+    def get_bytes(self, address, count):
+        return self.data[address : address + count]
+
+    def get_byte(self, address):
+        return self.data[address]
+
+class SpaceTestCase(unittest.TestCase):
+    """Space uses class-level shared state; isolate and restore it per test."""
+    def setUp(self):
+        self._saved_rom = Space.rom
+        self._saved_heaps = Space.heaps
+        self._saved_spaces = Space.spaces
+
+        self.rom = FakeRom()
+        Space.rom = self.rom
+        Space.heaps = { bank : Heap() for bank in Bank }
+        Space.spaces = []
+
+    def tearDown(self):
+        Space.rom = self._saved_rom
+        Space.heaps = self._saved_heaps
+        Space.spaces = self._saved_spaces
+
+class TestSpaceWrite(SpaceTestCase):
+    def test_write_within_bounds(self):
+        space = Space(0x100, 0x10f, "test space")
+        space.write(1, 2, 3)
+        self.assertEqual(self.rom.get_bytes(0x100, 3), [1, 2, 3])
+        self.assertEqual(space.next_address, 0x103)
+
+    def test_writes_are_sequential(self):
+        space = Space(0x100, 0x10f, "test space")
+        space.write([1, 2])
+        space.write([3, 4])
+        self.assertEqual(self.rom.get_bytes(0x100, 4), [1, 2, 3, 4])
+
+    def test_write_nested_values_are_flattened(self):
+        space = Space(0x100, 0x10f, "test space")
+        space.write([1, [2, 3]], (4, 5), b"\x06")
+        self.assertEqual(self.rom.get_bytes(0x100, 6), [1, 2, 3, 4, 5, 6])
+
+    def test_write_exactly_full(self):
+        space = Space(0x100, 0x103, "test space")
+        space.write([1, 2, 3, 4])
+        self.assertEqual(self.rom.get_bytes(0x100, 4), [1, 2, 3, 4])
+
+    def test_write_overflow_raises(self):
+        space = Space(0x100, 0x103, "test space")
+        with self.assertRaises(RomSpaceError):
+            space.write([1, 2, 3, 4, 5])
+
+    def test_write_overflow_does_not_modify_rom(self):
+        # regression test: overflow used to be detected only after the
+        # out-of-bounds bytes had already been written to the rom buffer
+        self.rom.data[0x104] = 0xaa # byte just past the end of the space
+        space = Space(0x100, 0x103, "test space")
+        with self.assertRaises(RomSpaceError):
+            space.write([1, 2, 3, 4, 5])
+        self.assertEqual(self.rom.get_byte(0x104), 0xaa)
+        self.assertEqual(self.rom.get_bytes(0x100, 4), [0, 0, 0, 0])
+
+    def test_overflow_error_is_a_memory_error_for_backward_compatibility(self):
+        space = Space(0x100, 0x103, "test space")
+        with self.assertRaises(MemoryError):
+            space.write([1, 2, 3, 4, 5])
+
+class TestSpaceLabels(SpaceTestCase):
+    def test_backward_branch_label_resolution(self):
+        space = Space(0x100, 0x10f, "test space")
+        space.write(
+            "LOOP",
+            0xea, 0xea,                     # 2 placeholder bytes
+            0x80, space.branch_distance("LOOP"), # BRA LOOP
+        )
+        # branch operands stay as LabelPointer objects in the rom buffer and
+        # resolve via __index__; distance -3, encoded as (distance - 1) mod 256
+        self.assertEqual(int(self.rom.get_byte(0x103)), 0xfc)
+
+    def test_duplicate_label_raises(self):
+        space = Space(0x100, 0x10f, "test space")
+        space.write("LABEL", 0xea)
+        with self.assertRaises(ValueError):
+            space.write("LABEL", 0xea)
+
+class TestSpaceClear(SpaceTestCase):
+    def test_clear_with_single_value(self):
+        space = Space(0x100, 0x103, "test space", clear_value = 0xff)
+        self.assertEqual(self.rom.get_bytes(0x100, 4), [0xff] * 4)
+
+    def test_clear_with_instruction_value(self):
+        # multi-byte clear values are callables with a len(), like the
+        # instruction objects from instruction/ (e.g. space.clear(field.NOP()))
+        class FakeInstruction:
+            def __len__(self):
+                return 2
+            def __call__(self, space):
+                return [1, 2]
+
+        space = Space(0x100, 0x103, "test space", clear_value = FakeInstruction())
+        self.assertEqual(self.rom.get_bytes(0x100, 4), [1, 2, 1, 2])
+
+class TestSpaceConflicts(SpaceTestCase):
+    def test_overlapping_spaces_raise(self):
+        Space(0x100, 0x1ff, "first")
+        with self.assertRaises(RuntimeError):
+            Space(0x180, 0x280, "second")
+
+    def test_adjacent_spaces_allowed(self):
+        Space(0x100, 0x1ff, "first")
+        Space(0x200, 0x2ff, "second") # must not raise
+
+class TestAllocateReserveFree(SpaceTestCase):
+    def test_allocate_after_free(self):
+        Free(0x2000, 0x2fff)
+        space = Allocate(Bank["C0"], 0x100, "test allocation")
+        self.assertEqual(space.start_address, 0x2000)
+        self.assertEqual(len(space), 0x100)
+
+    def test_allocate_without_free_space_raises(self):
+        with self.assertRaises(RomSpaceError):
+            Allocate(Bank["C0"], 0x100, "test allocation")
+
+    def test_reserve_excludes_range_from_allocation(self):
+        Free(0x2000, 0x20ff)
+        Reserve(0x2000, 0x207f, "reserved range")
+        space = Allocate(Bank["C0"], 0x80, "test allocation")
+        self.assertEqual(space.start_address, 0x2080)
+
+    def test_write_helper_allocates_and_writes(self):
+        Free(0x2000, 0x2fff)
+        space = Write(Bank["C0"], [1, 2, 3], "test write")
+        self.assertEqual(len(space), 3)
+        self.assertEqual(self.rom.get_bytes(space.start_address, 3), [1, 2, 3])
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_utils.py b/tests/test_utils.py
new file mode 100644
index 00000000..dd69390d
--- /dev/null
+++ b/tests/test_utils.py
@@ -0,0 +1,101 @@
+import random
+import unittest
+
+from utils.compression import compress, decompress
+from utils.flatten import flatten
+from utils.intersection import intersection
+from utils.shuffle_if import shuffle_if
+from utils.weighted_random import weighted_random
+
+class TestFlatten(unittest.TestCase):
+    def test_scalar(self):
+        self.assertEqual(flatten(5), [5])
+
+    def test_flat_list(self):
+        self.assertEqual(flatten([1, 2, 3]), [1, 2, 3])
+
+    def test_nested_lists_and_tuples(self):
+        self.assertEqual(flatten([1, [2, (3, 4)], [[5]]]), [1, 2, 3, 4, 5])
+
+    def test_bytes_are_flattened_to_ints(self):
+        self.assertEqual(flatten(b"\x01\x02"), [1, 2])
+
+    def test_strings_are_not_flattened(self):
+        # strings are labels in the memory model and must stay intact
+        self.assertEqual(flatten(["LABEL", 1]), ["LABEL", 1])
+
+    def test_empty(self):
+        self.assertEqual(flatten([]), [])
+
+class TestCompression(unittest.TestCase):
+    def assert_round_trip(self, data):
+        self.assertEqual(decompress(compress(data)), data)
+
+    def test_round_trip_repetitive_data(self):
+        self.assert_round_trip([1, 2, 3] * 100)
+
+    def test_round_trip_incompressible_data(self):
+        rng = random.Random(42)
+        self.assert_round_trip([rng.randrange(256) for _ in range(500)])
+
+    def test_round_trip_runs(self):
+        self.assert_round_trip([0] * 1000)
+
+    def test_round_trip_mixed(self):
+        data = list(range(256)) + [7] * 50 + list(range(0, 256, 2)) * 3
+        self.assert_round_trip(data)
+
+    def test_size_header(self):
+        compressed = compress([1, 2, 3] * 10)
+        size = int.from_bytes(bytes(compressed[:2]), "little")
+        self.assertEqual(size, len(compressed))
+
+class TestIntersection(unittest.TestCase):
+    def test_intersection_preserves_first_list_order(self):
+        self.assertEqual(intersection([3, 1, 2, 5], [2, 3]), [3, 2])
+
+    def test_disjoint(self):
+        self.assertEqual(intersection([1, 2], [3, 4]), [])
+
+class TestWeightedRandom(unittest.TestCase):
+    def test_returns_valid_index(self):
+        rng_state = random.getstate()
+        try:
+            random.seed("weighted_random test")
+            for _ in range(100):
+                self.assertIn(weighted_random([1, 2, 3]), (0, 1, 2))
+        finally:
+            random.setstate(rng_state)
+
+    def test_zero_weights_never_chosen(self):
+        rng_state = random.getstate()
+        try:
+            random.seed("weighted_random test")
+            for _ in range(100):
+                self.assertEqual(weighted_random([0, 1, 0]), 1)
+        finally:
+            random.setstate(rng_state)
+
+class TestShuffleIf(unittest.TestCase):
+    def test_only_matching_elements_move(self):
+        rng_state = random.getstate()
+        try:
+            random.seed("shuffle_if test")
+            values = list(range(20))
+            original = list(values)
+            is_even = lambda value : value % 2 == 0
+
+            shuffle_if(values, is_even)
+
+            # odd elements stay in place
+            for index, value in enumerate(original):
+                if not is_even(value):
+                    self.assertEqual(values[index], value)
+            # even elements are permuted amongst themselves
+            self.assertEqual(sorted(v for v in values if is_even(v)),
+                             sorted(v for v in original if is_even(v)))
+        finally:
+            random.setstate(rng_state)
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_valid_rom_file.py b/tests/test_valid_rom_file.py
new file mode 100644
index 00000000..0b6f9415
--- /dev/null
+++ b/tests/test_valid_rom_file.py
@@ -0,0 +1,25 @@
+import hashlib
+import os
+import tempfile
+import unittest
+
+from valid_rom_file import get_sha256_hex, valid_rom_file
+
+class TestValidRomFile(unittest.TestCase):
+    def setUp(self):
+        with tempfile.NamedTemporaryFile(delete = False) as temp_file:
+            temp_file.write(b"not a rom" * 1000)
+            self.temp_path = temp_file.name
+
+    def tearDown(self):
+        os.unlink(self.temp_path)
+
+    def test_get_sha256_hex(self):
+        expected = hashlib.sha256(b"not a rom" * 1000).hexdigest()
+        self.assertEqual(get_sha256_hex(self.temp_path), expected)
+
+    def test_invalid_rom_rejected(self):
+        self.assertFalse(valid_rom_file(self.temp_path))
+
+if __name__ == "__main__":
+    unittest.main()

From 9407f3d08720e82c5fc1649ca9a64d682522a94f Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 11 Jun 2026 01:27:56 +0000
Subject: [PATCH 2/6] Fix inverted init_run_success setter in data/character.py

The setter stored (value - MAX_RUN_SUCCESS), a negative number, while the
getter computes (MAX_RUN_SUCCESS - raw). A set/get round trip returned the
wrong value and the negative raw value would corrupt the bit-packed byte at
init_data[21] on write.

Latent bug: nothing calls the setter today, so generated seeds are
unaffected; the first feature to use it would have silently corrupted
character init data. Regression tests added.

https://claude.ai/code/session_01QnF26DB2TYi1hpJE8L6EJr
---
 data/character.py       |  2 +-
 tests/test_character.py | 43 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_character.py

diff --git a/data/character.py b/data/character.py
index db012726..022cfef7 100644
--- a/data/character.py
+++ b/data/character.py
@@ -85,7 +85,7 @@ def init_run_success(self, value):
         if value < self.MIN_RUN_SUCCESS or value > self.MAX_RUN_SUCCESS:
             raise ValueError(f"Character.init_run_success setter: invalid value {value}")
 
-        self._init_run_success = value - self.MAX_RUN_SUCCESS
+        self._init_run_success = self.MAX_RUN_SUCCESS - value
 
     # initial level of characters is 3
     # when new character is recruited, their level is set to the average of all other recruited characters + init_level_factor
diff --git a/tests/test_character.py b/tests/test_character.py
new file mode 100644
index 00000000..537cd8fc
--- /dev/null
+++ b/tests/test_character.py
@@ -0,0 +1,43 @@
+import unittest
+
+from data.character import Character
+
+def make_character():
+    init_data = [0] * 22
+    name_data = [0xff] * 6 # padding bytes only, i.e. an empty name
+    return Character(0, init_data, name_data)
+
+class TestInitRunSuccess(unittest.TestCase):
+    # run success is stored inverted in 2 bits of init_data[21]:
+    # 0b11 = 2, 0b10 = 3, 0b01 = 4, 0b00 = 5 (run_value = 5 - bit_value)
+    def test_getter_decodes_stored_bits(self):
+        character = make_character()
+        for raw, expected in ((0b11, 2), (0b10, 3), (0b01, 4), (0b00, 5)):
+            character._init_run_success = raw
+            self.assertEqual(character.init_run_success, expected)
+
+    def test_setter_round_trip(self):
+        # regression test: the setter used to store (value - MAX) instead of
+        # (MAX - value), corrupting the bit-packed init data byte
+        character = make_character()
+        for value in range(Character.MIN_RUN_SUCCESS, Character.MAX_RUN_SUCCESS + 1):
+            character.init_run_success = value
+            self.assertEqual(character.init_run_success, value)
+            self.assertIn(character._init_run_success, (0b00, 0b01, 0b10, 0b11))
+
+    def test_setter_rejects_out_of_range(self):
+        character = make_character()
+        with self.assertRaises(ValueError):
+            character.init_run_success = Character.MIN_RUN_SUCCESS - 1
+        with self.assertRaises(ValueError):
+            character.init_run_success = Character.MAX_RUN_SUCCESS + 1
+
+class TestInitLevelFactor(unittest.TestCase):
+    def test_round_trip(self):
+        character = make_character()
+        for adjustment in (0, 2, 5, -3):
+            character.init_level_factor = adjustment
+            self.assertEqual(character.init_level_factor, adjustment)
+
+if __name__ == "__main__":
+    unittest.main()

From 0a5e176372eac86e500914a003f672be10258797 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 11 Jun 2026 01:28:49 +0000
Subject: [PATCH 3/6] Fix NameError in objective quest-lookup failure path

The assertion message referenced bare suplex_train_quest_name instead of
cls.suplex_train_quest_name, so a failed lookup raised NameError instead of
the intended diagnostic. Use an explicit RuntimeError so the check also
survives python -O.

https://claude.ai/code/session_01QnF26DB2TYi1hpJE8L6EJr
---
 objectives/objective.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/objectives/objective.py b/objectives/objective.py
index 323429d9..2a326c13 100644
--- a/objectives/objective.py
+++ b/objectives/objective.py
@@ -128,5 +128,5 @@ def _init_suplex_train_quest_value(cls):
             if value != 'r' and quest_bit[value].name == cls.suplex_train_quest_name:
                 cls.suplex_train_quest_value = value
                 return
-        assert False, f"'{suplex_train_quest_name}' quest value not found"
+        raise RuntimeError(f"'{cls.suplex_train_quest_name}' quest value not found")
 Objective._init_suplex_train_quest_value()

From 0f5cf0c9316ab09a7b148567763c6c35cb2971cf Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 11 Jun 2026 01:28:49 +0000
Subject: [PATCH 4/6] Remove dead code in args/challenges.py

Delete an unreachable 'return opts' (opts was never defined) after the
options() return statement, and the first of two definitions of
_format_spells_log_entries, which was silently shadowed by the second.
No behavior change: the surviving definition is the one that already ran.

https://claude.ai/code/session_01QnF26DB2TYi1hpJE8L6EJr
---
 args/challenges.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/args/challenges.py b/args/challenges.py
index 046e7c71..34e12f41 100644
--- a/args/challenges.py
+++ b/args/challenges.py
@@ -115,14 +115,6 @@ def options(args):
         ("Remove Learnable Spells", args.remove_learnable_spell_ids, "remove_learnable_spell_ids"),
         ("No Saves", args.no_saves, "no_saves"),
     ]
-        
-    return opts
-def _format_spells_log_entries(spell_ids):
-    from constants.spells import id_spell
-    spell_entries = []
-    for i, spell_id in enumerate(spell_ids):
-        spell_entries.append(("", id_spell[spell_id], f"rls_{i}"))
-    return spell_entries
 
 def _format_spells_log_entries(spell_ids):
     from constants.spells import id_spell

From 6712481cc48960087fbe3f55cf80cbb673986c44 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 11 Jun 2026 01:28:49 +0000
Subject: [PATCH 5/6] Remove dead RewardType.NONE check in whelk event

Reward.type is initialized to Python None and RewardType.NONE is never
assigned anywhere, so the early-return could never trigger. By the time
mod() runs, all rewards have been assigned a real type.

https://claude.ai/code/session_01QnF26DB2TYi1hpJE8L6EJr
---
 event/whelk.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/event/whelk.py b/event/whelk.py
index 91966d70..47efc50b 100644
--- a/event/whelk.py
+++ b/event/whelk.py
@@ -16,9 +16,6 @@ def init_event_bits(self, space):
         )
 
     def mod(self):
-        if self.reward.type == RewardType.NONE:
-            return
-
         self.dialog_mod()
         self.entrance_event_mod()
         self.cleanup_mod()

From c8b1d3822e835ac2f3cb5a5e76461444fc4ba25e Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 11 Jun 2026 01:29:46 +0000
Subject: [PATCH 6/6] Fix silent failures: compression truncation, metadata
 group condition, scaling description

- utils/compression.py: compress() printed an error and returned data with
  a truncated size header when output exceeded 65535 bytes, which would
  silently corrupt whatever consumed it; raise ValueError instead
  (regression test added)
- metadata/flag_metadata_writer.py: 'if type(group_title):' was always
  true (a type object is truthy); replace with an explicit str/None/callable
  dispatch that preserves the existing manifest output exactly
- battle/scaling.py: the xp/gp scaling routine was labeled 'scale hp/mp'
  in its space description (debug/error output only, not ROM content)

https://claude.ai/code/session_01QnF26DB2TYi1hpJE8L6EJr
---
 battle/scaling.py                | 2 +-
 metadata/flag_metadata_writer.py | 7 +++++--
 tests/test_utils.py              | 8 ++++++++
 utils/compression.py             | 4 ++--
 4 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/battle/scaling.py b/battle/scaling.py
index 27b2e502..592242b4 100644
--- a/battle/scaling.py
+++ b/battle/scaling.py
@@ -185,7 +185,7 @@ def scale_xp_gp_mod(self):
             asm.STA(0xea, asm.DIR),
             asm.JMP(self.scale_value, asm.ABS),
         ]
-        space = Write(Bank.C2, src, "scale hp/mp")
+        space = Write(Bank.C2, src, "scale xp/gp")
         self.scale_xp_gp = space.start_address
 
         if args.xp_gp_scaling:
diff --git a/metadata/flag_metadata_writer.py b/metadata/flag_metadata_writer.py
index c8337299..ab1712c9 100644
--- a/metadata/flag_metadata_writer.py
+++ b/metadata/flag_metadata_writer.py
@@ -57,8 +57,11 @@ def get_flag_metadata(self):
                     self.metadata[key].args = action.metavar
                 if action.choices is not None and isinstance(action.choices, list) and not isinstance(action.choices, range):
                     self.metadata[key].allowed_values = list(action.choices)
-                if type(group_title):
-                    self.metadata[key].group = group_title if type(group_title) == str else None if group_title == None else group_title()
+                # group titles may be a plain string, None, or a callable returning the name
+                if isinstance(group_title, str) or group_title is None:
+                    self.metadata[key].group = group_title
+                else:
+                    self.metadata[key].group = group_title()
                 if getattr(action, 'mutually_exclusive_group_title', None) is not None:
                     self.metadata[key].mutually_exclusive_group = action.mutually_exclusive_group_title
                 if getattr(action, 'choices', None) is not None:
diff --git a/tests/test_utils.py b/tests/test_utils.py
index dd69390d..b134a817 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -50,6 +50,14 @@ def test_size_header(self):
         size = int.from_bytes(bytes(compressed[:2]), "little")
         self.assertEqual(size, len(compressed))
 
+    def test_oversized_output_raises(self):
+        # incompressible data grows ~9/8x when compressed; this used to
+        # print an error and return a truncated size header instead of raising
+        rng = random.Random(42)
+        data = [rng.randrange(256) for _ in range(60000)]
+        with self.assertRaises(ValueError):
+            compress(data)
+
 class TestIntersection(unittest.TestCase):
     def test_intersection_preserves_first_list_order(self):
         self.assertEqual(intersection([3, 1, 2, 5], [2, 3]), [3, 2])
diff --git a/utils/compression.py b/utils/compression.py
index ab9afb1d..0705fbde 100644
--- a/utils/compression.py
+++ b/utils/compression.py
@@ -73,8 +73,8 @@ def compress(data):
 
     size = len(result) + 2
     if size > MAX_COMPRESS_SIZE:
-        print(f"Error: compress: data too large (compressed size {size} > 65535)")
-        size = MAX_COMPRESS_SIZE
+        # a wrong size header would silently corrupt the decompressed data
+        raise ValueError(f"compress: data too large (compressed size {size} > {MAX_COMPRESS_SIZE})")
     return list(size.to_bytes(2, "little")) + result
 
 def decompress(data):