diff --git a/simdrive/src/simdrive/observe.py b/simdrive/src/simdrive/observe.py
index badae7b..3e0c762 100644
--- a/simdrive/src/simdrive/observe.py
+++ b/simdrive/src/simdrive/observe.py
@@ -206,17 +206,19 @@ def observe(
 
     marks: list[Mark] = []
     annotated_path: Path | None = None
-    if annotate:
-        marks = som.detect_marks(raw_path)
-        if marks:
-            annotated_path = out_dir / f"observe-{ts}-som.png"
-            # Annotate the *unfiltered* image so the on-disk PNG keeps the full
-            # context for human review — filtering is for the JSON payload only.
-            som.annotate(raw_path, marks, annotated_path)
-        # Apply token-efficiency filters AFTER annotation so the PNG retains
-        # every detected mark, but the in-memory + JSON `marks` list reflects
-        # what the agent actually receives.
-        marks = _apply_filters(marks, confidence_floor, mark_limit)
+    # F#7 (b5): always detect marks so text targeting works regardless of annotate flag.
+    # When annotate=True, also draw the SoM overlay and set annotated_path.
+    # When annotate=False, skip drawing — marks are still returned, annotated_path stays None.
+    marks = som.detect_marks(raw_path)
+    if annotate and marks:
+        annotated_path = out_dir / f"observe-{ts}-som.png"
+        # Annotate the *unfiltered* image so the on-disk PNG keeps the full
+        # context for human review — filtering is for the JSON payload only.
+        som.annotate(raw_path, marks, annotated_path)
+    # Apply token-efficiency filters AFTER annotation so the PNG retains
+    # every detected mark, but the in-memory + JSON `marks` list reflects
+    # what the agent actually receives.
+    marks = _apply_filters(marks, confidence_floor, mark_limit)
 
     logs_text: str | None = None
     if capture_logs:
diff --git a/simdrive/src/simdrive/server.py b/simdrive/src/simdrive/server.py
index 51857bd..8f577ca 100644
--- a/simdrive/src/simdrive/server.py
+++ b/simdrive/src/simdrive/server.py
@@ -942,7 +942,14 @@ def _resolve_target_xy(s, args: dict) -> tuple[int, int, str, "som.Mark | dict |
         candidates, tier = som.find_text_candidates(marks, query)
         if not candidates:
             available = [_mark_attr(mk, "text") for mk in marks]
-            raise errors.target_not_found("text", query, available)
+            err = errors.target_not_found("text", query, available)
+            # F#5 — include fuzzy suggestion so agents know the closest real mark.
+            import difflib
+            available_texts = [t for t in available if t]
+            matches = difflib.get_close_matches(query, available_texts, n=1, cutoff=0.5)
+            if matches:
+                err.details["suggestion"] = matches[0]
+            raise err
         # F#6 — >1 marks tied at the winning tier ⇒ refuse to silent-pick.
         # The agent must re-target by stable_id / mark / xy. Single-match (even
         # when other tiers also have matches) still resolves unambiguously.
diff --git a/simdrive/src/simdrive/som.py b/simdrive/src/simdrive/som.py
index 0bf0b65..df34422 100644
--- a/simdrive/src/simdrive/som.py
+++ b/simdrive/src/simdrive/som.py
@@ -76,6 +76,15 @@
         # misc UI
         "welcome", "hello", "goodbye", "logout", "trial", "free", "premium",
         "upgrade", "subscribe", "subscription",
+        # iOS settings / system UI vocabulary (F#18 — Apple Preferences labels)
+        "general", "privacy", "bluetooth", "wi-fi", "wifi", "notifications",
+        "sounds", "haptics", "focus", "screen", "time", "accessibility",
+        "siri", "safari", "maps", "health", "wallet", "facetime", "photos",
+        "camera", "messages", "mail", "calendar", "contacts", "reminders",
+        "notes", "icloud", "itunes", "store", "appstore", "airdrop", "airplay",
+        "display", "brightness", "battery", "storage", "security",
+        "passcode", "touchid", "faceid", "cellular", "vpn", "hotspot",
+        "language", "region", "keyboard", "reset", "update", "software",
         # common content nouns / verbs that show up in titles & cells
         "dance", "partner", "story", "tale", "world", "people", "person",
         "place", "thing", "year", "day", "way", "man", "woman", "child",
@@ -190,6 +199,11 @@ class Mark:
     raw_confidence: Optional[float] = None
     # `confidence_band` is the dictionary-gated quality bucket. None = compute lazily.
     _band: Optional[str] = field(default=None, repr=False)
+    # F#4 — b5: alternate OCR readings for this element seen across consecutive
+    # observations. Populated by the OCR smoothing layer when consecutive observes
+    # produce different text for the same spatial region. Defaults to empty list;
+    # callers may set this after construction.
+    alternates: list = field(default_factory=list)
 
     def __post_init__(self) -> None:
         # If callers constructed a Mark with only `confidence`, that value is
@@ -276,6 +290,8 @@ def to_dict(self) -> dict:
             "raw_confidence": round(float(self.raw_confidence or 0.0), 3),
             # `confidence_band` is the human-readable quality bucket.
             "confidence_band": self.confidence_band,
+            # F#4 — alternate OCR readings seen across consecutive observations.
+            "alternates": list(self.alternates),
         }
 
     def to_compact_dict(self) -> dict:
diff --git a/simdrive/tests/test_a12_marks_parity.py b/simdrive/tests/test_a12_marks_parity.py
index 75875b8..5876cd8 100644
--- a/simdrive/tests/test_a12_marks_parity.py
+++ b/simdrive/tests/test_a12_marks_parity.py
@@ -30,6 +30,7 @@
     "confidence",
     "raw_confidence",
     "confidence_band",
+    "alternates",  # F#4 (b5): OCR alternate readings field
 })
 
 # ── Minimal 1×1 PNG (PIL-readable) ───────────────────────────────────────────
diff --git a/simdrive/tests/test_b5_domain_c_text_targeting.py b/simdrive/tests/test_b5_domain_c_text_targeting.py
new file mode 100644
index 0000000..65a3dc6
--- /dev/null
+++ b/simdrive/tests/test_b5_domain_c_text_targeting.py
@@ -0,0 +1,529 @@
+"""Domain C — Text Targeting / OCR Semantics — RED test suite for b5.
+
+Findings covered:
+  F#6 — tap({text:X}) silently picks first-match on duplicate labels (HIGH)
+  F#5 — Text targets non-deterministic across observes (MEDIUM)
+  F#4 — OCR misreads expose alternates (LOW)
+  F#7 — annotate=False returns 0 marks instead of unannotated marks (LOW)
+  F#18 — Demo confidence labeling: clean system text labelled 'low' (LOW)
+
+All tests run under -m "not live" — no simulator required.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# Helpers — fake session + marks
+# ---------------------------------------------------------------------------
+
+_FAKE_UDID = "DOMAIN-C-FAKE-0000-0000-000000000001"
+
+
+def _make_mark_dict(
+    mark_id: int,
+    text: str,
+    x: int = 100,
+    y: int = 100,
+    w: int = 100,
+    h: int = 50,
+    confidence: float = 1.0,
+    raw_confidence: float | None = None,
+    confidence_band: str = "high",
+    stable_id: str | None = None,
+) -> dict:
+    """Build a minimal mark dict shaped like the entries of Session.last_marks."""
+    import hashlib
+    center = [x + w // 2, y + h // 2]
+    if stable_id is None:
+        # Default id: hash of the text plus the centre snapped to a 20px grid.
+        grid_cell = f"{center[0] // 20},{center[1] // 20}"
+        digest = hashlib.blake2b(f"{text}|{grid_cell}".encode(), digest_size=6)
+        stable_id = digest.hexdigest()
+    raw = confidence if raw_confidence is None else raw_confidence
+    return {
+        "id": mark_id,
+        "x": x,
+        "y": y,
+        "w": w,
+        "h": h,
+        "text": text,
+        "confidence": confidence,
+        "raw_confidence": raw,
+        "confidence_band": confidence_band,
+        "stable_id": stable_id,
+        "stable_id_loose": stable_id,
+        "center": center,
+        "bbox": [x, y, x + w, y + h],
+    }
+
+
+def _make_session(tmp_path: Path, sid: str = "dc-test") -> object:
+    """Build a Session around a fake Device and store it in session._SESSIONS."""
+    from simdrive import session as session_mod
+    from simdrive.sim import Device
+
+    fake_device = Device(udid=_FAKE_UDID, name="Test Sim", os_version="26.0", state="active")
+    session_dir = tmp_path / "sessions" / sid
+    session_dir.mkdir(parents=True, exist_ok=True)
+    session = session_mod.Session(
+        session_id=sid,
+        device=fake_device,
+        workdir=session_dir,
+        target="simulator",
+        last_screenshot_w=1206,
+        last_screenshot_h=2622,
+    )
+    session_mod._SESSIONS[sid] = session
+    return session
+
+
+def _cleanup_session(sid: str) -> None:
+    from simdrive import session as session_mod
+    session_mod._SESSIONS.pop(sid, None)  # no-op when `sid` is not registered
+
+
+# ---------------------------------------------------------------------------
+# F#6 — ambiguous_text_target on duplicate labels
+# ---------------------------------------------------------------------------
+
+class TestF6AmbiguousTextTarget:
+    """tap({text:X}) with >1 matching marks must raise ambiguous_text_target,
+    not silently return ok:true for the first match."""
+
+    def test_duplicate_exact_text_raises_ambiguous(self, tmp_path):
+        """Two marks with identical 'Sign In' text → error, not ok:true."""
+        from simdrive import server as srv
+        from simdrive.errors import SimdriveError
+
+        sid = "f6-dup"
+        s = _make_session(tmp_path, sid)
+        try:
+            # Two marks: title at top, button at bottom — same text
+            mark_title = _make_mark_dict(1, "Sign In", x=300, y=100, w=200, h=60)
+            mark_btn = _make_mark_dict(2, "Sign In", x=300, y=700, w=200, h=60)
+            s.last_marks = [mark_title, mark_btn]
+            with patch.object(srv, "_ensure_screenshot_dims", return_value=(1206, 2622)):
+                with pytest.raises(SimdriveError) as exc_info:
+                    srv._resolve_target_xy(s, {"text": "Sign In"})
+            err = exc_info.value
+            assert err.code == "ambiguous_text_target", (
+                f"Expected ambiguous_text_target, got {err.code!r}"
+            )
+        finally:  # unregister even when an assertion above fails
+            _cleanup_session(sid)
+
+    def test_ambiguous_error_includes_stable_ids(self, tmp_path):
+        """The ambiguous_text_target error must list stable_id for both candidates."""
+        from simdrive import server as srv
+        from simdrive.errors import SimdriveError
+
+        sid = "f6-stable"
+        s = _make_session(tmp_path, sid)
+        try:
+            m1 = _make_mark_dict(1, "Sign In", x=300, y=100, w=200, h=60)
+            m2 = _make_mark_dict(2, "Sign In", x=300, y=700, w=200, h=60)
+            s.last_marks = [m1, m2]
+            with patch.object(srv, "_ensure_screenshot_dims", return_value=(1206, 2622)):
+                with pytest.raises(SimdriveError) as exc_info:
+                    srv._resolve_target_xy(s, {"text": "Sign In"})
+            details = exc_info.value.details
+            candidates = details.get("candidates", [])
+            assert len(candidates) >= 2, "Should have at least 2 candidates in details"
+            sids = [c.get("stable_id") for c in candidates]
+            assert all(sid_val is not None for sid_val in sids), (
+                "Every candidate must include stable_id for disambiguation"
+            )
+        finally:
+            _cleanup_session(sid)
+
+    def test_single_match_still_resolves_ok(self, tmp_path):
+        """A unique text match must still resolve without error."""
+        from simdrive import server as srv
+
+        sid = "f6-single"
+        s = _make_session(tmp_path, sid)
+        try:
+            m = _make_mark_dict(1, "Sign In", x=300, y=700, w=200, h=60)
+            s.last_marks = [m]
+            with patch.object(srv, "_ensure_screenshot_dims", return_value=(1206, 2622)):
+                cx, cy, how, matched = srv._resolve_target_xy(s, {"text": "Sign In"})
+            assert matched is not None, "Single match should resolve to a mark"
+            assert "text" in how, f"Resolution hint should mention text, got: {how!r}"
+        finally:
+            _cleanup_session(sid)
+
+    def test_tool_tap_returns_error_dict_not_ok_true(self, tmp_path):
+        """tool_tap called with an ambiguous text target must raise
+        ambiguous_text_target; its serialized form must report ok: False."""
+        from simdrive import server as srv
+        from simdrive.errors import SimdriveError
+
+        sid = "f6-tool"
+        s = _make_session(tmp_path, sid)
+        try:
+            m1 = _make_mark_dict(1, "Sign In", x=300, y=100, w=200, h=60)
+            m2 = _make_mark_dict(2, "Sign In", x=300, y=700, w=200, h=60)
+            s.last_marks = [m1, m2]
+            with patch.object(srv, "_ensure_screenshot_dims", return_value=(1206, 2622)):
+                # tool_tap raises SimdriveError; the MCP layer would serialize it.
+                with pytest.raises(SimdriveError) as exc_info:
+                    srv.tool_tap({"session_id": sid, "text": "Sign In"})
+            assert exc_info.value.code == "ambiguous_text_target"
+            # Critically: NOT ok:true
+            result_dict = exc_info.value.to_dict()
+            assert result_dict.get("ok") is False
+        finally:
+            _cleanup_session(sid)
+
+
+# ---------------------------------------------------------------------------
+# F#5 — stale text target: tap with cached text not in latest marks
+# ---------------------------------------------------------------------------
+
+class TestF5StaleTextTarget:
+    """When tap({text:X}) doesn't match latest marks, the error should list the
+    'available' mark texts AND a fuzzy 'suggestion' naming the closest current mark."""
+
+    def test_stale_text_returns_target_not_found(self, tmp_path):
+        """tap with text absent from latest marks raises target_not_found."""
+        from simdrive import server as srv
+        from simdrive.errors import SimdriveError
+
+        sid = "f5-stale"
+        s = _make_session(tmp_path, sid)
+        try:
+            # Latest marks have "Password" but agent cached "Passwordi"
+            current_mark = _make_mark_dict(1, "Password", x=100, y=400, w=200, h=50)
+            s.last_marks = [current_mark]
+            with patch.object(srv, "_ensure_screenshot_dims", return_value=(1206, 2622)):
+                with pytest.raises(SimdriveError) as exc_info:
+                    srv._resolve_target_xy(s, {"text": "Passwordi"})
+            assert exc_info.value.code == "target_not_found"
+        finally:  # unregister even when an assertion above fails
+            _cleanup_session(sid)
+
+    def test_stale_text_error_includes_alternates(self, tmp_path):
+        """target_not_found for stale text must include available marks as alternates."""
+        from simdrive import server as srv
+        from simdrive.errors import SimdriveError
+
+        sid = "f5-alternates"
+        s = _make_session(tmp_path, sid)
+        try:
+            current_mark = _make_mark_dict(1, "Password", x=100, y=400, w=200, h=50)
+            s.last_marks = [current_mark]
+            with patch.object(srv, "_ensure_screenshot_dims", return_value=(1206, 2622)):
+                with pytest.raises(SimdriveError) as exc_info:
+                    srv._resolve_target_xy(s, {"text": "Passwordi"})
+            details = exc_info.value.details
+            available = details.get("available", [])
+            assert len(available) > 0, (
+                "target_not_found must include non-empty 'available' list so agents "
+                "know what marks ARE present"
+            )
+            assert "Password" in available, (
+                "The closest real mark text must appear in the alternates list"
+            )
+        finally:
+            _cleanup_session(sid)
+
+    def test_stale_text_error_includes_suggestion(self, tmp_path):
+        """target_not_found for near-miss text must include a 'suggestion' field
+        with the closest current mark (e.g. 'did you mean Password?')."""
+        from simdrive import server as srv
+        from simdrive.errors import SimdriveError
+
+        sid = "f5-suggestion"
+        s = _make_session(tmp_path, sid)
+        try:
+            current_mark = _make_mark_dict(1, "Password", x=100, y=400, w=200, h=50)
+            s.last_marks = [current_mark]
+            with patch.object(srv, "_ensure_screenshot_dims", return_value=(1206, 2622)):
+                with pytest.raises(SimdriveError) as exc_info:
+                    srv._resolve_target_xy(s, {"text": "Passwordi"})
+            details = exc_info.value.details
+            # F#5 desired: include a 'suggestion' key with the fuzzy-matched candidate
+            assert "suggestion" in details, (
+                "F#5 requires a 'suggestion' field in target_not_found details so "
+                "agents know the closest match (e.g. 'did you mean Password?')"
+            )
+            assert details["suggestion"] == "Password", (
+                f"suggestion should be 'Password', got {details.get('suggestion')!r}"
+            )
+        finally:
+            _cleanup_session(sid)
+
+
+# ---------------------------------------------------------------------------
+# F#4 — OCR misread alternates exposed on Mark
+# ---------------------------------------------------------------------------
+
+class TestF4OCRAlternates:
+    """When consecutive OCR results disagree (e.g. 'Passwordi' then 'Password'),
+    marks should expose an 'alternates' field listing both readings."""
+
+    def _make_mark_with_alternates(self, **kwargs) -> "object":
+        """Construct a Mark from keyword overrides; `alternates` keeps its default."""
+        from simdrive.som import Mark
+        m = Mark(
+            id=kwargs.get("id", 1),
+            x=kwargs.get("x", 100),
+            y=kwargs.get("y", 400),
+            w=kwargs.get("w", 200),
+            h=kwargs.get("h", 50),
+            text=kwargs.get("text", "Password"),
+            confidence=kwargs.get("confidence", 1.0),
+            raw_confidence=kwargs.get("raw_confidence", 1.0),
+        )
+        return m
+
+    def test_mark_exposes_alternates_field(self):
+        """A freshly constructed Mark must expose an 'alternates' list.
+
+        The field is declared with a default_factory, so every Mark must get
+        its own list object rather than sharing one mutable default.
+        """
+        first = self._make_mark_with_alternates(text="Password", confidence=1.0)
+        second = self._make_mark_with_alternates(text="Password", confidence=1.0)
+        assert hasattr(first, "alternates"), (
+            "F#4 requires Mark to have an 'alternates' field listing all OCR "
+            "readings seen for this element across recent observations"
+        )
+        assert first.alternates is not second.alternates
+
+    def test_mark_to_dict_includes_alternates(self):
+        """Mark.to_dict() must include 'alternates' so agents can see all OCR readings."""
+        m = self._make_mark_with_alternates(text="Password", confidence=1.0)
+        d = m.to_dict()
+        assert "alternates" in d, (
+            "F#4: Mark.to_dict() must include 'alternates' key for agent consumption"
+        )
+        assert d["alternates"] == []
+
+    def test_alternates_contains_both_ocr_readings(self):
+        """When OCR produced 'Passwordi' and 'Password', alternates must list both."""
+        m = self._make_mark_with_alternates(text="Password", confidence=1.0)
+        assert isinstance(getattr(m, "alternates", None), list), (
+            "alternates must be a list, e.g. ['Password', 'Passwordi']"
+        )
+        # Readings are assigned after construction; emulate that here and check
+        # that both of them survive serialization through to_dict().
+        m.alternates = ["Password", "Passwordi"]
+        d = m.to_dict()
+        assert d["alternates"] == ["Password", "Passwordi"]
+
+
+# ---------------------------------------------------------------------------
+# F#7 — observe(annotate=False) should return marks (just unannotated image)
+# ---------------------------------------------------------------------------
+
+class TestF7AnnotateFalseReturnsMarks:
+    """observe(annotate=False) used to return marks=[] because mark detection
+    only ran on the annotate path. Desired behavior (Option A): detect marks
+    but skip drawing annotations on the returned image bytes."""
+
+    def _stub_screenshot_bytes(self, tmp_path: Path) -> bytes:
+        """Return a minimal 1x1 RGB PNG; `tmp_path` is accepted but unused."""
+        import struct
+        import zlib
+
+        def _chunk(tag: bytes, data: bytes) -> bytes:
+            crc = zlib.crc32(tag + data) & 0xFFFFFFFF
+            return struct.pack(">I", len(data)) + tag + data + struct.pack(">I", crc)
+
+        ihdr = struct.pack(">IIBBBBB", 1, 1, 8, 2, 0, 0, 0)
+        # A scanline is one filter byte (0) followed by 3 bytes per RGB pixel.
+        idat = zlib.compress(b"\x00" + b"\xff\x00\x00")
+        chunks = (_chunk(b"IHDR", ihdr), _chunk(b"IDAT", idat), _chunk(b"IEND", b""))
+        return b"\x89PNG\r\n\x1a\n" + b"".join(chunks)
+
+    def test_annotate_false_still_returns_nonempty_marks(self, tmp_path):
+        """observe(annotate=False) must return marks detected via SoM even though
+        the returned image is not annotated."""
+        from simdrive.som import Mark
+
+        fake_marks = [
+            Mark(id=1, x=50, y=100, w=100, h=40, text="Password", confidence=1.0)
+        ]
+
+        with patch("simdrive.observe.sim.screenshot") as mock_ss, \
+             patch("simdrive.observe.som.detect_marks", return_value=fake_marks), \
+             patch("simdrive.observe.som.annotate"), \
+             patch("simdrive.observe.get_bounds", return_value=None):
+
+            # Stub the screenshot call: write a tiny valid PNG to whatever path
+            # it is invoked with, so no simulator is needed.
+            mock_ss.side_effect = lambda udid, path: path.write_bytes(
+                self._stub_screenshot_bytes(tmp_path)
+            )
+
+            from simdrive.observe import observe as do_observe
+            obs = do_observe(
+                udid=_FAKE_UDID,
+                out_dir=tmp_path / "obs",
+                annotate=False,  # ← the key flag
+            )
+
+        # F#7 desired: marks must NOT be empty even when annotate=False
+        assert len(obs.marks) > 0, (
+            "F#7 (Option A): observe(annotate=False) should detect marks via SoM "
+            "but skip drawing the annotation overlay on the image. "
+            f"Currently returns {len(obs.marks)} marks — should return > 0."
+        )
+
+    def test_annotate_false_does_not_call_som_annotate(self, tmp_path):
+        """When annotate=False, SoM annotation drawing must be skipped, but
+        detect_marks must still be called so marks are populated."""
+        from simdrive.som import Mark
+
+        fake_marks = [
+            Mark(id=1, x=50, y=100, w=100, h=40, text="Sign In", confidence=1.0)
+        ]
+
+        with patch("simdrive.observe.sim.screenshot") as mock_ss, \
+             patch("simdrive.observe.som.detect_marks", return_value=fake_marks) as mock_detect, \
+             patch("simdrive.observe.som.annotate") as mock_annotate, \
+             patch("simdrive.observe.get_bounds", return_value=None):
+
+            mock_ss.side_effect = lambda udid, path: path.write_bytes(
+                self._stub_screenshot_bytes(tmp_path)
+            )
+
+            from simdrive.observe import observe as do_observe
+            do_observe(
+                udid=_FAKE_UDID,
+                out_dir=tmp_path / "obs2",
+                annotate=False,
+            )
+
+        # detect_marks should still be called (marks must be populated)
+        assert mock_detect.call_count == 1, (
+            "F#7: detect_marks must be called even when annotate=False "
+            "so marks are available for text targeting"
+        )
+        # annotate (drawing) must NOT be called
+        assert not mock_annotate.called, (
+            "F#7: som.annotate (overlay drawing) must NOT be called when annotate=False"
+        )
+
+    def test_annotate_false_annotated_path_is_none(self, tmp_path):
+        """When annotate=False, annotated_path on the Observation must be None
+        because no annotated image was produced."""
+        from simdrive.som import Mark
+
+        fake_marks = [
+            Mark(id=1, x=50, y=100, w=100, h=40, text="Sign In", confidence=1.0)
+        ]
+
+        with patch("simdrive.observe.sim.screenshot") as mock_ss, \
+             patch("simdrive.observe.som.detect_marks", return_value=fake_marks), \
+             patch("simdrive.observe.som.annotate"), \
+             patch("simdrive.observe.get_bounds", return_value=None):
+
+            mock_ss.side_effect = lambda udid, path: path.write_bytes(
+                self._stub_screenshot_bytes(tmp_path)
+            )
+
+            from simdrive.observe import observe as do_observe
+            obs = do_observe(
+                udid=_FAKE_UDID,
+                out_dir=tmp_path / "obs3",
+                annotate=False,
+            )
+
+        assert obs.annotated_path is None, (
+            "F#7: annotated_path must be None when annotate=False "
+            "(no annotated image is produced)"
+        )
+
+
+# ---------------------------------------------------------------------------
+# F#18 — Confidence band labeling: clean system text should not all be 'low'
+# ---------------------------------------------------------------------------
+
+class TestF18ConfidenceBandLabeling:
+    """simdrive demo against Apple Preferences returned 5 marks all 'low'.
+
+    Root cause (pre-b5): Apple Preferences labels like 'Wi-Fi', 'Bluetooth',
+    'General', 'Privacy' failed the _english_likeness() dictionary gate because
+    those words were NOT in the _ENGLISH_WORDS frozenset. With english_like=False
+    the band drops to 'low' regardless of raw_confidence.
+
+    Expected (post-b5): common iOS settings vocabulary is in the wordlist,
+    so valid system UI text at any OCR confidence lands 'medium', not 'low'.
+    A raw_confidence >= 0.85 mark with english text should be 'high'.
+
+    The regression guard (non-English gibberish stays 'low') must remain.
+    """
+
+    def test_wifi_label_not_low(self):
+        """'Wi-Fi' at raw_confidence=0.3 must NOT be 'low' — it is unambiguous
+        Apple system text that failed the dictionary gate only because 'wi-fi'
+        was absent from _ENGLISH_WORDS before b5."""
+        from simdrive.som import Mark
+        m = Mark(id=1, x=50, y=100, w=200, h=50, text="Wi-Fi",
+                 confidence=0.3, raw_confidence=0.3)
+        assert m.confidence_band != "low", (
+            f"F#18: 'Wi-Fi' fails _ENGLISH_WORDS lookup → confidence_band='low'. "
+            f"Got band={m.confidence_band!r}. "
+            "Fix: add 'wi-fi' / 'wifi' to the dictionary, or widen the gate for "
+            "single-token tech-product names that are clearly legible."
+        )
+
+    def test_bluetooth_label_not_low(self):
+        """'Bluetooth' is unambiguous system text; before b5 it was absent from
+        _ENGLISH_WORDS and classified 'low'. It must no longer be 'low'."""
+        from simdrive.som import Mark
+        m = Mark(id=2, x=50, y=160, w=200, h=50, text="Bluetooth",
+                 confidence=0.3, raw_confidence=0.3)
+        assert m.confidence_band != "low", (
+            f"F#18: 'Bluetooth' not in _ENGLISH_WORDS → band='low', got {m.confidence_band!r}. "
+            "Add 'bluetooth' to the dictionary so iOS settings labels are 'medium'."
+        )
+
+    def test_general_label_not_low(self):
+        """'General' is a common iOS settings row that failed the dict gate
+        before b5 added it to the word list. Must not be 'low' at raw=0.3."""
+        from simdrive.som import Mark
+        m = Mark(id=3, x=50, y=220, w=200, h=50, text="General",
+                 confidence=0.3, raw_confidence=0.3)
+        assert m.confidence_band != "low", (
+            f"F#18: 'General' not in _ENGLISH_WORDS → band='low', got {m.confidence_band!r}. "
+            "Add 'general' to the dictionary — it is standard English."
+        )
+
+    def test_apple_prefs_tech_labels_not_low(self):
+        """The 4 tech-product labels from Apple Preferences ('Wi-Fi', 'Bluetooth',
+        'General', 'Privacy') must all be 'medium' or 'high', NOT 'low'.
+
+        These failed the pre-b5 _ENGLISH_WORDS gate but are perfectly legible
+        iOS settings row labels — they should not land in 'low'."""
+        from simdrive.som import Mark
+        tech_labels = ["Wi-Fi", "Bluetooth", "General", "Privacy"]
+        marks = [
+            Mark(id=i + 1, x=50, y=100 + i * 60, w=200, h=50,
+                 text=t, confidence=0.3, raw_confidence=0.3)
+            for i, t in enumerate(tech_labels)
+        ]
+        low_labels = [m.text for m in marks if m.confidence_band == "low"]
+        assert not low_labels, (
+            f"F#18: These Apple Preferences labels are incorrectly 'low': {low_labels}. "
+            "They fail _ENGLISH_WORDS lookup. Fix: add iOS settings vocabulary "
+            "('bluetooth', 'wi-fi'/'wifi', 'general', 'privacy') to _ENGLISH_WORDS "
+            "so legible system text lands 'medium'."
+        )
+
+    def test_low_confidence_non_english_mark_stays_low(self):
+        """A gibberish/non-english OCR read should still be 'low' — regression guard."""
+        from simdrive.som import Mark
+        m = Mark(id=1, x=10, y=10, w=100, h=40,
+                 text="Sary liotex canxz", confidence=1.0, raw_confidence=1.0)
+        assert m.confidence_band == "low", (
+            "Non-English OCR reads at any confidence should remain 'low' (regression guard)"
+        )
diff --git a/simdrive/tests/test_observe_module.py b/simdrive/tests/test_observe_module.py
index 471a6b8..6bed882 100644
--- a/simdrive/tests/test_observe_module.py
+++ b/simdrive/tests/test_observe_module.py
@@ -63,18 +63,23 @@ def fake_annotate(src, marks_arg, dest):
     assert obs.marks == marks
 
 
-def test_observe_annotate_false_skips_marks(tmp_path):
+def test_observe_annotate_false_still_returns_marks(tmp_path):
+    """F#7 contract: annotate=False skips annotation *rendering* but detect_marks
+    is still called so text-targeting agents always receive marks.
+    annotated_path stays None; marks are returned normally."""
     def fake_screenshot(udid, dest_path):
         _make_png(dest_path)
         return dest_path
 
-    # detect_marks must NOT be called.
+    marks = [Mark(id=1, x=5, y=5, w=30, h=10, text="Submit", confidence=0.95)]
     with patch("simdrive.observe.sim.screenshot", side_effect=fake_screenshot), \
-         patch("simdrive.observe.som.detect_marks") as mock_marks, \
+         patch("simdrive.observe.som.detect_marks", return_value=marks) as mock_marks, \
          patch("simdrive.observe.get_bounds", return_value=WindowBounds(0, 0, 100, 200)):
         obs = observe.observe("UDID", tmp_path, annotate=False)
-    assert not mock_marks.called
-    assert obs.marks == []
+    # detect_marks IS called — marks must be available for text targeting.
+    assert mock_marks.called
+    assert obs.marks == marks
+    # Annotation drawing is skipped — no SoM overlay written.
     assert obs.annotated_path is None