Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,10 @@ def save_snapshot(self, data: dict[str, Any]):
except sqlite3.Error as e:
self._db_error("Snapshot Save Error", e)
except TypeError as e:
self.logger.error(f"Snapshot serialization error: {e}")
# A non-serializable snapshot is a persistent write failure (data lost on restart),
# so flag persistence unhealthy like every other write path — otherwise the #131
# badge stays green while snapshots silently never persist.
self._db_error("Snapshot Serialization Error", e)

def load_snapshot(self) -> dict[str, Any] | None:
"""Loads the last persisted application state snapshot."""
Expand Down
53 changes: 53 additions & 0 deletions build/dashboard/tests/frontend/components.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,59 @@ test('ProxyTotals footer is hidden until the proxy reports data', () => {
assert.match(renderApp({ state: s }), /Proxy totals/);
});

test('ProxyTotals reddens the rejected figure only when reject_level is high', () => {
  // The base fixture's workers are all clean, so the styled-rejects branch is only reachable by
  // overriding the proxy summary by hand.
  const state = clone();
  const reddenedRejects = /status-bad">50/;
  Object.assign(state.proxy_summary, {
    has_data: true,
    accepted: '1200',
    rejected: '50',
    reject_pct: '4%',
    reject_level: 'high',
    invalid: '0',
    best: '123',
  });

  // reject_level 'high': the rejected total carries the status-bad class.
  assert.match(renderApp({ state }), reddenedRejects);

  // Same figures with reject_level 'ok': the total renders plain, not reddened.
  state.proxy_summary.reject_level = 'ok';
  assert.doesNotMatch(renderApp({ state }), reddenedRejects);
});

test('WorkersTable surfaces the per-rig api-unreadable and reject badges, and the pool badge variants', () => {
  // The single fixture pins both workers to pool=p2pool, api_ok=null, reject_flag=null, so the
  // problem-rig signals behind the pool/api/rejected columns never render unless set here.
  const state = clone();
  const firstRig = state.workers[0];
  const secondRig = state.workers[1];

  firstRig.api_ok = false; // xmrig API unreadable -> "api ⚠"
  firstRig.reject_flag = { text: '90% rejected', title: 'high reject rate' };
  firstRig.pool = 'xvb'; // purple XvB badge
  secondRig.pool = 'somethingelse'; // unrecognised -> Unknown (bad) badge

  const html = renderApp({ state });
  const expectedBadges = [
    /api ⚠/, // api_ok === false badge (only UI signal a rig's API is unreadable)
    /90% rejected/, // per-row reject badge (how you spot a problem rig)
    /badge-purple">XvB/,
    /badge-bad">Unknown/,
  ];
  for (const badge of expectedBadges) {
    assert.match(html, badge);
  }
});

test('Tari status gates the ✔ on a live gRPC channel, never on active-but-dead (#278/#313)', () => {
  // The ✔ must mean the merge-mine channel is actually up. A dead channel that still reads
  // "active" must show status-warn and NO check — otherwise a TRANSIENT_FAILURE reads as
  // healthy (#278/#313).
  // Renders a fresh fixture whose Tari block is active with the given channel state.
  const renderWithTari = (connected) => {
    const state = clone();
    Object.assign(state.tari, { connected, active: true, status: 'Merge mining' });
    return renderApp({ state });
  };

  // Connected and active: ok styling and the ✔ shows.
  const liveHtml = renderWithTari(true);
  assert.match(liveHtml, /status-ok">Merge mining/);
  assert.match(liveHtml, /check-inline/);

  // Active but the channel is dead: warn styling and NO ✔ (the invariant).
  const deadHtml = renderWithTari(false);
  assert.match(deadHtml, /status-warn">Merge mining/);
  assert.doesNotMatch(deadHtml, /check-inline/);
});

test('Sync gauge shows a ✔ for a done chain and a live percent while syncing', () => {
  const state = clone();
  state.syncing = true;
  const monero = state.sync.monero;

  // While the chain is syncing, its percent is rendered.
  monero.state = 'syncing';
  monero.percent = 42;
  const syncingHtml = renderApp({ state });
  assert.match(syncingHtml, /42%/);

  // Once the chain is done, the big ✔ is rendered.
  monero.state = 'done';
  const doneHtml = renderApp({ state });
  assert.match(doneHtml, /check-big/);
});

// --- Component Health & Egress (#170) ---------------------------------------------------

test('ComponentHealth shows a Tor-only summary, the topology nodes, and the egress drawer', () => {
Expand Down
10 changes: 10 additions & 0 deletions build/dashboard/tests/service/test_storage_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,16 @@ def test_empty_snapshot_not_saved(self, state_manager):
def test_load_missing_snapshot_returns_none(self, state_manager):
    # No snapshot is saved in this test, so the load is expected to yield None.
    loaded = state_manager.load_snapshot()
    assert loaded is None

def test_unserializable_snapshot_flags_persistence_unhealthy(self, state_manager):
    # Regression guard for save_snapshot's TypeError branch, which used to call logger.error
    # directly. A snapshot json.dumps can't serialize is a persistent write failure: nothing is
    # stored, so nothing survives a restart. Like every other write path it must flip
    # db_healthy so /api/state raises the #131 badge instead of logging and looking green.
    unserializable = {"workers": {1, 2, 3}}  # a set -> TypeError in json.dumps
    assert state_manager.is_db_healthy() is True
    state_manager.save_snapshot(unserializable)
    assert state_manager.is_db_healthy() is False
    assert state_manager.load_snapshot() is None  # nothing was persisted

def test_share_stats_persist_across_instances(self, tmp_path):
# Issue #82: the per-worker share counts and the proxy /summary totals ride along in the
# latest_data snapshot, so they survive a dashboard restart (the snapshot is what
Expand Down
30 changes: 20 additions & 10 deletions docs/test-inventory.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ _Generated by `make test-inventory` ([`tests/inventory.sh`](../tests/inventory.s
edit by hand** — re-run the target to refresh. See [Testing Strategy](testing-strategy.md) for
how the tiers fit together._

**Totals:** 608 dashboard unit tests · 12 contract tests · 60 frontend
tests · 51 `pithead` shell sections · 17 harness self-test sections ·
**Totals:** 609 dashboard unit tests · 12 contract tests · 64 frontend
tests · 52 `pithead` shell sections · 17 harness self-test sections ·
9 live config scenarios (17 axis values) · 7 mini-stack scenarios.

> Counts are **test functions / named cases** (parametrized pytest cases expand to more at
Expand All @@ -14,9 +14,9 @@ tests · 51 `pithead` shell sections · 17 harness self-test sections ·

| Tier | Suite | Cases |
|---|---|---|
| 1 — Unit | dashboard pytest | 608 |
| 1 — Unit | frontend (node --test) | 60 |
| 1 — Unit | `pithead` shell suite | 51 sections |
| 1 — Unit | dashboard pytest | 609 |
| 1 — Unit | frontend (node --test) | 64 |
| 1 — Unit | `pithead` shell suite | 52 sections |
| 1 — Unit | compose interpolation + hardening (#90) | 1 |
| 2 — Contract | fake-daemon clients | 12 |
| 3 — Mini-stack | docker control-plane scenarios | 7 |
Expand All @@ -27,7 +27,7 @@ tests · 51 `pithead` shell sections · 17 harness self-test sections ·

## Tier 1 — Unit & component

### Dashboard (pytest) — 608 tests
### Dashboard (pytest) — 609 tests

#### tests/client/test_docker_control.py — 6
- test_tcp_scheme_rewritten_to_http
Expand Down Expand Up @@ -450,7 +450,7 @@ tests · 51 `pithead` shell sections · 17 harness self-test sections ·
- test_routed_fraction_in_unit_interval
- test_max_donation_fraction_within_reserve_bounds

#### tests/service/test_storage_service.py — 30
#### tests/service/test_storage_service.py — 31
- test_get_tiers
- test_default_xvb_stats
- test_partial_updates
Expand All @@ -472,6 +472,7 @@ tests · 51 `pithead` shell sections · 17 harness self-test sections ·
- test_roundtrip
- test_empty_snapshot_not_saved
- test_load_missing_snapshot_returns_none
- test_unserializable_snapshot_flags_persistence_unhealthy
- test_share_stats_persist_across_instances
- test_state_persists_across_instances
- test_legacy_kv_keys_migrated_on_load
Expand Down Expand Up @@ -691,7 +692,7 @@ tests · 51 `pithead` shell sections · 17 harness self-test sections ·
- test_no_when_in_tier_but_no_share
- test_na_when_xvb_off

### Frontend logic (node --test) — 60 tests
### Frontend logic (node --test) — 64 tests
- withAlpha: appends an 8-bit alpha to a #rrggbb hex
- withAlpha: non-#rrggbb values pass through opaque (a palette change cannot break fills)
- padYAxis: pads the range and clamps the floor to zero
Expand All @@ -710,6 +711,10 @@ tests · 51 `pithead` shell sections · 17 harness self-test sections ·
- WorkersTable renders headers and a row per worker with status classes
- WorkersTable with no workers still renders the headers but no rows
- ProxyTotals footer is hidden until the proxy reports data
- ProxyTotals reddens the rejected figure only when reject_level is high
- WorkersTable surfaces the per-rig api-unreadable and reject badges, and the pool badge variants
- Tari status gates the ✔ on a live gRPC channel, never on active-but-dead (#278/#313)
- Sync gauge shows a ✔ for a done chain and a live percent while syncing
- ComponentHealth shows a Tor-only summary, the topology nodes, and the egress drawer
- ComponentHealth flips to a warning summary when the posture leaks
- ComponentHealth still renders the panel but omits the drawer when egress is absent
Expand Down Expand Up @@ -753,7 +758,7 @@ tests · 51 `pithead` shell sections · 17 harness self-test sections ·
- edgePath: column-crossing edges route orthogonally through a clear lane
- route palette + names cover every route the server can emit

### `pithead` shell suite (tests/stack/run.sh) — 51 sections
### `pithead` shell suite (tests/stack/run.sh) — 52 sections
- unit: resolve_default
- unit: assert_safe_dir
- unit: is_public_ip classifier (#113)
Expand Down Expand Up @@ -785,6 +790,7 @@ tests · 51 `pithead` shell sections · 17 harness self-test sections ·
- unit: node credential helpers
- unit: randomx_boot_params (#176)
- unit: grub heal + boot-param insert (#176)
- unit: ensure_owner conditional recursive chown (#255)
- unit: disk_component_gib
- unit: check_disk_grouped (mocked df)
- node configs: no clearnet DNS egress (#161 monerod, #162 tari)
Expand Down Expand Up @@ -881,6 +887,7 @@ tests · 51 `pithead` shell sections · 17 harness self-test sections ·
- TARI_REQUIRED env matches config
- XVB_ENABLED matches config
- XvB stats + auto-register wired to the Tor SOCKS (#206/#163)
- apply migrates root-owned CONTENTS to the container uid (#255)
- backup archive contains .env
- backup archive contains config.json
- backup/rollback prerequisites present (writable backups/, tar)
Expand All @@ -893,6 +900,9 @@ tests · 51 `pithead` shell sections · 17 harness self-test sections ·
- default-off stratum: no --access-password live (#152)
- disk headroom on the live chain FS (${avail} GiB free)
- egress posture section present
- firewall apply degrades gracefully on an insert failure (rc 0)
- firewall reinstated after recovery
- insert failure leaves NO half-open firewall (rolled back)
- memory ceiling live on $svc (#132)
- monero auto-transitioned clearnet→Tor (#234)
- monero display mode determinate ($dmode)
Expand Down Expand Up @@ -961,5 +971,5 @@ tests · 51 `pithead` shell sections · 17 harness self-test sections ·

---

_Grand total: **764** enumerated cases/sections across the four tiers (plus the live
_Grand total: **770** enumerated cases/sections across the four tiers (plus the live
lifecycle and fault-injection phases, which are exercised on a real server)._
43 changes: 43 additions & 0 deletions docs/testing-strategy.md
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,49 @@ These are deliberately not yet covered and are the road to full production confi
separate exercise (`SECURITY.md`). These tests pin the decisions we've already made; they don't
find new ones.

### Coverage-audit follow-ups (2026-06)

A source-vs-tests audit added Tier-1 coverage for a real bug (snapshot serialization failure left
the #131 persistence badge green), the firewall install-failure rollback (#270), the wallet
hard-fail guards (#250), remote-host/subnet validation (#180), `ensure_owner`'s whole-tree scan
(#255), and several dashboard render branches (per-worker api/reject badges, XvB/Unknown pool
badges, the #278/#313 Tari-✔ invariant, `Gauge` done vs syncing). The gaps it surfaced that are
**not yet covered at an automatable tier** — all needing Docker or the real box, so they land at
tier 3/4 (✅ marks the ones since closed by a release-gate tier-4 case):

- **Firewall rollback, real kernel.** ✅ Now a tier-4 `--fault-injection` case: it shadows `iptables`
with a wrapper that fails every `-I` insert, re-runs `apply_tor_egress_firewall`, and asserts the
box ends fail-closed (no `pithead-tor-egress` rule left half-installed), then reinstates the real
firewall. The tier-1 stubbed test proves the control flow; this proves the real-kernel strip.
Runs at the release gate only (destructive-then-restored, local box).
- **`ensure_owner` real mixed-ownership tree.** ✅ Now a tier-4 `--lifecycle` step: it plants a
root-owned file under the dashboard data dir and asserts the pool-flip `apply` (which runs
`ensure_directories` → `ensure_owner`) chowns it to uid 1000 — the #255 "scan contents, not just
the dir" regression. Runs at the release gate only (needs root to create a foreign-uid inode).
- **Real-container monerod failover in PR CI.** The primary-node reject/readmit cycle only runs on
the manual tier-4 box (`--fault-injection`); the mini-stack (tier 3) breaks Tari, not monerod.
- **Non-blocking-Tari "ignore" path with real containers.** Unit-tested only; the mini-stack proves
Tari-down-while-required (reject) but never Tari-down-while-optional (keep mining). This is the
path that silently kills yield if it regresses to a reject.
- **monerod busy / mid-reorg failover.** The contract test proves the client reads a busy node as
unreachable; no mini-stack or fault-injection scenario asserts the dashboard actually rejects
workers on a busy-but-alive node (a real reorg state, distinct from a clean stop).
- **Double outage, both-must-recover.** Unit-tested (monerod ∧ Tari down → readmit only when both
healthy); never driven with real containers, so the recovery ordering is unproven end-to-end.
- **Partial-start / stop-failure idempotency.** The control loop's "container fails to start/stop →
retry next cycle" is unit-only; no tier-3/4 scenario injects a docker start/stop error.
- **`pithead doctor` on a real box.** Only its exit code is unit-tested; its NTP/clock-drift check
(mining is time-sensitive) is never fault-injected or asserted at tier 4.
- **Disk-full / ENOSPC verdict.** Only a disk-headroom *warning* is checked; a real
container-unhealthy-on-ENOSPC verdict is never forced, though the disk badge + db-write-error
paths are unit-tested.
- **Tor-container-down partial start.** No Caddy/Tor services exist in the mini-stack compose, so
"what happens when the Tor container is down" (SOCKS unreachable) is exercised at no tier below
the manual real box; every all-Tor egress assertion is read-path only.
- **Insecure + main matrix row.** `dashboard.secure=false` only ever pairs with `p2pool.pool=nano`,
so the Caddy-scheme / bind assertions for insecure mode are entangled with the nano path; an
insecure+main regression has no row.

## Adding a scenario

- Logic (a new decision/branch) → a unit test (tier 1). Cheapest, fastest.
Expand Down
Loading