From 72992af7c08f612cd4eb12d9d101ad56ddf54960 Mon Sep 17 00:00:00 2001 From: SyniRon <66834451+SyniRon@users.noreply.github.com> Date: Mon, 8 Jun 2026 08:53:23 -0400 Subject: [PATCH 1/3] contract: replay golden witnesses over-ceiling repeated uint32 filter 400 (#196) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add battery case tickets/list_category_over_uint32 — GET /api/v1/tickets?category_id=4294967296 with read:tickets auth — and record its golden. The repeated categoryId[] filter's array items declare maximum: 4294967295, pinned structurally by TestSpec_Uint32ParamsBounded, but libopenapi-validator does not enforce numeric maximum on array items and no golden replayed the binder's response-level 400 for an over-ceiling value. This case witnesses status 400, code 3, and the binder's "parsing list \"category_id\": ... value out of range", closing the declare+replay coverage gap for the array surface (the repeated-filter complement of the scalar list_modified_since_over_uint32 case). Coverage only — no binder behavior change; the structural spec test is untouched. This was generated by AI --- contract/battery.go | 2 ++ .../list_category_over_uint32.golden.json | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 contract/goldens/tickets/list_category_over_uint32.golden.json diff --git a/contract/battery.go b/contract/battery.go index 29539f9..4ba0212 100644 --- a/contract/battery.go +++ b/contract/battery.go @@ -173,6 +173,8 @@ func Cases() []Case { Notes: "Unknown query parameter ignored: identical payload to list_default."}, {Name: "tickets/list_modified_since_over_uint32", Method: "GET", Path: "/api/v1/tickets?modified_since=4294967296", Auth: AuthReadTickets, Notes: "modifiedSince is uint32-backed: the handler rejects values above 4294967295 with 'value out of range', pinned here at the response level. Sent via the snake_case alias 'modified_since', which the spec validator treats as an ignored unknown param — so this does NOT exercise the request-level maximum; that bound is pinned by TestSpec_Uint32ParamsBounded (and enforced on the camelCase modifiedSince key by the request validator, cf. get_by_id_over_uint32)."}, + {Name: "tickets/list_category_over_uint32", Method: "GET", Path: "/api/v1/tickets?category_id=4294967296", Auth: AuthReadTickets, + Notes: "categoryId[] is a repeated uint32 filter whose array items carry maximum: 4294967295: the binder rejects an over-ceiling value with 'value out of range', pinned here at the response level — the repeated-filter complement of list_modified_since_over_uint32. Sent via the snake_case alias 'category_id', which the spec validator treats as an ignored unknown param (libopenapi-validator does not enforce numeric maximum on array items anyway), so this does NOT exercise the request-level bound; the DECLARED items maximum is pinned structurally by TestSpec_Uint32ParamsBounded. This case adds the REPLAY half for the array surface."}, // --- Tickets: get by id / ref --------------------------------------- {Name: "tickets/get_by_id_happy", Method: "GET", Path: "/api/v1/tickets/42", Auth: AuthReadTickets, diff --git a/contract/goldens/tickets/list_category_over_uint32.golden.json b/contract/goldens/tickets/list_category_over_uint32.golden.json new file mode 100644 index 0000000..072d762 --- /dev/null +++ b/contract/goldens/tickets/list_category_over_uint32.golden.json @@ -0,0 +1,16 @@ +{ + "auth": "read:tickets", + "body": { + "code": 3, + "details": [], + "message": "parsing list \"category_id\": strconv.ParseUint: parsing \"4294967296\": value out of range" + }, + "case": "tickets/list_category_over_uint32", + "header": { + "Content-Type": "application/json" + }, + "method": "GET", + "notes": "categoryId[] is a repeated uint32 filter whose array items carry maximum: 4294967295: the binder rejects an over-ceiling value with 'value out of range', pinned here at the response level — the repeated-filter complement of list_modified_since_over_uint32. Sent via the snake_case alias 'category_id', which the spec validator treats as an ignored unknown param (libopenapi-validator does not enforce numeric maximum on array items anyway), so this does NOT exercise the request-level bound; the DECLARED items maximum is pinned structurally by TestSpec_Uint32ParamsBounded. This case adds the REPLAY half for the array surface.", + "path": "/api/v1/tickets?category_id=4294967296", + "status": 400 +} From 248072cf1adbbfc5e8a0408918ba654865c79f6c Mon Sep 17 00:00:00 2001 From: SyniRon <66834451+SyniRon@users.noreply.github.com> Date: Mon, 8 Jun 2026 08:54:14 -0400 Subject: [PATCH 2/3] rest: make trustedProxies cache once-write/read-after structural (#197) Wrap the trusted-proxy cache in atomic.Pointer[[]*net.IPNet] behind an unexported loadTrustedProxies accessor. InitTrustedProxies now publishes via an atomic Store rather than a direct assignment; clientIP loads via the accessor. This turns the "write once before any listener serves, read-only after" invariant from a comment-only convention into a structural one and removes the theoretical data race surfaced in the #194 type-design review. Behavior is identical: empty/unset still trusts nothing, a non-empty malformed value is still a fatal startup error (made fatal by the composition root), and a read before init loads the zero value (nil) -> trust-nothing, the safe direction. resolveClientIP stays pure and unchanged. InitTrustedProxies keeps its exported `() error` signature; servers/server.go is untouched and its wiring guard still passes. Adjust the test helper that poked the cache directly to use the atomic API, and add a concurrent-reads-during-publish test the race detector exercises. This was generated by AI --- rest/clientip.go | 27 ++++++++++++++++++++------- rest/clientip_test.go | 37 +++++++++++++++++++++++++++++++++---- 2 files changed, 53 insertions(+), 11 deletions(-) diff --git a/rest/clientip.go b/rest/clientip.go index 93f1375..03a7501 100644 --- a/rest/clientip.go +++ b/rest/clientip.go @@ -5,6 +5,7 @@ import ( "net" "net/http" "strings" + "sync/atomic" "github.com/spf13/viper" ) @@ -16,11 +17,23 @@ import ( // globals) so the full trust rule is exhaustively table-testable; clientIP is // the thin ambient wrapper the log sites call over the cached set. -// trustedProxies is the parsed TRUSTED_PROXIES set, populated ONCE at startup -// by InitTrustedProxies and read by clientIP. nil/empty = trust nothing, which -// makes resolveClientIP ignore every forwarding header (the prior -// proxy-address logging behavior). -var trustedProxies []*net.IPNet +// trustedProxies holds the parsed TRUSTED_PROXIES set, published ONCE at +// startup by InitTrustedProxies and read by clientIP. The atomic pointer makes +// the once-write/read-after lifecycle structural: a reader can never observe a +// half-written set, and a read before the publish loads the zero value (nil) = +// trust nothing, which makes resolveClientIP ignore every forwarding header +// (the prior proxy-address logging behavior). Read it only via +// loadTrustedProxies. +var trustedProxies atomic.Pointer[[]*net.IPNet] + +// loadTrustedProxies returns the published trusted-proxy set, or nil before +// InitTrustedProxies has published one — the safe trust-nothing direction. +func loadTrustedProxies() []*net.IPNet { + if p := trustedProxies.Load(); p != nil { + return *p + } + return nil +} // parseTrustedProxies turns the comma-split TRUSTED_PROXIES specs into CIDR // networks. A bare IP is normalized to a host route (/32 for v4, /128 for v6). @@ -139,12 +152,12 @@ func InitTrustedProxies() error { if err != nil { return err } - trustedProxies = nets + trustedProxies.Store(&nets) return nil } // clientIP is the thin ambient wrapper the two 401 log sites call. It resolves // against the cached trusted set populated by InitTrustedProxies. func clientIP(r *http.Request) string { - return resolveClientIP(r, trustedProxies) + return resolveClientIP(r, loadTrustedProxies()) } diff --git a/rest/clientip_test.go b/rest/clientip_test.go index fd0b94b..bfaadc4 100644 --- a/rest/clientip_test.go +++ b/rest/clientip_test.go @@ -4,6 +4,7 @@ import ( "net" "net/http" "net/http/httptest" + "sync" "testing" "github.com/spf13/viper" @@ -16,11 +17,11 @@ import ( func withTrustedProxiesEnv(t *testing.T, value string) { t.Helper() prior := viper.GetString("TRUSTED_PROXIES") - priorSet := trustedProxies + priorSet := trustedProxies.Load() viper.Set("TRUSTED_PROXIES", value) t.Cleanup(func() { viper.Set("TRUSTED_PROXIES", prior) - trustedProxies = priorSet + trustedProxies.Store(priorSet) }) } @@ -192,7 +193,7 @@ func TestParseTrustedProxies_MalformedEntryErrors(t *testing.T) { func TestInitTrustedProxies_EmptyTrustsNothing(t *testing.T) { withTrustedProxiesEnv(t, "") require.NoError(t, InitTrustedProxies()) - require.Empty(t, trustedProxies) + require.Empty(t, loadTrustedProxies()) } func TestInitTrustedProxies_NonEmptyMalformedReturnsError(t *testing.T) { @@ -203,5 +204,33 @@ func TestInitTrustedProxies_NonEmptyMalformedReturnsError(t *testing.T) { func TestInitTrustedProxies_ParsesAndCaches(t *testing.T) { withTrustedProxiesEnv(t, "10.0.0.0/8, 192.168.1.1") require.NoError(t, InitTrustedProxies()) - require.Len(t, trustedProxies, 2) + require.Len(t, loadTrustedProxies(), 2) +} + +// TestTrustedProxies_ConcurrentReadsDuringPublish proves the publish/read path +// is race-clean: clientIP readers running while InitTrustedProxies publishes +// must never observe a torn set. Under -race this is the structural guarantee +// the atomic.Pointer buys over the old plain-slice assignment. +func TestTrustedProxies_ConcurrentReadsDuringPublish(t *testing.T) { + withTrustedProxiesEnv(t, "10.0.0.0/8") + + var wg sync.WaitGroup + for range 8 { + wg.Add(1) + go func() { + defer wg.Done() + r := reqWith("10.0.0.5:443", map[string]string{ + "X-Forwarded-For": "203.0.113.9, 10.0.0.4", + }) + for range 200 { + // Result is non-deterministic mid-publish (peer or client), + // but the read itself must never tear or race. + _ = clientIP(r) + } + }() + } + for range 50 { + require.NoError(t, InitTrustedProxies()) + } + wg.Wait() } From c341a56c22ed93f074b4533f08d7248f11038e66 Mon Sep 17 00:00:00 2001 From: SyniRon <66834451+SyniRon@users.noreply.github.com> Date: Mon, 8 Jun 2026 08:56:39 -0400 Subject: [PATCH 3/3] datastores: warn loudly on zero-row read of a must-be-populated phrase family (#195) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit loadPhraseMap returned (emptyMap, nil) whenever the xf_phrase query succeeded but matched zero rows, so the reference cache warmed a fully blank status/priority map and every statusName/priorityName resolved to "" on healthy 200s with nothing in logs, metrics, or errors — the exact silent-degradation mode #189 fixed at the title-prefix level, still latent one layer down (any future drift: another add-on phrase rename, a collation/charset change, a language_id regression). Introduce phraseFamily{prefix, name, mustBePopulated} for the three NF Tickets families (status/priority must be populated; prefix may be legitimately sparse). After the read, warnIfEmpty surfaces a zero-row must-be-populated family at Warn severity, naming the family, and STILL returns the empty map with a nil error — the refresh must not hard-fail on one drifted family and take down the rest of the cache (categories etc.). The healthy populated path is unchanged, and per-id misses inside a populated family still resolve to "" (the guard only fires on the all-or-nothing empty case). The documented status/priority/prefix title asymmetry (the CAUTION block) is preserved, not normalised. Tests: datastores/phrasemap_test.go pins the decision (empty status and priority warn naming the family; empty prefix is silent; any populated map never warns) capturing datastores.Warn. tickets_harness_test.go adds an end-to-end MariaDB-harness test that deletes the seeded status phrases, asserts LoadStatusNames returns an empty map with nil error and emits a Warn naming the family. This was generated by AI --- datastores/phrasemap_test.go | 77 ++++++++++++++++++++++++++++++ datastores/tickets.go | 71 +++++++++++++++++++-------- datastores/tickets_harness_test.go | 38 +++++++++++++++ 3 files changed, 167 insertions(+), 19 deletions(-) create mode 100644 datastores/phrasemap_test.go diff --git a/datastores/phrasemap_test.go b/datastores/phrasemap_test.go new file mode 100644 index 0000000..ca700c2 --- /dev/null +++ b/datastores/phrasemap_test.go @@ -0,0 +1,77 @@ +package datastores + +import ( + "bytes" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// Pure unit tests for the zero-row guard on phrase-map families (#195). +// The DB read itself (loadPhraseMap against a real xf_phrase) is pinned +// end-to-end on the MariaDB harness in tickets_harness_test.go; here we +// pin the decision: a *must-be-populated* family that comes back empty is +// surfaced loudly (Warn, naming the family), while the legitimately-sparse +// prefix family stays silent and the healthy populated path never warns. + +// captureWarn swaps datastores.Warn's output to a buffer for the duration +// of fn, restoring it afterwards, and returns what was written. +func captureWarn(t *testing.T, fn func()) string { + t.Helper() + var buf bytes.Buffer + prevOut := Warn.Writer() + prevFlags := Warn.Flags() + prevPrefix := Warn.Prefix() + Warn.SetOutput(&buf) + Warn.SetFlags(0) + t.Cleanup(func() { + Warn.SetOutput(prevOut) + Warn.SetFlags(prevFlags) + Warn.SetPrefix(prevPrefix) + }) + fn() + return buf.String() +} + +func TestPhraseFamily_EmptyMustBePopulatedFamilyWarnsNamingFamily(t *testing.T) { + cases := []struct { + fam phraseFamily + wantName string + }{ + {familyStatus, "status"}, + {familyPriority, "priority"}, + } + for _, c := range cases { + t.Run(c.wantName, func(t *testing.T) { + out := captureWarn(t, func() { + c.fam.warnIfEmpty(map[uint32]string{}) + }) + require.NotEmpty(t, out, "an empty must-be-populated family must surface a warning, got silence") + assert.Contains(t, strings.ToLower(out), c.wantName, + "the warning must name the %q family so the drift is diagnosable", c.wantName) + }) + } +} + +func TestPhraseFamily_EmptyPrefixFamilyIsSilent(t *testing.T) { + out := captureWarn(t, func() { + familyPrefix.warnIfEmpty(map[uint32]string{}) + }) + assert.Empty(t, out, + "the prefix family may be legitimately sparse — an empty read must not warn") +} + +func TestPhraseFamily_PopulatedFamilyNeverWarns(t *testing.T) { + // A populated map is the healthy path — no warning regardless of which + // family, and individual missing ids inside it are NOT this guard's + // concern (they intentionally resolve to "" via the cache). + for _, fam := range []phraseFamily{familyStatus, familyPriority, familyPrefix} { + out := captureWarn(t, func() { + fam.warnIfEmpty(map[uint32]string{1: "Open"}) + }) + assert.Empty(t, out, + "a family that returned rows must behave exactly as today (no warning)") + } +} diff --git a/datastores/tickets.go b/datastores/tickets.go index 2a169f1..913f962 100644 --- a/datastores/tickets.go +++ b/datastores/tickets.go @@ -38,59 +38,92 @@ var ErrInvalidCursor = errors.New("invalid cursor") // Compile-time assertion: Mysql must implement referencecache.Loader. var _ referencecache.Loader = (*Mysql)(nil) -// xf_phrase title prefixes for the three NF Tickets reference families. +// phraseFamily names one NF Tickets reference family in xf_phrase: the +// LIKE prefix its titles carry, a short human name for diagnostics, and +// whether the family is ever legitimately empty. // -// CAUTION — the add-on's phrase naming is ASYMMETRIC, not a typo below: -// status and priority titles carry NO `ticket_` infix -// (`nf_tickets_status.`, `nf_tickets_priority.`), but prefix titles -// DO (`nf_tickets_ticket_prefix.`). This mirrors exactly what the NF +// CAUTION — the add-on's phrase naming is ASYMMETRIC, not a typo in the +// familyStatus/familyPriority/familyPrefix prefixes below: status and +// priority titles carry NO `ticket_` infix (`nf_tickets_status.`, +// `nf_tickets_priority.`), but prefix titles DO +// (`nf_tickets_ticket_prefix.`). This mirrors exactly what the NF // Tickets add-on writes to xf_phrase in production; querying status/priority // with the `ticket_` infix matches zero rows and silently warms an empty // cache (every statusName/priorityName resolves to ""). Do not "normalise" // these to a uniform shape — the inconsistency is in the source data, and // the testdb fixtures (testdb/fixtures.sql) seed these exact forms. -const ( - phrasePrefixStatus = "nf_tickets_status." - phrasePrefixPriority = "nf_tickets_priority." - phrasePrefixPrefix = "nf_tickets_ticket_prefix." +// +// mustBePopulated guards the silent-degradation mode of #195: status and +// priority are never empty in production, so a zero-row read for them means +// the source data has drifted (an add-on phrase rename, a collation/charset +// change, a language_id regression) and must be surfaced loudly instead of +// warming a blank cache (every statusName/priorityName then resolves to ""). +// prefix MAY be legitimately sparse, so it stays exempt. +type phraseFamily struct { + prefix string + name string + mustBePopulated bool +} + +var ( + familyStatus = phraseFamily{prefix: "nf_tickets_status.", name: "status", mustBePopulated: true} + familyPriority = phraseFamily{prefix: "nf_tickets_priority.", name: "priority", mustBePopulated: true} + familyPrefix = phraseFamily{prefix: "nf_tickets_ticket_prefix.", name: "prefix", mustBePopulated: false} ) +// warnIfEmpty surfaces a zero-row read for a must-be-populated family at +// Warn severity, naming the family. It is intentionally NOT triggered by +// individual missing ids within a populated map — those resolve to "" by +// design (new add-on records can land between refresh ticks). A legitimately +// sparse family (prefix) never warns. +func (f phraseFamily) warnIfEmpty(out map[uint32]string) { + if f.mustBePopulated && len(out) == 0 { + Warn.Printf( + "reference cache: phrase family %q (%s%%) read zero rows — status/priority names will resolve blank on healthy responses; the xf_phrase titles for this family have likely drifted", + f.name, f.prefix) + } +} + func (ds *Mysql) LoadStatusNames(ctx context.Context) (map[uint32]string, error) { - return ds.loadPhraseMap(ctx, phrasePrefixStatus) + return ds.loadPhraseMap(ctx, familyStatus) } func (ds *Mysql) LoadPriorityNames(ctx context.Context) (map[uint32]string, error) { - return ds.loadPhraseMap(ctx, phrasePrefixPriority) + return ds.loadPhraseMap(ctx, familyPriority) } func (ds *Mysql) LoadPrefixNames(ctx context.Context) (map[uint32]string, error) { - return ds.loadPhraseMap(ctx, phrasePrefixPrefix) + return ds.loadPhraseMap(ctx, familyPrefix) } -// loadPhraseMap reads rows from xf_phrase whose title starts with the given -// prefix (one of the phrasePrefix* consts above, e.g. "nf_tickets_status."), -// parses the trailing integer id, -// and returns id -> phrase_text. Rows where the trailing part isn't a +// loadPhraseMap reads rows from xf_phrase whose title starts with the +// family's prefix (e.g. "nf_tickets_status."), parses the trailing integer +// id, and returns id -> phrase_text. Rows where the trailing part isn't a // uint32 are skipped (not an error — the phrase table is shared, so // unrelated rows can be in scope of the LIKE pattern at the edges). -func (ds *Mysql) loadPhraseMap(ctx context.Context, prefix string) (map[uint32]string, error) { +// +// A successful read that matches zero rows is NOT an error (the refresh must +// not hard-fail on one drifted family and take down the rest of the cache), +// but for a must-be-populated family it is surfaced via warnIfEmpty (#195). +func (ds *Mysql) loadPhraseMap(ctx context.Context, fam phraseFamily) (map[uint32]string, error) { var rows []struct { Title string `gorm:"column:title"` PhraseText string `gorm:"column:phrase_text"` } tx := ds.Db.WithContext(ctx). - Raw(`SELECT title, phrase_text FROM xf_phrase WHERE title LIKE ?`, prefix+"%"). + Raw(`SELECT title, phrase_text FROM xf_phrase WHERE title LIKE ?`, fam.prefix+"%"). Scan(&rows) if tx.Error != nil { return nil, tx.Error } out := map[uint32]string{} for _, r := range rows { - idStr := r.Title[len(prefix):] + idStr := r.Title[len(fam.prefix):] parsed, err := strconv.ParseUint(idStr, 10, 32) if err != nil { continue } out[uint32(parsed)] = r.PhraseText } + fam.warnIfEmpty(out) return out, nil } diff --git a/datastores/tickets_harness_test.go b/datastores/tickets_harness_test.go index 473777e..047c6c6 100644 --- a/datastores/tickets_harness_test.go +++ b/datastores/tickets_harness_test.go @@ -1,8 +1,10 @@ package datastores_test import ( + "bytes" "context" "errors" + "strings" "testing" "github.com/7cav/api/datastores" @@ -566,3 +568,39 @@ func messagePositions(msgs []*proto.Message) []uint32 { } return out } + +// A must-be-populated phrase family (status) that reads zero rows from +// xf_phrase must surface a Warn naming the family and still return an empty +// map with a nil error — the refresh must NOT hard-fail on one drifted +// family (#195). This drives the real loader against the MariaDB harness +// with the status phrases deleted to reproduce the production drift mode. +func TestLoadStatusNames_ZeroRowsWarnsButDoesNotFail(t *testing.T) { + ds := openHarnessDatastore(t) + + // Remove the seeded status phrases so the (correct) LIKE prefix now + // matches zero rows — the exact silent-blank mode #189 fixed at the + // prefix level and #195 guards one layer down. + if err := ds.Db.Exec(`DELETE FROM xf_phrase WHERE title LIKE 'nf_tickets_status.%'`).Error; err != nil { + t.Fatalf("clearing status phrases: %v", err) + } + + var buf bytes.Buffer + prev := datastores.Warn.Writer() + datastores.Warn.SetOutput(&buf) + t.Cleanup(func() { datastores.Warn.SetOutput(prev) }) + + names, err := ds.LoadStatusNames(context.Background()) + if err != nil { + t.Fatalf("LoadStatusNames must not hard-fail on a zero-row read, got %v", err) + } + if len(names) != 0 { + t.Fatalf("status phrases were deleted, want empty map, got %v", names) + } + logged := buf.String() + if logged == "" { + t.Fatal("an empty status family must surface a Warn, got silence (the #195 silent-blank mode)") + } + if !strings.Contains(strings.ToLower(logged), "status") { + t.Errorf("the Warn must name the status family, got %q", logged) + } +}