diff --git a/packages/providers/src/__tests__/lexical-substitution.test.ts b/packages/providers/src/__tests__/lexical-substitution.test.ts
index b0ef812..25674c8 100644
--- a/packages/providers/src/__tests__/lexical-substitution.test.ts
+++ b/packages/providers/src/__tests__/lexical-substitution.test.ts
@@ -74,4 +74,25 @@ describe("applyLexicalSubstitution", () => {
     expect(result).toContain("zumo");
     expect(result).not.toContain("jugo");
   });
+
+  it("does not swap ambiguous slang terms with common non-slang meanings", () => {
+    // "botón" = button (UI) but also es-UY slang for "cop". Must NOT swap to "tomba".
+    expect(applyLexicalSubstitution("Haga clic en el botón para continuar.", "es-CO"))
+      .toContain("botón");
+
+    // "agente" = agent (generic) but also es-EC variant for "cop". Must NOT swap.
+    expect(applyLexicalSubstitution("El agente secreto fue descubierto.", "es-CO"))
+      .toContain("agente");
+
+    // "cuero" = leather but also es-DO slang for "money" and "cop". Must NOT swap.
+    expect(applyLexicalSubstitution("El cuero del zapato está dañado.", "es-CO"))
+      .toContain("cuero");
+  });
+
+  it("still swaps unambiguous slang terms correctly", () => {
+    // "paco" (es-CL cop slang) is unambiguous — should still swap
+    const result = applyLexicalSubstitution("El paco está en la esquina.", "es-CO");
+    expect(result).toContain("tomba");
+    expect(result).not.toContain("paco");
+  });
 });
diff --git a/packages/types/src/dialectal-dictionary.json b/packages/types/src/dialectal-dictionary.json
index 2197b00..83360bc 100644
--- a/packages/types/src/dialectal-dictionary.json
+++ b/packages/types/src/dialectal-dictionary.json
@@ -44897,7 +44897,8 @@
         "es-UY": {
           "term": "botón",
           "frequency": 1,
-          "register": "informal"
+          "register": "informal",
+          "ambiguous": true
         },
         "es-PY": {
           "term": "mbae",
@@ -44942,7 +44943,8 @@
         "es-EC": {
           "term": "agente",
           "frequency": 1,
-          "register": "universal"
+          "register": "universal",
+          "ambiguous": true
         },
         "es-BO": {
           "term": "choco",
@@ -44952,7 +44954,8 @@
         "es-DO": {
           "term": "cuero",
           "frequency": 1,
-          "register": "informal"
+          "register": "informal",
+          "ambiguous": true
         },
         "es-PR": {
           "term": "policía",
@@ -46001,7 +46004,8 @@
         "es-DO": {
           "term": "cuero",
           "frequency": 2,
-          "register": "informal"
+          "register": "informal",
+          "ambiguous": true
         },
         "es-PR": {
           "term": "chavos",
diff --git a/packages/types/src/dialectal-dictionary.ts b/packages/types/src/dialectal-dictionary.ts
index 49521c8..c84264f 100644
--- a/packages/types/src/dialectal-dictionary.ts
+++ b/packages/types/src/dialectal-dictionary.ts
@@ -19,6 +19,10 @@ export interface Variant {
   frequency: 1 | 2 | 3;
   register: "formal" | "informal" | "universal";
   notes?: string;
+  /** When true, this variant has a common non-slang meaning and should not
+   *  be used as an avoid-term for lexical substitution. Prevents false
+   *  positives like "botón" (button) being swapped to "tomba" (cop slang). */
+  ambiguous?: boolean;
 }
 
 export interface DictionaryEntry {
diff --git a/packages/types/src/dialectal-vocabulary.ts b/packages/types/src/dialectal-vocabulary.ts
index b556f92..e217946 100644
--- a/packages/types/src/dialectal-vocabulary.ts
+++ b/packages/types/src/dialectal-vocabulary.ts
@@ -73,7 +73,13 @@ export function getVocabularyForDialect(dialect: SpanishDialect): VocabularySwap
     const variant = resolveVariant(entry, dialect);
     if (!variant) continue;
     const allTerms = getAllTerms(entry);
-    const avoidTerms = allTerms.filter(t => t !== variant.term);
+    // Build avoid-terms, excluding ambiguous variants that have common
+    // non-slang meanings (e.g. "botón" = button, not just es-UY cop slang).
+    const ambiguousTerms = new Set<string>();
+    for (const v of Object.values(entry.variants ?? {})) {
+      if (v?.ambiguous) ambiguousTerms.add(v.term);
+    }
+    const avoidTerms = allTerms.filter(t => t !== variant.term && !ambiguousTerms.has(t));
     swaps.push({
       concept: entry.concept,
       englishGloss: entry.englishGloss,