Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions packages/providers/src/__tests__/lexical-substitution.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,25 @@ describe("applyLexicalSubstitution", () => {
expect(result).toContain("zumo");
expect(result).not.toContain("jugo");
});

it("does not swap ambiguous slang terms with common non-slang meanings", () => {
  // Each case pairs a sentence with an everyday word that doubles as regional
  // slang elsewhere; targeting es-CO must leave that word in the output.
  const cases = [
    // "botón" = button (UI) but also es-UY slang for "cop". Must NOT swap to "tomba".
    { sentence: "Haga clic en el botón para continuar.", keep: "botón" },
    // "agente" = agent (generic) but also the es-EC variant for "cop". Must NOT swap.
    { sentence: "El agente secreto fue descubierto.", keep: "agente" },
    // "cuero" = leather but also es-DO slang for "money" and "cop". Must NOT swap.
    { sentence: "El cuero del zapato está dañado.", keep: "cuero" },
  ];

  for (const { sentence, keep } of cases) {
    expect(applyLexicalSubstitution(sentence, "es-CO")).toContain(keep);
  }
});

it("still swaps unambiguous slang terms correctly", () => {
  // Regression guard: "paco" (es-CL slang for "cop") is not flagged as
  // ambiguous, so targeting es-CO must still replace it with "tomba".
  const sentence = "El paco está en la esquina.";
  const swapped = applyLexicalSubstitution(sentence, "es-CO");

  expect(swapped).toContain("tomba");
  expect(swapped).not.toContain("paco");
});
});
12 changes: 8 additions & 4 deletions packages/types/src/dialectal-dictionary.json
Original file line number Diff line number Diff line change
Expand Up @@ -44897,7 +44897,8 @@
"es-UY": {
"term": "botón",
"frequency": 1,
"register": "informal"
"register": "informal",
"ambiguous": true
},
"es-PY": {
"term": "mbae",
Expand Down Expand Up @@ -44942,7 +44943,8 @@
"es-EC": {
"term": "agente",
"frequency": 1,
"register": "universal"
"register": "universal",
"ambiguous": true
},
"es-BO": {
"term": "choco",
Expand All @@ -44952,7 +44954,8 @@
"es-DO": {
"term": "cuero",
"frequency": 1,
"register": "informal"
"register": "informal",
"ambiguous": true
},
"es-PR": {
"term": "policía",
Expand Down Expand Up @@ -46001,7 +46004,8 @@
"es-DO": {
"term": "cuero",
"frequency": 2,
"register": "informal"
"register": "informal",
"ambiguous": true
},
"es-PR": {
"term": "chavos",
Expand Down
4 changes: 4 additions & 0 deletions packages/types/src/dialectal-dictionary.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ export interface Variant {
frequency: 1 | 2 | 3;
register: "formal" | "informal" | "universal";
notes?: string;
/** When true, this variant has a common non-slang meaning and should not
* be used as an avoid-term for lexical substitution. Prevents false
* positives like "botón" (button) being swapped to "tomba" (cop slang). */
ambiguous?: boolean;
}

export interface DictionaryEntry {
Expand Down
8 changes: 7 additions & 1 deletion packages/types/src/dialectal-vocabulary.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,13 @@ export function getVocabularyForDialect(dialect: SpanishDialect): VocabularySwap
const variant = resolveVariant(entry, dialect);
if (!variant) continue;
const allTerms = getAllTerms(entry);
const avoidTerms = allTerms.filter(t => t !== variant.term);
// Build avoid-terms, excluding ambiguous variants that have common
// non-slang meanings (e.g. "botón" = button, not just es-UY cop slang).
const ambiguousTerms = new Set<string>();
for (const v of Object.values(entry.variants ?? {})) {
if (v?.ambiguous) ambiguousTerms.add(v.term);
}
const avoidTerms = allTerms.filter(t => t !== variant.term && !ambiguousTerms.has(t));
swaps.push({
concept: entry.concept,
englishGloss: entry.englishGloss,
Expand Down
Loading