diff --git a/.gitignore b/.gitignore index 74124a748..11912c418 100644 --- a/.gitignore +++ b/.gitignore @@ -103,3 +103,5 @@ ENV/ *~ /uv.lock + +/.codex diff --git a/default_countries.csv b/default_countries.csv index a8a4a6545..788811c39 100644 --- a/default_countries.csv +++ b/default_countries.csv @@ -1,5 +1,6 @@ code af_za +agr_pe am_et ar_aa as_in @@ -33,6 +34,7 @@ gu_in gv_gb he_il hi_in +hif_fj hr_hr hu_hu hy_am @@ -55,6 +57,7 @@ lo_la lt_lt lv_lv mi_nz +miq_ni mk_mk ml_in mn_mn @@ -63,6 +66,7 @@ ms_my mt_mt my_mm ne_np +nhn_mx nl_nl nn_no no_no @@ -82,6 +86,7 @@ rw_rw sa_in sd_in si_lk +shs_ca sk_sk sl_si sq_al @@ -93,6 +98,7 @@ ta_in te_in tg_tj th_th +the_np tk_tm tl_ph tn_za @@ -100,6 +106,7 @@ tr_tr ts_za tt_ru uk_ua +unm_us ur_pk uz_uz ve_za @@ -107,6 +114,7 @@ vi_vn wa_be xh_za yi_us +yuw_pg zu_za ast_es gug_py diff --git a/languages.csv b/languages.csv index e2105fda8..8948d0ff8 100644 --- a/languages.csv +++ b/languages.csv @@ -12,6 +12,7 @@ af,Afrikaans,2,n != 1 afh,Afrihili,2,n != 1 aii,Assyrian Neo-Aramaic,2,n != 1 ain,Ainu (Japan),2,n != 1 +agr,Aguaruna,2,n != 1 ak,Akan,2,n > 1 akk,Akkadian,2,n != 1 ale,Aleut,2,n != 1 @@ -300,6 +301,7 @@ he_IL,Hebrew (Israel),4,(n == 1) ? 0 : ((n == 2) ? 1 : ((n > 10 && n % 10 == 0) hi,Hindi,2,n > 1 hi@hinglish,Hindi (Hinglish),2,n > 1 hi_Latn,Hindi (Latin script),2,n > 1 +hif,Hindi (Fiji),2,n != 1 hil,Hiligaynon,2,n != 1 hit,Hittite,2,n != 1 hmn,Hmong,2,n != 1 @@ -459,6 +461,7 @@ mi,Maori,2,n > 1 mia,Miami,2,n > 1 mic,Mi'kmaq,2,n != 1 min,Minangkabau,2,n != 1 +miq,Mískito,2,n != 1 mis,Milang,1,0 mjw,Karbi,2,n != 1 mk,Macedonian,2,n==1 || n%10==1 ? 0 : 1 @@ -510,6 +513,7 @@ ne,Nepali,2,n != 1 new,Newari,2,n != 1 ng,Ndonga,2,n != 1 ngl,Lomwe,2,n != 1 +nhn,Nahuatl (Central),2,n != 1 nia,Nias,2,n != 1 nij,Ngaju,2,n != 1 niu,Niuean,2,n != 1 @@ -648,6 +652,7 @@ smj,Sami (Lule),3,(n == 1) ? 0 : ((n == 2) ? 1 : 2) sml,Sama (Central),2,n != 1 smn,Sami (Inari),3,(n == 1) ? 0 : ((n == 2) ? 
def normalize_supported_locale(locale):
    """Reduce a glibc locale name to lower-case ``language[_territory][@modifier]``.

    Locale names follow ``language[_territory][.codeset][@modifier]``; entries in
    glibc's own ``SUPPORTED`` list may additionally carry a ``/CHARMAP`` suffix.
    The codeset and charmap are dropped, the modifier is preserved.

    Args:
        locale: Raw locale token, e.g. ``"ca_ES.UTF-8@valencia"``.

    Returns:
        The lower-cased name without codeset/charmap, e.g. ``"ca_es@valencia"``.
    """
    # Drop the "/CHARMAP" suffix first so a dot inside it cannot confuse parsing.
    name = locale.split("/", 1)[0]
    base, _, modifier = name.partition("@")
    # The codeset normally precedes the modifier; strip it from both halves so
    # the non-standard "lang@modifier.codeset" order is handled as well.
    base = base.split(".", 1)[0]
    modifier = modifier.split(".", 1)[0]
    normalized = f"{base}@{modifier}" if modifier else base
    return normalized.lower()


def is_supported_locale(locale, languages, aliases, default_countries):
    """Tell whether a normalized locale is covered by the language data.

    The locale is accepted when any of these spellings is a known language or
    alias: the locale itself, ``language@modifier`` (not for ``@euro``, which
    is only a currency variant), the name without modifier, or the bare
    language code when a territory is present.

    Args:
        locale: Name as returned by ``normalize_supported_locale``.
        languages: Container of known language codes.
        aliases: Container of known alias codes.
        default_countries: Accepted for interface compatibility. It does not
            influence the result, because the bare language code is always
            tried for names that carry a territory.

    Returns:
        ``True`` if at least one candidate spelling is known.
    """
    base, has_modifier, modifier = locale.partition("@")
    language = base.split("_", 1)[0]

    candidates = [locale]
    if has_modifier:
        if modifier != "euro":
            candidates.append(f"{language}@{modifier}")
        candidates.append(base)
    if "_" in base:
        candidates.append(language)

    return any(
        candidate in languages or candidate in aliases for candidate in candidates
    )


def validate_supported_locales(
    languages,
    aliases,
    default_countries,
    supported_path="/usr/share/i18n/SUPPORTED",
):
    """Check that every locale listed by the system glibc is known.

    Args:
        languages: Container of known language codes.
        aliases: Container of known alias codes.
        default_countries: Forwarded to ``is_supported_locale``.
        supported_path: Location of the glibc ``SUPPORTED`` list.

    Returns:
        ``None``. The check is silently skipped when the file does not exist.

    Raises:
        ValueError: If one or more listed locales are not covered.
    """
    try:
        # Open directly instead of testing for existence first (no race), and
        # pin the encoding so the result does not depend on the active locale.
        handle = open(supported_path, encoding="utf-8")
    except FileNotFoundError:
        return

    missing = set()

    with handle:
        for line in handle:
            fields = line.split()
            if not fields:
                continue
            token = fields[0]
            # The upstream glibc file starts with "#" comments and a
            # "SUPPORTED-LOCALES=\" make assignment; neither is a locale.
            if token.startswith("#") or "=" in token:
                continue
            locale = normalize_supported_locale(token)
            if locale in {"c", "posix"}:
                continue
            if not is_supported_locale(locale, languages, aliases, default_countries):
                missing.add(locale)

    if missing:
        raise ValueError(
            f"Missing locales from {supported_path}: " + ", ".join(sorted(missing))
        )
= ( "af_za", + "agr_pe", "am_et", "ar_aa", "as_in", @@ -48,6 +49,7 @@ "gv_gb", "he_il", "hi_in", + "hif_fj", "hr_hr", "hu_hu", "hy_am", @@ -70,6 +72,7 @@ "lt_lt", "lv_lv", "mi_nz", + "miq_ni", "mk_mk", "ml_in", "mn_mn", @@ -78,6 +81,7 @@ "mt_mt", "my_mm", "ne_np", + "nhn_mx", "nl_nl", "nn_no", "no_no", @@ -97,6 +101,7 @@ "sa_in", "sd_in", "si_lk", + "shs_ca", "sk_sk", "sl_si", "sq_al", @@ -108,6 +113,7 @@ "te_in", "tg_tj", "th_th", + "the_np", "tk_tm", "tl_ph", "tn_za", @@ -115,6 +121,7 @@ "ts_za", "tt_ru", "uk_ua", + "unm_us", "ur_pk", "uz_uz", "ve_za", @@ -122,6 +129,7 @@ "wa_be", "xh_za", "yi_us", + "yuw_pg", "zu_za", "ast_es", "gug_py", diff --git a/weblate_language_data/language_codes.py b/weblate_language_data/language_codes.py index ed7843bc9..f67de15b1 100644 --- a/weblate_language_data/language_codes.py +++ b/weblate_language_data/language_codes.py @@ -29,6 +29,7 @@ "af", "afh", "afr", + "agr", "aii", "ain", "ais", @@ -494,6 +495,7 @@ "hi", "hi@hinglish", "hi_latn", + "hif", "hil", "hin", "hit", @@ -735,6 +737,7 @@ "mia", "mic", "min", + "miq", "mis", "mjw", "mk", @@ -810,6 +813,7 @@ "new", "ng", "ngl", + "nhn", "nia", "nij", "niu", @@ -1000,6 +1004,7 @@ "sh", "shi", "shn", + "shs", "si", "sid", "sin", @@ -1107,6 +1112,7 @@ "tgl", "th", "tha", + "the", "ti", "tib", "tig", @@ -1165,6 +1171,7 @@ "umb", "umu", "und", + "unm", "unr", "ur", "ur_in", @@ -1232,6 +1239,7 @@ "yue", "yue_hans", "yue_hant", + "yuw", "za", "zai", "zap", diff --git a/weblate_language_data/languages.py b/weblate_language_data/languages.py index d126ba4af..8773a0bdc 100644 --- a/weblate_language_data/languages.py +++ b/weblate_language_data/languages.py @@ -133,6 +133,15 @@ 2, "n != 1", ), + ( + "agr", + # Translators: Language name for ISO code "agr". The parenthesis clarifies + # variant of the language. It could contain a region, age (Old, Middle, ...) + # or other variant. + _("Aguaruna"), + 2, + "n != 1", + ), ( "ak", # Translators: Language name for ISO code "ak". 
The parenthesis clarifies @@ -2725,6 +2734,15 @@ 2, "n > 1", ), + ( + "hif", + # Translators: Language name for ISO code "hif". The parenthesis clarifies + # variant of the language. It could contain a region, age (Old, Middle, ...) + # or other variant. + _("Hindi (Fiji)"), + 2, + "n != 1", + ), ( "hil", # Translators: Language name for ISO code "hil". The parenthesis clarifies @@ -4156,6 +4174,15 @@ 2, "n != 1", ), + ( + "miq", + # Translators: Language name for ISO code "miq". The parenthesis clarifies + # variant of the language. It could contain a region, age (Old, Middle, ...) + # or other variant. + _("Mískito"), + 2, + "n != 1", + ), ( "mis", # Translators: Language name for ISO code "mis". The parenthesis clarifies @@ -4615,6 +4642,15 @@ 2, "n != 1", ), + ( + "nhn", + # Translators: Language name for ISO code "nhn". The parenthesis clarifies + # variant of the language. It could contain a region, age (Old, Middle, ...) + # or other variant. + _("Nahuatl (Central)"), + 2, + "n != 1", + ), ( "nia", # Translators: Language name for ISO code "nia". The parenthesis clarifies @@ -5857,6 +5893,15 @@ 3, "(n == 1) ? 0 : ((n == 2) ? 1 : 2)", ), + ( + "shs", + # Translators: Language name for ISO code "shs". The parenthesis clarifies + # variant of the language. It could contain a region, age (Old, Middle, ...) + # or other variant. + _("Shuswap"), + 2, + "n != 1", + ), ( "sn", # Translators: Language name for ISO code "sn". The parenthesis clarifies @@ -6415,6 +6460,15 @@ 1, "0", ), + ( + "the", + # Translators: Language name for ISO code "the". The parenthesis clarifies + # variant of the language. It could contain a region, age (Old, Middle, ...) + # or other variant. + _("Tharu (Chitwania)"), + 2, + "n != 1", + ), ( "tr", # Translators: Language name for ISO code "tr". The parenthesis clarifies @@ -6622,6 +6676,15 @@ 2, "n != 1", ), + ( + "unm", + # Translators: Language name for ISO code "unm". The parenthesis clarifies + # variant of the language. 
It could contain a region, age (Old, Middle, ...) + # or other variant. + _("Unami"), + 2, + "n != 1", + ), ( "unr", # Translators: Language name for ISO code "unr". The parenthesis clarifies @@ -6955,6 +7018,15 @@ 2, "n != 1", ), + ( + "yuw", + # Translators: Language name for ISO code "yuw". The parenthesis clarifies + # variant of the language. It could contain a region, age (Old, Middle, ...) + # or other variant. + _("Yau (Morobe Province)"), + 2, + "n != 1", + ), ( "yo", # Translators: Language name for ISO code "yo". The parenthesis clarifies diff --git a/weblate_language_data/population.py b/weblate_language_data/population.py index 9a5ba5dcd..938bc432e 100644 --- a/weblate_language_data/population.py +++ b/weblate_language_data/population.py @@ -27,6 +27,7 @@ "afh": 0, "aii": 0, "ain": 0, + "agr": 0, "ak": 13489749, "akk": 0, "ale": 0, @@ -315,6 +316,7 @@ "hi": 580318483, "hi@hinglish": 580318483, "hi_Latn": 1409130, + "hif": 390160, "hil": 9935268, "hit": 0, "hmn": 0, @@ -474,6 +476,7 @@ "mia": 0, "mic": 9310, "min": 8446860, + "miq": 0, "mis": 0, "mjw": 0, "mk": 1612846, @@ -525,6 +528,7 @@ "new": 1027039, "ng": 588768, "ngl": 2267868, + "nhn": 0, "nia": 0, "nij": 1041779, "niu": 1120, @@ -663,6 +667,7 @@ "sml": 0, "smn": 618, "sms": 618, + "shs": 0, "sn": 13891824, "snk": 1297445, "so": 18630626, @@ -725,6 +730,7 @@ "toi": 2287901, "tok": 0, "tpi": 7132802, + "the": 0, "tr": 82419542, "trv": 4719, "ts": 5313550, @@ -748,6 +754,7 @@ "uk": 24080585, "umb": 10788609, "und": 302, + "unm": 0, "unr": 1324582, "ur": 313093257, "ur_IN": 70456500, @@ -785,6 +792,7 @@ "yao": 800424, "yap": 6573, "yi": 916209, + "yuw": 0, "yo": 31761815, "yua": 875958, "yue_Hans": 73634080,