diff --git a/package.json b/package.json index 1a4d342..33a6ed7 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "react-lang-selecta", - "version": "1.3.0", + "version": "1.4.0", "description": "Language selector component with flag emojies", "author": "Marc Abonce Seguin", "license": "LiLiQ-R-1.1", diff --git a/py-src/build_locale_regions_json.py b/py-src/build_locale_regions_json.py index fbd4595..e6bf233 100644 --- a/py-src/build_locale_regions_json.py +++ b/py-src/build_locale_regions_json.py @@ -6,14 +6,21 @@ import json import re -from babel import Locale, languages, localedata +from babel import Locale, languages, localedata, UnknownLocaleError +from requests import get int_locale = Locale('ia') locale_regions = {} unofficial_locale_regions = {} +international_locales = {} +# SIL provided data from CLDR repo that was not included in CLDR release +cldr_extra_resp = get('https://raw.githubusercontent.com/unicode-org/cldr/refs/heads/main/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/external/langtags.json') + + +# aux function to add a language to a given dict or add a region into an already added language def add_locale_to_dict(lang, region, locale_dict, fallback_name=None): if lang in locale_dict: if region not in locale_dict[lang]['regions']: @@ -29,6 +36,7 @@ def add_locale_to_dict(lang, region, locale_dict, fallback_name=None): locale_dict[lang]['name'] = fallback_name +# get locale data from babel library for locale in localedata.locale_identifiers(): locale_parts = locale.split('_') if len(locale_parts) > 1 and re.match(r'^[A-Z]{2}$', locale_parts[-1]): @@ -44,29 +52,62 @@ def add_locale_to_dict(lang, region, locale_dict, fallback_name=None): add_locale_to_dict(lang, region, unofficial_locale_regions) elif locale_parts[-1] == '001': # add United Nations for languages like Esperanto - add_locale_to_dict(locale_parts[0], 'UN', locale_regions) + add_locale_to_dict(locale_parts[0], 'UN', international_locales) + + +# get more languages from extra CLDR data not found in babel +if cldr_extra_resp.ok: + cldr_extra = cldr_extra_resp.json() + for locale in cldr_extra: + if 'iso639_3' not in locale: + continue + lang = iso639_3 = locale['iso639_3'] + try: + # skip ISO639-3 locales that already have an equivalent ISO639-1 code + # in any of the locale dicts that we're building + # i.e. remove redundant codes like 'spa' ('es') + iso639_1 = Locale.parse(iso639_3).language + if iso639_1 in locale_regions or \ + iso639_1 in unofficial_locale_regions or \ + iso639_1 in international_locales: + continue + lang = iso639_1 + except UnknownLocaleError: + # extra locale is unknown by babel, + # therefore we should definitely add it to dict + # (because that's the whole point of this piece of code) + pass + + country_id = locale.get('region') + name = locale.get('localname') \ + or locale.get('localenames', [None])[0] \ + or locale.get('names', [None])[0] \ + or locale.get('name') + + if not country_id or country_id.isdigit(): + # locales with "international" regions like 001 or 419 + add_locale_to_dict( + lang, 'UN', international_locales, fallback_name=name + ) + else: + add_locale_to_dict( + lang, country_id, unofficial_locale_regions, fallback_name=name + ) # merge back languages with no official region for lang in unofficial_locale_regions: if lang not in locale_regions: locale_regions[lang] = unofficial_locale_regions[lang] - - -# get more language codes and names from Ethnologue -with open('./data/LanguageCodes.tab') as ethnologue_file: - ethnologue_table = csv.reader(ethnologue_file, delimiter='\t') - next(ethnologue_table) # skip header row - for lang_id, country_id, _, english_name in ethnologue_table: - if lang_id not in locale_regions: - add_locale_to_dict( - lang_id, country_id, locale_regions, fallback_name=english_name - ) +for lang in international_locales: + if lang not in locale_regions: + locale_regions[lang] = international_locales[lang] # sort regions so each run returns same output for locale in locale_regions: locale_regions[locale]['regions'] = sorted(locale_regions[locale]['regions']) + with open('../src/data/locale_regions.json', 'w+') as f: json.dump(locale_regions, f, sort_keys=True, indent=2) diff --git a/py-src/requirements.txt b/py-src/requirements.txt index e42d9da..419b8b2 100644 --- a/py-src/requirements.txt +++ b/py-src/requirements.txt @@ -1,2 +1,3 @@ Babel>=2.16.0 pycodestyle==2.12.1 +requests diff --git a/src/__tests__/__snapshots__/index.integration.test.tsx.snap b/src/__tests__/__snapshots__/index.integration.test.tsx.snap index cdc8c55..07acbb2 100644 --- a/src/__tests__/__snapshots__/index.integration.test.tsx.snap +++ b/src/__tests__/__snapshots__/index.integration.test.tsx.snap @@ -16,7 +16,7 @@ exports[`LangSelecta does not change 1`] = `