diff --git a/package.json b/package.json
index 1a4d342..33a6ed7 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "react-lang-selecta",
- "version": "1.3.0",
+ "version": "1.4.0",
"description": "Language selector component with flag emojies",
"author": "Marc Abonce Seguin",
"license": "LiLiQ-R-1.1",
diff --git a/py-src/build_locale_regions_json.py b/py-src/build_locale_regions_json.py
index fbd4595..e6bf233 100644
--- a/py-src/build_locale_regions_json.py
+++ b/py-src/build_locale_regions_json.py
@@ -6,14 +6,21 @@
import json
import re
-from babel import Locale, languages, localedata
+from babel import Locale, languages, localedata, UnknownLocaleError
+from requests import get
int_locale = Locale('ia')
locale_regions = {}
unofficial_locale_regions = {}
+international_locales = {}
+# SIL-provided data from the CLDR repo that is not included in the CLDR release
+cldr_extra_resp = get('https://raw.githubusercontent.com/unicode-org/cldr/refs/heads/main/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/external/langtags.json')
+
+
+# helper that adds a language to the given dict, or adds a region to a language already in it
def add_locale_to_dict(lang, region, locale_dict, fallback_name=None):
if lang in locale_dict:
if region not in locale_dict[lang]['regions']:
@@ -29,6 +36,7 @@ def add_locale_to_dict(lang, region, locale_dict, fallback_name=None):
locale_dict[lang]['name'] = fallback_name
+# get locale data from babel library
for locale in localedata.locale_identifiers():
locale_parts = locale.split('_')
if len(locale_parts) > 1 and re.match(r'^[A-Z]{2}$', locale_parts[-1]):
@@ -44,29 +52,62 @@ def add_locale_to_dict(lang, region, locale_dict, fallback_name=None):
add_locale_to_dict(lang, region, unofficial_locale_regions)
elif locale_parts[-1] == '001':
# add United Nations for languages like Esperanto
- add_locale_to_dict(locale_parts[0], 'UN', locale_regions)
+ add_locale_to_dict(locale_parts[0], 'UN', international_locales)
+
+
+# get more languages from extra CLDR data not found in babel
+if cldr_extra_resp.ok:
+ cldr_extra = cldr_extra_resp.json()
+ for locale in cldr_extra:
+ if 'iso639_3' not in locale:
+ continue
+ lang = iso639_3 = locale['iso639_3']
+ try:
+ # skip ISO639-3 locales that already have an equivalent ISO639-1 code
+ # in any of the locale dicts that we're building,
+ # e.g. skip redundant codes like 'spa' (already covered by 'es')
+ iso639_1 = Locale.parse(iso639_3).language
+ if iso639_1 in locale_regions or \
+ iso639_1 in unofficial_locale_regions or \
+ iso639_1 in international_locales:
+ continue
+ lang = iso639_1
+ except UnknownLocaleError:
+ # extra locale is unknown by babel,
+ # therefore we should definitely add it to dict
+ # (because that's the whole point of this piece of code)
+ pass
+
+ country_id = locale.get('region')
+ name = locale.get('localname') \
+ or locale.get('localenames', [None])[0] \
+ or locale.get('names', [None])[0] \
+ or locale.get('name')
+
+ if not country_id or country_id.isdigit():
+ # locales with no region or a numeric "international" region like 001 or 419
+ add_locale_to_dict(
+ lang, 'UN', international_locales, fallback_name=name
+ )
+ else:
+ add_locale_to_dict(
+ lang, country_id, unofficial_locale_regions, fallback_name=name
+ )
# merge back languages with no official region
for lang in unofficial_locale_regions:
if lang not in locale_regions:
locale_regions[lang] = unofficial_locale_regions[lang]
-
-
-# get more language codes and names from Ethnologue
-with open('./data/LanguageCodes.tab') as ethnologue_file:
- ethnologue_table = csv.reader(ethnologue_file, delimiter='\t')
- next(ethnologue_table) # skip header row
- for lang_id, country_id, _, english_name in ethnologue_table:
- if lang_id not in locale_regions:
- add_locale_to_dict(
- lang_id, country_id, locale_regions, fallback_name=english_name
- )
+for lang in international_locales:
+ if lang not in locale_regions:
+ locale_regions[lang] = international_locales[lang]
# sort regions so each run returns same output
for locale in locale_regions:
locale_regions[locale]['regions'] = sorted(locale_regions[locale]['regions'])
+
with open('../src/data/locale_regions.json', 'w+') as f:
json.dump(locale_regions, f, sort_keys=True, indent=2)
diff --git a/py-src/requirements.txt b/py-src/requirements.txt
index e42d9da..419b8b2 100644
--- a/py-src/requirements.txt
+++ b/py-src/requirements.txt
@@ -1,2 +1,3 @@
Babel>=2.16.0
pycodestyle==2.12.1
+requests
diff --git a/src/__tests__/__snapshots__/index.integration.test.tsx.snap b/src/__tests__/__snapshots__/index.integration.test.tsx.snap
index cdc8c55..07acbb2 100644
--- a/src/__tests__/__snapshots__/index.integration.test.tsx.snap
+++ b/src/__tests__/__snapshots__/index.integration.test.tsx.snap
@@ -16,7 +16,7 @@ exports[`LangSelecta does not change 1`] = `