Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "react-lang-selecta",
"version": "1.3.0",
"version": "1.4.0",
"description": "Language selector component with flag emojies",
"author": "Marc Abonce Seguin",
"license": "LiLiQ-R-1.1",
Expand Down
67 changes: 54 additions & 13 deletions py-src/build_locale_regions_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,21 @@
import json
import re

from babel import Locale, languages, localedata
from babel import Locale, languages, localedata, UnknownLocaleError
from requests import get


# Babel locale object for 'ia' (Interlingua).
# NOTE(review): its use is not visible in these hunks — presumably it supplies
# language display names; confirm against the rest of the script.
int_locale = Locale('ia')
# language code -> entry; this is the mapping finally dumped to locale_regions.json
locale_regions = {}
# holding area that is merged into locale_regions further down, only for
# languages that are not already present there
unofficial_locale_regions = {}
# languages filed under the placeholder region 'UN' (e.g. locales ending in 001);
# also merged into locale_regions further down for languages still missing
international_locales = {}


# SIL-provided langtags data from the CLDR repository that is not included in
# CLDR releases.
# An explicit timeout is passed because requests applies none by default, so a
# stalled connection would otherwise hang the build forever.
cldr_extra_resp = get(
    'https://raw.githubusercontent.com/unicode-org/cldr/refs/heads/main/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/external/langtags.json',
    timeout=30,
)


# aux function to add a language to a given dict or add a region into an already added language
def add_locale_to_dict(lang, region, locale_dict, fallback_name=None):
if lang in locale_dict:
if region not in locale_dict[lang]['regions']:
Expand All @@ -29,6 +36,7 @@ def add_locale_to_dict(lang, region, locale_dict, fallback_name=None):
locale_dict[lang]['name'] = fallback_name


# get locale data from babel library
for locale in localedata.locale_identifiers():
locale_parts = locale.split('_')
if len(locale_parts) > 1 and re.match(r'^[A-Z]{2}$', locale_parts[-1]):
Expand All @@ -44,29 +52,62 @@ def add_locale_to_dict(lang, region, locale_dict, fallback_name=None):
add_locale_to_dict(lang, region, unofficial_locale_regions)
elif locale_parts[-1] == '001':
# add United Nations for languages like Esperanto
add_locale_to_dict(locale_parts[0], 'UN', locale_regions)
add_locale_to_dict(locale_parts[0], 'UN', international_locales)


# get more languages from extra CLDR data not found in babel
# (a non-OK HTTP response simply leaves the babel-derived data untouched)
if cldr_extra_resp.ok:
    cldr_extra = cldr_extra_resp.json()
    for locale in cldr_extra:
        if 'iso639_3' not in locale:
            continue
        lang = iso639_3 = locale['iso639_3']
        try:
            iso639_1 = Locale.parse(iso639_3).language
        except UnknownLocaleError:
            # extra locale is unknown by babel,
            # therefore we should definitely add it to dict
            # (because that's the whole point of this piece of code)
            pass
        except ValueError:
            # babel could not even parse the code as a locale identifier;
            # skip the malformed entry instead of aborting the whole build
            continue
        else:
            # skip ISO639-3 locales that already have an equivalent ISO639-1
            # code in any of the locale dicts that we're building
            # i.e. remove redundant codes like 'spa' ('es')
            if iso639_1 in locale_regions or \
                    iso639_1 in unofficial_locale_regions or \
                    iso639_1 in international_locales:
                continue
            lang = iso639_1

        country_id = locale.get('region')
        # `(... or [None])[0]` also copes with a key that is present but holds
        # an empty list, which `.get(key, [None])[0]` would turn into an
        # IndexError.
        # NOTE(review): the langtags format spells the list of alternative
        # local names 'localnames'; the previous spelling 'localenames' is
        # kept as a fallback in case some entries really use it — confirm.
        name = locale.get('localname') \
            or (locale.get('localnames') or [None])[0] \
            or (locale.get('localenames') or [None])[0] \
            or (locale.get('names') or [None])[0] \
            or locale.get('name')

        if not country_id or country_id.isdigit():
            # locales with "international" regions like 001 or 419
            add_locale_to_dict(
                lang, 'UN', international_locales, fallback_name=name
            )
        else:
            add_locale_to_dict(
                lang, country_id, unofficial_locale_regions,
                fallback_name=name
            )


# languages that never got an official region fall back to their
# unofficial entry; languages already in locale_regions are left alone
for lang, unofficial_entry in unofficial_locale_regions.items():
    locale_regions.setdefault(lang, unofficial_entry)


# get more language codes and names from Ethnologue
with open('./data/LanguageCodes.tab') as ethnologue_file:
ethnologue_table = csv.reader(ethnologue_file, delimiter='\t')
next(ethnologue_table) # skip header row
for lang_id, country_id, _, english_name in ethnologue_table:
if lang_id not in locale_regions:
add_locale_to_dict(
lang_id, country_id, locale_regions, fallback_name=english_name
)
# languages still missing from locale_regions take their international entry
for lang, international_entry in international_locales.items():
    locale_regions.setdefault(lang, international_entry)


# keep the region order deterministic so repeated runs produce the same file
for entry in locale_regions.values():
    entry['regions'] = sorted(entry['regions'])


# serialise the merged mapping with sorted keys and write it for the JS package
with open('../src/data/locale_regions.json', 'w+') as output_file:
    serialised = json.dumps(locale_regions, sort_keys=True, indent=2)
    output_file.write(serialised)
1 change: 1 addition & 0 deletions py-src/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Babel>=2.16.0
pycodestyle==2.12.1
requests
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ exports[`LangSelecta does not change 1`] = `
<option
value="otq"
>
🇲🇽 Otomi, Querétaro
🇲🇽 Ñhöñhö
</option>
<option
value="id"
Expand Down
Loading