summaryrefslogtreecommitdiff
path: root/searx/utils.py
diff options
context:
space:
mode:
authorMarc Abonce Seguin <marc-abonce@mailbox.org>2018-02-28 22:30:48 -0600
committerMarc Abonce Seguin <marc-abonce@mailbox.org>2018-03-27 00:08:03 -0600
commit772c048d01c7585fd60afca1ce30a1914e6e5b4a (patch)
tree96a5662897df2bcf0ab53456e0a67ace998f2169 /searx/utils.py
parentd1eae9359f8c5920632a730744ea2208070f06da (diff)
refactor engine's search language handling
Add match_language function in utils to match any user given language code with a list of engine's supported languages. Also add language_aliases dict on each engine to translate standard language codes into the custom codes used by the engine.
Diffstat (limited to 'searx/utils.py')
-rw-r--r--searx/utils.py61
1 files changed, 61 insertions, 0 deletions
diff --git a/searx/utils.py b/searx/utils.py
index 8f095f3b..77c39290 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -4,6 +4,7 @@ import hmac
import os
import re
+from babel.core import get_global
from babel.dates import format_date
from codecs import getincrementalencoder
from imp import load_source
@@ -12,6 +13,7 @@ from os.path import splitext, join
from random import choice
import sys
+from searx import settings
from searx.version import VERSION_STRING
from searx.languages import language_codes
from searx import settings
@@ -322,6 +324,65 @@ def is_valid_lang(lang):
return False
+# auxiliary function to match lang_code in lang_list
+def _match_language(lang_code, lang_list=None, custom_aliases=None):
+    """Return the entry of lang_list that matches lang_code, or None.
+
+    lang_code is first translated through custom_aliases when it appears
+    there. Matching is then attempted in this order:
+
+    1. lang_code itself is an entry of lang_list;
+    2. the "language-country" code built from babel's 'likely_subtags'
+       entry for lang_code (also translated through custom_aliases);
+    3. the first entry of lang_list whose part before '-' equals lang_code.
+
+    lang_list and custom_aliases default to empty when omitted or None.
+    """
+    # None sentinels instead of mutable [] / {} defaults
+    if lang_list is None:
+        lang_list = []
+    if custom_aliases is None:
+        custom_aliases = {}
+
+    # replace language code with a custom alias if necessary
+    lang_code = custom_aliases.get(lang_code, lang_code)
+
+    if lang_code in lang_list:
+        return lang_code
+
+    # try to get the most likely country for this language
+    subtags = get_global('likely_subtags').get(lang_code)
+    if subtags:
+        # keep only the first and last '_'-separated parts of the subtag string
+        subtag_parts = subtags.split('_')
+        new_code = subtag_parts[0] + '-' + subtag_parts[-1]
+        new_code = custom_aliases.get(new_code, new_code)
+        if new_code in lang_list:
+            return new_code
+
+    # try to get any supported country for this language
+    for lc in lang_list:
+        if lang_code == lc.split('-')[0]:
+            return lc
+
+    return None
+
+
+# get the language code from lang_list that best matches locale_code
+def match_language(locale_code, lang_list=None, custom_aliases=None, fallback='en-US'):
+    """Return the entry of lang_list that best matches locale_code.
+
+    The following candidates are tried in order through _match_language,
+    and the first one that yields a match is returned:
+
+    1. locale_code as given;
+    2. the language part combined with the first entry of babel's
+       'territory_aliases' for the last '-'-separated part of locale_code
+       (only when locale_code contains a '-');
+    3. babel's 'language_aliases' entry for the language part;
+    4. the language part alone (only when it differs from locale_code).
+
+    If nothing matches, fallback is returned. lang_list and custom_aliases
+    default to empty when omitted or None.
+    """
+    # None sentinels instead of mutable [] / {} defaults
+    if lang_list is None:
+        lang_list = []
+    if custom_aliases is None:
+        custom_aliases = {}
+
+    # try to get language from given locale_code
+    language = _match_language(locale_code, lang_list, custom_aliases)
+    if language:
+        return language
+
+    locale_parts = locale_code.split('-')
+    lang_code = locale_parts[0]
+
+    # try to get language using an equivalent country code
+    if len(locale_parts) > 1:
+        country_alias = get_global('territory_aliases').get(locale_parts[-1])
+        if country_alias:
+            language = _match_language(lang_code + '-' + country_alias[0], lang_list, custom_aliases)
+            if language:
+                return language
+
+    # try to get language using an equivalent language code
+    alias = get_global('language_aliases').get(lang_code)
+    if alias:
+        language = _match_language(alias, lang_list, custom_aliases)
+        if language:
+            return language
+
+    if lang_code != locale_code:
+        # try to get language from given language without giving the country
+        language = _match_language(lang_code, lang_list, custom_aliases)
+
+    # language may still be None here; fall back in that case
+    return language or fallback
+
+
def load_module(filename, module_dir):
modname = splitext(filename)[0]
if modname in sys.modules: