summary | refs | log | tree | commit | diff
path: root/searxng_extra
diff options
context:
space:
mode:
author: Markus Heiser <markus.heiser@darmarit.de> 2022-01-03 12:58:48 +0100
committer: Markus Heiser <markus.heiser@darmarit.de> 2022-01-05 16:09:40 +0100
commit: 295876abaa93b8dea44dc0beaf8eb2596da69aed (patch)
tree: 519246c2905622455309ae7ba6ea634c5a356016 /searxng_extra
parent: ffea5d8ef5540bc4be08b2b26e1819d5401f854d (diff)
[pylint] add scripts from searxng_extra/update to pylint
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searxng_extra')
-rwxr-xr-x searxng_extra/update/update_ahmia_blacklist.py | 7
-rwxr-xr-x searxng_extra/update/update_currencies.py | 7
-rwxr-xr-x searxng_extra/update/update_engine_descriptions.py | 7
-rwxr-xr-x searxng_extra/update/update_firefox_version.py | 41
-rwxr-xr-x searxng_extra/update/update_languages.py | 29
5 files changed, 51 insertions, 40 deletions
diff --git a/searxng_extra/update/update_ahmia_blacklist.py b/searxng_extra/update/update_ahmia_blacklist.py
index 57fb78b3..26c48519 100755
--- a/searxng_extra/update/update_ahmia_blacklist.py
+++ b/searxng_extra/update/update_ahmia_blacklist.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later
"""This script saves `Ahmia's blacklist`_ for onion sites.
@@ -21,9 +22,7 @@ def fetch_ahmia_blacklist():
resp = requests.get(URL, timeout=3.0)
if resp.status_code != 200:
raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code)
- else:
- blacklist = resp.text.split()
- return blacklist
+ return resp.text.split()
def get_ahmia_blacklist_filename():
@@ -32,5 +31,5 @@ def get_ahmia_blacklist_filename():
if __name__ == '__main__':
blacklist = fetch_ahmia_blacklist()
- with open(get_ahmia_blacklist_filename(), "w") as f:
+ with open(get_ahmia_blacklist_filename(), "w", encoding='utf-8') as f:
f.write('\n'.join(blacklist))
diff --git a/searxng_extra/update/update_currencies.py b/searxng_extra/update/update_currencies.py
index cdff4cbc..e51692e7 100755
--- a/searxng_extra/update/update_currencies.py
+++ b/searxng_extra/update/update_currencies.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Fetch currencies from :origin:`searx/engines/wikidata.py` engine.
@@ -7,13 +8,15 @@ Output file: :origin:`searx/data/currencies.json` (:origin:`CI Update data ...
<.github/workflows/data-update.yml>`).
"""
+
+# pylint: disable=invalid-name
+
import re
import unicodedata
import json
# set path
-from sys import path
-from os.path import realpath, dirname, join
+from os.path import join
from searx import searx_dir
from searx.locales import LOCALE_NAMES
diff --git a/searxng_extra/update/update_engine_descriptions.py b/searxng_extra/update/update_engine_descriptions.py
index bab1a034..5b73fd39 100755
--- a/searxng_extra/update/update_engine_descriptions.py
+++ b/searxng_extra/update/update_engine_descriptions.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Fetch website description from websites and from
@@ -8,6 +9,8 @@ Output file: :origin:`searx/data/engine_descriptions.json`.
"""
+# pylint: disable=invalid-name, global-statement
+
import json
from urllib.parse import urlparse
from os.path import join
@@ -109,7 +112,7 @@ def get_wikipedia_summary(lang, pageid):
response.raise_for_status()
api_result = json.loads(response.text)
return api_result.get('extract')
- except:
+ except Exception: # pylint: disable=broad-except
return None
@@ -141,7 +144,7 @@ def get_website_description(url, lang1, lang2=None):
try:
response = searx.network.get(url, headers=headers, timeout=10)
response.raise_for_status()
- except Exception:
+ except Exception: # pylint: disable=broad-except
return (None, None)
try:
diff --git a/searxng_extra/update/update_firefox_version.py b/searxng_extra/update/update_firefox_version.py
index 163982b1..a447f9fd 100755
--- a/searxng_extra/update/update_firefox_version.py
+++ b/searxng_extra/update/update_firefox_version.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Fetch firefox useragent signatures
@@ -9,20 +10,21 @@ Output file: :origin:`searx/data/useragents.json` (:origin:`CI Update data ...
"""
import json
-import requests
import re
-from os.path import dirname, join
+from os.path import join
from urllib.parse import urlparse, urljoin
-from distutils.version import LooseVersion, StrictVersion
+from distutils.version import LooseVersion
+
+import requests
from lxml import html
from searx import searx_dir
URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
RELEASE_PATH = '/pub/firefox/releases/'
-NORMAL_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?$')
-# BETA_REGEX = re.compile('.*[0-9]b([0-9\-a-z]+)$')
-# ESR_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?esr$')
+NORMAL_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?$')
+# BETA_REGEX = re.compile(r'.*[0-9]b([0-9\-a-z]+)$')
+# ESR_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?esr$')
#
useragents = {
@@ -39,20 +41,19 @@ def fetch_firefox_versions():
resp = requests.get(URL, timeout=2.0)
if resp.status_code != 200:
raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code)
- else:
- dom = html.fromstring(resp.text)
- versions = []
-
- for link in dom.xpath('//a/@href'):
- url = urlparse(urljoin(URL, link))
- path = url.path
- if path.startswith(RELEASE_PATH):
- version = path[len(RELEASE_PATH) : -1]
- if NORMAL_REGEX.match(version):
- versions.append(LooseVersion(version))
-
- list.sort(versions, reverse=True)
- return versions
+ dom = html.fromstring(resp.text)
+ versions = []
+
+ for link in dom.xpath('//a/@href'):
+ url = urlparse(urljoin(URL, link))
+ path = url.path
+ if path.startswith(RELEASE_PATH):
+ version = path[len(RELEASE_PATH) : -1]
+ if NORMAL_REGEX.match(version):
+ versions.append(LooseVersion(version))
+
+ list.sort(versions, reverse=True)
+ return versions
def fetch_firefox_last_versions():
diff --git a/searxng_extra/update/update_languages.py b/searxng_extra/update/update_languages.py
index 9a71566a..754180c4 100755
--- a/searxng_extra/update/update_languages.py
+++ b/searxng_extra/update/update_languages.py
@@ -1,4 +1,6 @@
#!/usr/bin/env python
+# lint: pylint
+
# SPDX-License-Identifier: AGPL-3.0-or-later
"""This script generates languages.py from intersecting each engine's supported
languages.
@@ -9,6 +11,8 @@ Output files: :origin:`searx/data/engines_languages.json` and
"""
+# pylint: disable=invalid-name
+
import json
from pathlib import Path
from pprint import pformat
@@ -28,7 +32,7 @@ languages_file = Path(searx_dir) / 'languages.py'
def fetch_supported_languages():
set_timeout_for_thread(10.0)
- engines_languages = dict()
+ engines_languages = {}
names = list(engines)
names.sort()
@@ -36,7 +40,7 @@ def fetch_supported_languages():
if hasattr(engines[engine_name], 'fetch_supported_languages'):
engines_languages[engine_name] = engines[engine_name].fetch_supported_languages()
print("fetched %s languages from engine %s" % (len(engines_languages[engine_name]), engine_name))
- if type(engines_languages[engine_name]) == list:
+ if type(engines_languages[engine_name]) == list: # pylint: disable=unidiomatic-typecheck
engines_languages[engine_name] = sorted(engines_languages[engine_name])
print("fetched languages from %s engines" % len(engines_languages))
@@ -59,7 +63,7 @@ def get_locale(lang_code):
# Join all language lists.
def join_language_lists(engines_languages):
- language_list = dict()
+ language_list = {}
for engine_name in engines_languages:
for lang_code in engines_languages[engine_name]:
@@ -95,7 +99,7 @@ def join_language_lists(engines_languages):
'name': language_name,
'english_name': english_name,
'counter': set(),
- 'countries': dict(),
+ 'countries': {},
}
# add language with country if not in list
@@ -123,6 +127,7 @@ def join_language_lists(engines_languages):
def filter_language_list(all_languages):
min_engines_per_lang = 13
min_engines_per_country = 7
+ # pylint: disable=consider-using-dict-items, consider-iterating-dictionary
main_engines = [
engine_name
for engine_name in engines.keys()
@@ -142,7 +147,7 @@ def filter_language_list(all_languages):
}
def _copy_lang_data(lang, country_name=None):
- new_dict = dict()
+ new_dict = {}
new_dict['name'] = all_languages[lang]['name']
new_dict['english_name'] = all_languages[lang]['english_name']
if country_name:
@@ -150,10 +155,10 @@ def filter_language_list(all_languages):
return new_dict
# for each language get country codes supported by most engines or at least one country code
- filtered_languages_with_countries = dict()
+ filtered_languages_with_countries = {}
for lang, lang_data in filtered_languages.items():
countries = lang_data['countries']
- filtered_countries = dict()
+ filtered_countries = {}
# get language's country codes with enough supported engines
for lang_country, country_data in countries.items():
@@ -215,7 +220,7 @@ def write_languages_file(languages):
language_codes = tuple(language_codes)
- with open(languages_file, 'w') as new_file:
+ with open(languages_file, 'w', encoding='utf-8') as new_file:
file_content = "{file_headers} {language_codes},\n)\n".format(
# fmt: off
file_headers = '\n'.join(file_headers),
@@ -228,7 +233,7 @@ def write_languages_file(languages):
if __name__ == "__main__":
load_engines(settings['engines'])
- engines_languages = fetch_supported_languages()
- all_languages = join_language_lists(engines_languages)
- filtered_languages = filter_language_list(all_languages)
- write_languages_file(filtered_languages)
+ _engines_languages = fetch_supported_languages()
+ _all_languages = join_language_lists(_engines_languages)
+ _filtered_languages = filter_language_list(_all_languages)
+ write_languages_file(_filtered_languages)