summaryrefslogtreecommitdiff
path: root/searxng_extra
diff options
context:
space:
mode:
authorMarkus Heiser <markus.heiser@darmarit.de>2022-01-03 12:40:06 +0100
committerMarkus Heiser <markus.heiser@darmarit.de>2022-01-05 16:09:40 +0100
commitffea5d8ef5540bc4be08b2b26e1819d5401f854d (patch)
treeb2285d42d9ed6debec82ebd25fca31358a452794 /searxng_extra
parentb630c5d7bc0bf5a4281ad402bb32adc7f6ab257f (diff)
[docs] add documentation for the scripts in searxng_extra/update
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searxng_extra')
-rwxr-xr-xsearxng_extra/update/update_ahmia_blacklist.py17
-rwxr-xr-xsearxng_extra/update/update_currencies.py6
-rwxr-xr-xsearxng_extra/update/update_engine_descriptions.py7
-rwxr-xr-xsearxng_extra/update/update_external_bangs.py13
-rwxr-xr-xsearxng_extra/update/update_firefox_version.py14
-rwxr-xr-xsearxng_extra/update/update_languages.py10
-rwxr-xr-xsearxng_extra/update/update_osm_keys_tags.py5
-rwxr-xr-xsearxng_extra/update/update_wikidata_units.py12
8 files changed, 64 insertions, 20 deletions
diff --git a/searxng_extra/update/update_ahmia_blacklist.py b/searxng_extra/update/update_ahmia_blacklist.py
index f7695dea..57fb78b3 100755
--- a/searxng_extra/update/update_ahmia_blacklist.py
+++ b/searxng_extra/update/update_ahmia_blacklist.py
@@ -1,10 +1,14 @@
#!/usr/bin/env python
# SPDX-License-Identifier: AGPL-3.0-or-later
+"""This script saves `Ahmia's blacklist`_ for onion sites.
-# This script saves Ahmia's blacklist for onion sites.
-# More info in https://ahmia.fi/blacklist/
+Output file: :origin:`searx/data/ahmia_blacklist.txt` (:origin:`CI Update data
+... <.github/workflows/data-update.yml>`).
+
+.. _Ahmia's blacklist: https://ahmia.fi/blacklist/
+
+"""
-# set path
from os.path import join
import requests
@@ -26,6 +30,7 @@ def get_ahmia_blacklist_filename():
return join(join(searx_dir, "data"), "ahmia_blacklist.txt")
-blacklist = fetch_ahmia_blacklist()
-with open(get_ahmia_blacklist_filename(), "w") as f:
- f.write('\n'.join(blacklist))
+if __name__ == '__main__':
+ blacklist = fetch_ahmia_blacklist()
+ with open(get_ahmia_blacklist_filename(), "w") as f:
+ f.write('\n'.join(blacklist))
diff --git a/searxng_extra/update/update_currencies.py b/searxng_extra/update/update_currencies.py
index 3373e245..cdff4cbc 100755
--- a/searxng_extra/update/update_currencies.py
+++ b/searxng_extra/update/update_currencies.py
@@ -1,6 +1,12 @@
#!/usr/bin/env python
# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Fetch currencies from :origin:`searx/engines/wikidata.py` engine.
+
+Output file: :origin:`searx/data/currencies.json` (:origin:`CI Update data ...
+<.github/workflows/data-update.yml>`).
+
+"""
import re
import unicodedata
import json
diff --git a/searxng_extra/update/update_engine_descriptions.py b/searxng_extra/update/update_engine_descriptions.py
index 51cfc7cc..bab1a034 100755
--- a/searxng_extra/update/update_engine_descriptions.py
+++ b/searxng_extra/update/update_engine_descriptions.py
@@ -1,6 +1,13 @@
#!/usr/bin/env python
# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Fetch website description from websites and from
+:origin:`searx/engines/wikidata.py` engine.
+
+Output file: :origin:`searx/data/engine_descriptions.json`.
+
+"""
+
import json
from urllib.parse import urlparse
from os.path import join
diff --git a/searxng_extra/update/update_external_bangs.py b/searxng_extra/update/update_external_bangs.py
index d5c6b585..be3aade0 100755
--- a/searxng_extra/update/update_external_bangs.py
+++ b/searxng_extra/update/update_external_bangs.py
@@ -1,17 +1,20 @@
#!/usr/bin/env python
# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later
-"""
-Update searx/data/external_bangs.json using the duckduckgo bangs.
+"""Update :origin:`searx/data/external_bangs.json` using the duckduckgo bangs
+(:origin:`CI Update data ... <.github/workflows/data-update.yml>`).
+
+https://duckduckgo.com/newbang loads:
-https://duckduckgo.com/newbang loads
* a javascript which provides the bang version ( https://duckduckgo.com/bv1.js )
* a JSON file which contains the bangs ( https://duckduckgo.com/bang.v260.js for example )
This script loads the javascript, then the bangs.
-The javascript URL may change in the future ( for example https://duckduckgo.com/bv2.js ),
-but most probably it will requires to update RE_BANG_VERSION
+The javascript URL may change in the future ( for example
+https://duckduckgo.com/bv2.js ), but most probably it will requires to update
+RE_BANG_VERSION
+
"""
# pylint: disable=C0116
diff --git a/searxng_extra/update/update_firefox_version.py b/searxng_extra/update/update_firefox_version.py
index 750e955f..163982b1 100755
--- a/searxng_extra/update/update_firefox_version.py
+++ b/searxng_extra/update/update_firefox_version.py
@@ -1,6 +1,13 @@
#!/usr/bin/env python
# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Fetch firefox useragent signatures
+
+Output file: :origin:`searx/data/useragents.json` (:origin:`CI Update data ...
+<.github/workflows/data-update.yml>`).
+
+"""
+
import json
import requests
import re
@@ -66,6 +73,7 @@ def get_useragents_filename():
return join(join(searx_dir, "data"), "useragents.json")
-useragents["versions"] = fetch_firefox_last_versions()
-with open(get_useragents_filename(), "w") as f:
- json.dump(useragents, f, indent=4, ensure_ascii=False)
+if __name__ == '__main__':
+ useragents["versions"] = fetch_firefox_last_versions()
+ with open(get_useragents_filename(), "w", encoding='utf-8') as f:
+ json.dump(useragents, f, indent=4, ensure_ascii=False)
diff --git a/searxng_extra/update/update_languages.py b/searxng_extra/update/update_languages.py
index f3734580..9a71566a 100755
--- a/searxng_extra/update/update_languages.py
+++ b/searxng_extra/update/update_languages.py
@@ -1,9 +1,13 @@
#!/usr/bin/env python
# SPDX-License-Identifier: AGPL-3.0-or-later
+"""This script generates languages.py from intersecting each engine's supported
+languages.
-# This script generates languages.py from intersecting each engine's supported languages.
-#
-# Output files: searx/data/engines_languages.json and searx/languages.py
+Output files: :origin:`searx/data/engines_languages.json` and
+:origin:`searx/languages.py` (:origin:`CI Update data ...
+<.github/workflows/data-update.yml>`).
+
+"""
import json
from pathlib import Path
diff --git a/searxng_extra/update/update_osm_keys_tags.py b/searxng_extra/update/update_osm_keys_tags.py
index 2916cbff..1d691c19 100755
--- a/searxng_extra/update/update_osm_keys_tags.py
+++ b/searxng_extra/update/update_osm_keys_tags.py
@@ -5,7 +5,10 @@
To get the i18n names, the scripts uses `Wikidata Query Service`_ instead of for
example `OSM tags API`_ (sidenote: the actual change log from
-map.atownsend.org.uk_ might be useful to normalize OSM tags)
+map.atownsend.org.uk_ might be useful to normalize OSM tags).
+
+Output file: :origin:`searx/data/osm_keys_tags` (:origin:`CI Update data ...
+<.github/workflows/data-update.yml>`).
.. _Wikidata Query Service: https://query.wikidata.org/
.. _OSM tags API: https://taginfo.openstreetmap.org/taginfo/apidoc
diff --git a/searxng_extra/update/update_wikidata_units.py b/searxng_extra/update/update_wikidata_units.py
index 43a872b1..e999b6cf 100755
--- a/searxng_extra/update/update_wikidata_units.py
+++ b/searxng_extra/update/update_wikidata_units.py
@@ -3,6 +3,13 @@
# lint: pylint
# pylint: disable=missing-module-docstring
+"""Fetch units from :origin:`searx/engines/wikidata.py` engine.
+
+Output file: :origin:`searx/data/wikidata_units.json` (:origin:`CI Update data
+... <.github/workflows/data-update.yml>`).
+
+"""
+
import json
import collections
@@ -54,5 +61,6 @@ def get_wikidata_units_filename():
return join(join(searx_dir, "data"), "wikidata_units.json")
-with open(get_wikidata_units_filename(), 'w', encoding="utf8") as f:
- json.dump(get_data(), f, indent=4, ensure_ascii=False)
+if __name__ == '__main__':
+ with open(get_wikidata_units_filename(), 'w', encoding="utf8") as f:
+ json.dump(get_data(), f, indent=4, ensure_ascii=False)