summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarkus Heiser <markus.heiser@darmarit.de>2023-09-30 18:41:13 +0200
committerMarkus Heiser <markus.heiser@darmarIT.de>2023-10-05 10:55:08 +0200
commitfd1422a67017443a0dc0773562ec98525b468bde (patch)
treeb8e5b2e952fe748e07379178ba39aab809f1d599
parent3e2ae756f0675aa5bbc65ead7fd02871e5dd66d2 (diff)
[mod] engine - simplify region & lang handling, make filters configurable
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
-rw-r--r--docs/dev/engines/online/radio_browser.rst13
-rw-r--r--searx/data/engine_traits.json337
-rw-r--r--searx/engines/radio_browser.py141
3 files changed, 453 insertions, 38 deletions
diff --git a/docs/dev/engines/online/radio_browser.rst b/docs/dev/engines/online/radio_browser.rst
new file mode 100644
index 00000000..a150e59c
--- /dev/null
+++ b/docs/dev/engines/online/radio_browser.rst
@@ -0,0 +1,13 @@
+.. _RadioBrowser engine:
+
+============
+RadioBrowser
+============
+
+.. contents::
+ :depth: 2
+ :local:
+ :backlinks: entry
+
+.. automodule:: searx.engines.radio_browser
+ :members:
diff --git a/searx/data/engine_traits.json b/searx/data/engine_traits.json
index e13538aa..aee199b3 100644
--- a/searx/data/engine_traits.json
+++ b/searx/data/engine_traits.json
@@ -4932,6 +4932,343 @@
"zh-HK": "zh_HK"
}
},
+ "radio browser": {
+ "all_locale": null,
+ "custom": {
+ "countrycodes": [
+ "AD",
+ "AE",
+ "AF",
+ "AG",
+ "AL",
+ "AM",
+ "AO",
+ "AQ",
+ "AR",
+ "AS",
+ "AT",
+ "AU",
+ "AW",
+ "AZ",
+ "BA",
+ "BB",
+ "BD",
+ "BE",
+ "BF",
+ "BG",
+ "BH",
+ "BI",
+ "BJ",
+ "BM",
+ "BN",
+ "BO",
+ "BQ",
+ "BR",
+ "BS",
+ "BT",
+ "BW",
+ "BY",
+ "BZ",
+ "CA",
+ "CC",
+ "CD",
+ "CF",
+ "CH",
+ "CI",
+ "CK",
+ "CL",
+ "CM",
+ "CN",
+ "CO",
+ "CR",
+ "CU",
+ "CV",
+ "CW",
+ "CY",
+ "CZ",
+ "DE",
+ "DK",
+ "DM",
+ "DO",
+ "DZ",
+ "EC",
+ "EE",
+ "EG",
+ "ES",
+ "ET",
+ "FI",
+ "FJ",
+ "FK",
+ "FO",
+ "FR",
+ "GA",
+ "GB",
+ "GD",
+ "GE",
+ "GF",
+ "GG",
+ "GH",
+ "GI",
+ "GL",
+ "GN",
+ "GP",
+ "GQ",
+ "GR",
+ "GS",
+ "GT",
+ "GU",
+ "GW",
+ "GY",
+ "HK",
+ "HN",
+ "HR",
+ "HT",
+ "HU",
+ "ID",
+ "IE",
+ "IL",
+ "IM",
+ "IN",
+ "IO",
+ "IQ",
+ "IR",
+ "IS",
+ "IT",
+ "JM",
+ "JO",
+ "JP",
+ "KE",
+ "KG",
+ "KH",
+ "KM",
+ "KN",
+ "KP",
+ "KR",
+ "KW",
+ "KY",
+ "KZ",
+ "LB",
+ "LC",
+ "LK",
+ "LT",
+ "LU",
+ "LV",
+ "LY",
+ "MA",
+ "MC",
+ "MD",
+ "ME",
+ "MG",
+ "MK",
+ "ML",
+ "MM",
+ "MN",
+ "MO",
+ "MQ",
+ "MT",
+ "MU",
+ "MW",
+ "MX",
+ "MY",
+ "MZ",
+ "NA",
+ "NC",
+ "NE",
+ "NF",
+ "NG",
+ "NI",
+ "NL",
+ "NO",
+ "NP",
+ "NZ",
+ "OM",
+ "PA",
+ "PE",
+ "PF",
+ "PH",
+ "PK",
+ "PL",
+ "PM",
+ "PR",
+ "PS",
+ "PT",
+ "PY",
+ "QA",
+ "RE",
+ "RO",
+ "RS",
+ "RU",
+ "RW",
+ "SA",
+ "SC",
+ "SD",
+ "SE",
+ "SG",
+ "SH",
+ "SI",
+ "SJ",
+ "SK",
+ "SL",
+ "SM",
+ "SN",
+ "SO",
+ "SR",
+ "ST",
+ "SV",
+ "SY",
+ "SZ",
+ "TC",
+ "TD",
+ "TF",
+ "TG",
+ "TH",
+ "TJ",
+ "TM",
+ "TN",
+ "TO",
+ "TR",
+ "TT",
+ "TW",
+ "TZ",
+ "UA",
+ "UG",
+ "UM",
+ "US",
+ "UY",
+ "UZ",
+ "VA",
+ "VC",
+ "VE",
+ "VG",
+ "VI",
+ "VN",
+ "VU",
+ "WF",
+ "XK",
+ "YE",
+ "YT",
+ "ZA",
+ "ZM",
+ "ZW"
+ ]
+ },
+ "data_type": "traits_v1",
+ "languages": {
+ "af": "afrikaans",
+ "ak": "akan",
+ "am": "amharic",
+ "ar": "arabic",
+ "ast": "asturian",
+ "az": "azerbaijani",
+ "be": "belarusian",
+ "bg": "bulgarian",
+ "bm": "bambara",
+ "bn": "bengali",
+ "bo": "tibetan",
+ "br": "breton",
+ "bs": "bosnian",
+ "ca": "catalan",
+ "cs": "czech",
+ "cv": "chuvash",
+ "cy": "welsh",
+ "da": "danish",
+ "de": "german",
+ "dsb": "lower sorbian",
+ "dz": "dzongkha",
+ "el": "greek",
+ "en": "english",
+ "eo": "esperanto",
+ "es": "spanish",
+ "et": "estonian",
+ "eu": "basque",
+ "fa": "persian",
+ "fi": "finnish",
+ "fil": "tagalog",
+ "fo": "faroese",
+ "fr": "french",
+ "ga": "irish",
+ "gd": "gaelic",
+ "gl": "galician",
+ "gsw": "swiss german",
+ "gu": "gujarati",
+ "gv": "manx",
+ "ha": "hausa",
+ "he": "hebrew",
+ "hi": "hindi",
+ "hr": "croatian",
+ "hsb": "upper sorbian",
+ "hu": "hungarian",
+ "hy": "armenian",
+ "id": "indonesian",
+ "is": "icelandic",
+ "it": "italian",
+ "ja": "japanese",
+ "jv": "javanese",
+ "ka": "georgian",
+ "kk": "kazakh",
+ "kl": "kalaallisut",
+ "km": "khmer",
+ "kn": "kannada",
+ "ko": "korean",
+ "ku": "kurdish",
+ "lb": "luxembourgish",
+ "ln": "lingala",
+ "lt": "lithuanian",
+ "lv": "latvian",
+ "mg": "malagasy",
+ "mk": "macedonian",
+ "ml": "malayalam",
+ "mn": "mongolian",
+ "mr": "marathi",
+ "ms": "malay",
+ "mt": "maltese",
+ "my": "burmese",
+ "nds": "low german",
+ "ne": "nepali",
+ "nl": "dutch",
+ "no": "norwegian",
+ "oc": "occitan",
+ "om": "oromo",
+ "os": "ossetian",
+ "pa": "panjabi",
+ "pl": "polish",
+ "pt": "portuguese",
+ "qu": "quechua",
+ "rm": "romansh",
+ "ro": "romanian",
+ "ru": "russian",
+ "rw": "kinyarwanda",
+ "sa": "sanskrit",
+ "sc": "sardinian",
+ "sd": "sindhi",
+ "si": "sinhala",
+ "sk": "slovak",
+ "sl": "slovenian",
+ "so": "somali",
+ "sq": "albanian",
+ "sr": "serbian",
+ "sv": "swedish",
+ "sw": "swahili",
+ "ta": "tamil",
+ "te": "telugu",
+ "tg": "tajik",
+ "th": "thai",
+ "tk": "turkmen",
+ "tr": "turkish",
+ "tt": "tatar",
+ "uk": "ukrainian",
+ "ur": "urdu",
+ "uz": "uzbek",
+ "vi": "vietnamese",
+ "wo": "wolof",
+ "xh": "xhosa",
+ "yi": "yiddish",
+ "yue": "cantonese",
+ "zh": "chinese",
+ "zh_Hans": "mandarin"
+ },
+ "regions": {}
+ },
"sepiasearch": {
"all_locale": null,
"custom": {},
diff --git a/searx/engines/radio_browser.py b/searx/engines/radio_browser.py
index 758ba1b3..6b60b398 100644
--- a/searx/engines/radio_browser.py
+++ b/searx/engines/radio_browser.py
@@ -1,30 +1,57 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
-"""Radio browser (music)
+"""Search radio stations from RadioBrowser by `Advanced station search API`_.
+
+.. _Advanced station search API:
+ https://de1.api.radio-browser.info/#Advanced_station_search
+
"""
from urllib.parse import urlencode
import babel
+from flask_babel import gettext
from searx.network import get
from searx.enginelib.traits import EngineTraits
-from searx.locales import language_tag, region_tag
+from searx.locales import language_tag
traits: EngineTraits
about = {
"website": 'https://www.radio-browser.info/',
+ "wikidata_id": 'Q111664849',
"official_api_documentation": 'https://de1.api.radio-browser.info/',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
paging = True
-categories = ['music']
+categories = ['music', 'radio']
base_url = "https://de1.api.radio-browser.info" # see https://api.radio-browser.info/ for all nodes
number_of_results = 10
+station_filters = [] # ['countrycode', 'language']
+"""A list of filters to be applied to the search of radio stations. By default
+no filters are applied. Valid filters are:
+
+``language``
+ Filter stations by selected language. For instance the ``de`` from ``:de-AU``
+ will be translated to `german` and used in the argument ``language=``.
+
+``countrycode``
+ Filter stations by selected country. The 2-letter countrycode of the station
+ comes from the region the user selected. For instance ``:de-AU`` will filter
+ out all stations not in ``AU``.
+
+.. note::
+
+ RadioBrowser has registered a lot of languages and countrycodes unknown to
+ :py:obj:`babel`. Note also that when searching for radio stations, users are
+ more likely to search by name than by region or language.
+
+"""
+
def request(query, params):
args = {
@@ -35,13 +62,17 @@ def request(query, params):
'hidebroken': 'true',
'reverse': 'true',
}
- lang = traits.get_language(params['searxng_locale'], None)
- if lang is not None:
- args['language'] = lang
- region = traits.get_region(params['searxng_locale'], None)
- if region is not None:
- args['countrycode'] = region.split('-')[1]
+ if 'language' in station_filters:
+ lang = traits.get_language(params['searxng_locale']) # type: ignore
+ if lang:
+ args['language'] = lang
+
+ if 'countrycode' in station_filters:
+ if len(params['searxng_locale'].split('-')) > 1:
+ countrycode = params['searxng_locale'].split('-')[-1].upper()
+ if countrycode in traits.custom['countrycodes']: # type: ignore
+ args['countrycode'] = countrycode
params['url'] = f"{base_url}/json/stations/search?{urlencode(args)}"
return params
@@ -50,22 +81,43 @@ def request(query, params):
def response(resp):
results = []
- for result in resp.json():
+ json_resp = resp.json()
+
+ for result in json_resp:
url = result['homepage']
if not url:
url = result['url_resolved']
+ content = []
+ tags = ', '.join(result.get('tags', '').split(','))
+ if tags:
+ content.append(tags)
+ for x in ['state', 'country']:
+ v = result.get(x)
+ if v:
+ v = str(v).strip()
+ content.append(v)
+
+ metadata = []
+ codec = result.get('codec')
+ if codec and codec.lower() != 'unknown':
+ metadata.append(f'{codec} ' + gettext('radio'))
+ for x, y in [
+ (gettext('bitrate'), 'bitrate'),
+ (gettext('votes'), 'votes'),
+ (gettext('clicks'), 'clickcount'),
+ ]:
+ v = result.get(y)
+ if v:
+ v = str(v).strip()
+ metadata.append(f"{x} {v}")
results.append(
{
- 'template': 'videos.html',
'url': url,
'title': result['name'],
- 'thumbnail': result.get('favicon', '').replace("http://", "https://"),
- 'content': result['country']
- + " / "
- + result["tags"]
- + f" / {result['votes']} votes"
- + f" / {result['clickcount']} clicks",
+ 'img_src': result.get('favicon', '').replace("http://", "https://"),
+ 'content': ' | '.join(content),
+ 'metadata': ' | '.join(metadata),
'iframe_src': result['url_resolved'].replace("http://", "https://"),
}
)
@@ -74,38 +126,51 @@ def response(resp):
def fetch_traits(engine_traits: EngineTraits):
- language_list = get(f'{base_url}/json/languages').json()
+ """Fetch languages and countrycodes from RadioBrowser
+
+ - ``traits.languages``: `list of languages API`_
+ - ``traits.custom['countrycodes']``: `list of countries API`_
+
+ .. _list of countries API: https://de1.api.radio-browser.info/#List_of_countries
+ .. _list of languages API: https://de1.api.radio-browser.info/#List_of_languages
+ """
+ # pylint: disable=import-outside-toplevel
+
+ from babel.core import get_global
+
+ babel_reg_list = get_global("territory_languages").keys()
- country_list = get(f'{base_url}/json/countrycodes').json()
+ language_list = get(f'{base_url}/json/languages').json() # type: ignore
+ country_list = get(f'{base_url}/json/countries').json() # type: ignore
for lang in language_list:
- # the language doesn't have any iso code, and hence can't be parsed
- if not lang['iso_639']:
+ babel_lang = lang.get('iso_639')
+ if not babel_lang:
+ # the language doesn't have any iso code, and hence can't be parsed
+ # print(f"ERROR: lang - no iso code in {lang}")
continue
-
try:
- lang_tag = lang['iso_639']
- sxng_tag = language_tag(babel.Locale.parse(lang_tag, sep="-"))
+ sxng_tag = language_tag(babel.Locale.parse(babel_lang, sep="-"))
except babel.UnknownLocaleError:
- print("ERROR: %s is unknown by babel" % lang_tag)
+ # print(f"ERROR: language tag {babel_lang} is unknown by babel")
continue
+ eng_tag = lang['name']
conflict = engine_traits.languages.get(sxng_tag)
if conflict:
+ if conflict != eng_tag:
+ print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
continue
+ engine_traits.languages[sxng_tag] = eng_tag
- engine_traits.languages[sxng_tag] = lang['name']
-
- for region in country_list:
- try:
- reg_tag = f"{lang['iso_639']}-{region['name']}"
- sxng_tag = region_tag(babel.Locale.parse(reg_tag, sep="-"))
- except babel.UnknownLocaleError:
- continue
-
- conflict = engine_traits.regions.get(sxng_tag)
- if conflict:
- continue
+ countrycodes = set()
+ for region in country_list:
+ if region['iso_3166_1'] not in babel_reg_list:
+ print(f"ERROR: region tag {region['iso_3166_1']} is unknown by babel")
+ continue
+ countrycodes.add(region['iso_3166_1'])
- engine_traits.regions[sxng_tag] = reg_tag
+ countrycodes = list(countrycodes)
+ countrycodes.sort()
+ engine_traits.custom['countrycodes'] = countrycodes