summary | refs | log | tree | commit | diff
path: root/searx
diff options
context:
space:
mode:
author Markus Heiser <markus.heiser@darmarit.de> 2023-11-13 19:12:50 +0100
committer Markus Heiser <markus.heiser@darmarIT.de> 2023-12-03 13:47:17 +0100
commit 2274d55d5a4dea76b645e3495673545fea0fe529 (patch)
tree 9bf3508dea187862996b6a44552182bc8c35e76e /searx
parent 1e0813fd3a217fd5f012e17f8fc1386a0a16bf5b (diff)
[mod] add option max_page
Related: https://github.com/searxng/searxng/issues/2982
Closes: https://github.com/searxng/searxng/issues/2972
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Diffstat (limited to 'searx')
-rw-r--r-- searx/engines/__init__.py 1
-rw-r--r-- searx/engines/google.py 1
-rw-r--r-- searx/engines/google_images.py 1
-rw-r--r-- searx/engines/google_scholar.py 1
-rw-r--r-- searx/engines/google_videos.py 1
-rw-r--r-- searx/search/processors/abstract.py 5
-rw-r--r-- searx/settings.yml 1
-rw-r--r-- searx/settings_defaults.py 1
8 files changed, 12 insertions, 0 deletions
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index da2b2037..0bea37ca 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -45,6 +45,7 @@ ENGINE_DEFAULT_ARGS = {
"using_tor_proxy": False,
"send_accept_language_header": False,
"tokens": [],
+ "max_page": 0,
}
# set automatically when an engine does not have any tab category
DEFAULT_CATEGORY = 'other'
diff --git a/searx/engines/google.py b/searx/engines/google.py
index 51c6acbf..90b58e27 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -48,6 +48,7 @@ about = {
# engine dependent config
categories = ['general', 'web']
paging = True
+max_page = 50
time_range_support = True
safesearch = True
diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py
index 1f9759c9..d2d33d40 100644
--- a/searx/engines/google_images.py
+++ b/searx/engines/google_images.py
@@ -47,6 +47,7 @@ about = {
# engine dependent config
categories = ['images', 'web']
paging = True
+max_page = 50
time_range_support = True
safesearch = True
send_accept_language_header = True
diff --git a/searx/engines/google_scholar.py b/searx/engines/google_scholar.py
index 6f33d1e1..8d11c956 100644
--- a/searx/engines/google_scholar.py
+++ b/searx/engines/google_scholar.py
@@ -51,6 +51,7 @@ about = {
# engine dependent config
categories = ['science', 'scientific publications']
paging = True
+max_page = 50
language_support = True
time_range_support = True
safesearch = False
diff --git a/searx/engines/google_videos.py b/searx/engines/google_videos.py
index f922e1f7..0b1a5111 100644
--- a/searx/engines/google_videos.py
+++ b/searx/engines/google_videos.py
@@ -57,6 +57,7 @@ about = {
categories = ['videos', 'web']
paging = True
+max_page = 50
language_support = True
time_range_support = True
safesearch = True
diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py
index 0cabec97..baa031a0 100644
--- a/searx/search/processors/abstract.py
+++ b/searx/search/processors/abstract.py
@@ -150,6 +150,11 @@ class EngineProcessor(ABC):
if search_query.pageno > 1 and not self.engine.paging:
return None
+ # if the requested page number is beyond max_page, skip
+ max_page = self.engine.max_page or settings['search']['max_page']
+ if max_page and max_page < search_query.pageno:
+ return None
+
# if time_range is not supported, skip
if search_query.time_range and not self.engine.time_range_support:
return None
diff --git a/searx/settings.yml b/searx/settings.yml
index 926cddb5..727b9534 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -32,6 +32,7 @@ search:
# Default search language - leave blank to detect from browser information or
# use codes from 'languages.py'
default_lang: "auto"
+ # max_page: 0 # applies to engines that support paging; 0 means an unlimited number of pages
# Available languages
# languages:
# - all
diff --git a/searx/settings_defaults.py b/searx/settings_defaults.py
index a0d0daa0..6a56fdd7 100644
--- a/searx/settings_defaults.py
+++ b/searx/settings_defaults.py
@@ -169,6 +169,7 @@ SCHEMA = {
'recaptcha_SearxEngineCaptcha': SettingsValue(numbers.Real, 604800),
},
'formats': SettingsValue(list, OUTPUT_FORMATS),
+ 'max_page': SettingsValue(int, 0),
},
'server': {
'port': SettingsValue((int, str), 8888, 'SEARXNG_PORT'),