diff options
author | Adam Tauber <asciimoo@gmail.com> | 2016-07-15 13:02:29 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-07-15 13:02:29 +0200 |
commit | f7a3e9771d4aa957368bc79cf88b3432872f1a83 (patch) | |
tree | 15d845bcb555fac8aca39f08b3a25155f6f3ca56 /searx | |
parent | 8f2a3d241f51c3da8fdd31014d5569ac11c18021 (diff) | |
parent | 1dba6dcbac6891390653170f44cd7ba9de636cd9 (diff) |
Merge pull request #615 from mmuman/scanr
Add ScanR structures search engine
Diffstat (limited to 'searx')
-rw-r--r-- | searx/engines/scanr_structures.py | 78 | ||||
-rw-r--r-- | searx/settings.yml | 5 |
2 files changed, 83 insertions, 0 deletions
diff --git a/searx/engines/scanr_structures.py b/searx/engines/scanr_structures.py new file mode 100644 index 00000000..ad78155a --- /dev/null +++ b/searx/engines/scanr_structures.py @@ -0,0 +1,78 @@ +""" + ScanR Structures (Science) + + @website https://scanr.enseignementsup-recherche.gouv.fr + @provide-api yes (https://scanr.enseignementsup-recherche.gouv.fr/api/swagger-ui.html) + + @using-api yes + @results JSON + @stable yes + @parse url, title, content, img_src +""" + +from urllib import urlencode +from json import loads, dumps +from dateutil import parser +from searx.utils import html_to_text + +# engine dependent config +categories = ['science'] +paging = True +page_size = 20 + +# search-url +url = 'https://scanr.enseignementsup-recherche.gouv.fr/' +search_url = url + 'api/structures/search' + + +# do search-request +def request(query, params): + + params['url'] = search_url + params['method'] = 'POST' + params['headers']['Content-type'] = "application/json" + params['data'] = dumps({"query": query, + "searchField": "ALL", + "sortDirection": "ASC", + "sortOrder": "RELEVANCY", + "page": params['pageno'], + "pageSize": page_size}) + + return params + + +# get response from search-request +def response(resp): + results = [] + + search_res = loads(resp.text) + + # return empty array if there are no results + if search_res.get('total') < 1: + return [] + + # parse results + for result in search_res['results']: + if 'id' not in result: + continue + + # is it thumbnail or img_src?? + thumbnail = None + if 'logo' in result: + thumbnail = result['logo'] + if thumbnail[0] == '/': + thumbnail = url + thumbnail + + content = None + if 'highlights' in result: + content = result['highlights'][0]['value'] + + # append result + results.append({'url': url + 'structure/' + result['id'], + 'title': result['label'], + # 'thumbnail': thumbnail, + 'img_src': thumbnail, + 'content': html_to_text(content)}) + + # return results + return results diff --git a/searx/settings.yml b/searx/settings.yml index 55889888..d64b73a1 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -314,6 +314,11 @@ engines: engine : kickass shortcut : ka + - name : scanr_structures + shortcut: scs + engine : scanr_structures + disabled : True + - name : soundcloud engine : soundcloud shortcut : sc |