summaryrefslogtreecommitdiff
path: root/searx/engines/elasticsearch.py
blob: 99e93d87646517b6bfbbe164932759b2149f7747 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from json import loads, dumps
from requests.auth import HTTPBasicAuth


# Root URL of the Elasticsearch HTTP endpoint.
base_url = 'http://localhost:9200'

# Credentials: request() attaches HTTP basic auth only when both are non-empty.
username = ''
password = ''

# Name of the index to search; init() rejects the empty string.
index = ''

# Assembled once, while this module is being imported, from the values that
# base_url and index hold at that moment.
# NOTE(review): presumably the values above are replaced from the engine
# settings after import; if so this URL does not follow them - confirm.
search_url = '/'.join((base_url, index, '_search'))

# Key into _available_query_types selecting how the user query is translated.
query_type = 'match'

# Template read by the 'custom' query type; it may contain the '{{KEY}}' and
# '{{VALUE}}' placeholders.
custom_query_json = dict()

# When true, response() adds the hit's index, id and score under 'metadata'.
show_metadata = False

categories = ['general']


def init(engine_settings):
    """
    Validate the engine configuration.

    Raises ValueError when engine_settings carries a 'query_type' that is not
    a key of _available_query_types, or when the module-level index is the
    empty string. The query type is checked first.
    """

    query_type_given = 'query_type' in engine_settings
    if query_type_given:
        configured_type = engine_settings['query_type']
        if configured_type not in _available_query_types:
            raise ValueError('unsupported query type', configured_type)

    index_is_empty = index == ''
    if index_is_empty:
        raise ValueError('index cannot be empty')


def request(query, params):
    """
    Fill params with the HTTP request that runs query against Elasticsearch.

    query is the raw user query; it is handed to the builder registered in
    _available_query_types for the configured query_type, and whatever that
    builder raises propagates to the caller.
    params is the request dict; it must already contain a 'headers' dict.

    Returns params. When query_type has no registered builder, params is
    returned unchanged.
    """

    if query_type not in _available_query_types:
        return params

    # Basic auth is only attached when both credentials are configured.
    if username and password:
        params['auth'] = HTTPBasicAuth(username, password)

    # Build the URL at request time instead of reusing the module-level
    # search_url: that string is concatenated once at import, so it does not
    # follow base_url / index values assigned afterwards. A trailing slash on
    # base_url is dropped to avoid a doubled '/' in the path.
    params['url'] = base_url.rstrip('/') + '/' + index + '/_search'
    params['method'] = 'GET'
    # The query DSL document travels as a JSON body.
    params['data'] = dumps(_available_query_types[query_type](query))
    params['headers']['Content-Type'] = 'application/json'

    return params


def _match_query(query):
    """
    The standard for full text queries.
    searx format: "key:value" e.g. city:berlin
    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html

    Only the first ':' separates the field name from the text, so the text
    may itself contain colons (e.g. url:https://example.org).

    Raises ValueError when query contains no ':'.
    """

    key, separator, value = query.partition(':')
    if not separator:
        raise ValueError('query format must be "key:value"')

    return {"query": {"match": {key: {'query': value}}}}


def _simple_query_string_query(query):
    """
    Wrap the raw query in a simple_query_string request body.
    This query type accepts query strings, but is less strict than query_string.
    The field used can be specified in index.query.default_field in Elasticsearch.
    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html
    """

    # The user input is passed through untouched; no key:value parsing here.
    clause = {'query': query}
    body = {'query': {'simple_query_string': clause}}
    return body


def _term_query(query):
    """
    Accepts one term and the name of the field.
    searx format: "key:value" e.g. city:berlin
    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-term-query.html

    Only the first ':' separates the field name from the term, so the term
    may itself contain colons.

    Raises ValueError when query contains no ':'.
    """

    key, separator, value = query.partition(':')
    if not separator:
        raise ValueError('query format must be key:value')

    return {'query': {'term': {key: value}}}


def _terms_query(query):
    """
    Accepts multiple terms and the name of the field.
    searx format: "key:value1,value2" e.g. city:berlin,paris
    REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html

    Only the first ':' separates the field name from the comma-separated
    terms, so individual terms may themselves contain colons.

    Raises ValueError when query contains no ':'.
    """

    key, separator, values = query.partition(':')
    if not separator:
        raise ValueError('query format must be key:value1,value2')

    return {'query': {'terms': {key: values.split(',')}}}


def _custom_query(query):
    """
    Build the request body from the administrator-defined custom_query_json.
    searx format: "key:value" e.g. city:berlin

    A new structure is returned on every call and custom_query_json itself is
    never modified, so its placeholders stay available for the next query.
    Every dict key equal to '{{KEY}}' is replaced by the key part of the
    query and every value equal to '{{VALUE}}' by the value part, at any
    nesting depth of dicts and lists. Only the first ':' splits the query.

    Raises ValueError when query contains no ':'.
    """

    key, separator, value = query.partition(':')
    if not separator:
        raise ValueError('query format must be key:value')

    def _fill(node):
        # Rebuild containers instead of editing them in place: the template
        # is shared module state and must not change while being walked.
        if isinstance(node, dict):
            return {
                (key if node_key == '{{KEY}}' else node_key): _fill(node_value)
                for node_key, node_value in node.items()
            }
        if isinstance(node, list):
            return [_fill(item) for item in node]
        if isinstance(node, str) and node == '{{VALUE}}':
            return value
        return node

    return _fill(custom_query_json)


def response(resp):
    """
    Convert an Elasticsearch search response into a list of result dicts.

    resp.text is parsed as JSON. Each hit's '_source' fields become one
    result: values are stringified unless the field name starts with '_',
    and 'template' is set to 'key-value.html'. When show_metadata is true the
    hit's '_index', '_id' and '_score' are added under 'metadata'.

    Raises Exception carrying the 'error' member when the response has one.
    """

    payload = loads(resp.text)
    if 'error' in payload:
        raise Exception(payload['error'])

    results = []
    for hit in payload['hits']['hits']:
        entry = {}
        for field, content in hit['_source'].items():
            # Underscore-prefixed fields keep their original JSON type.
            entry[field] = content if field.startswith('_') else str(content)
        entry['template'] = 'key-value.html'

        if show_metadata:
            entry['metadata'] = {
                'index': hit['_index'],
                'id': hit['_id'],
                'score': hit['_score'],
            }

        results.append(entry)

    return results


# Dispatch table: maps each supported query_type name to the function that
# turns the user's query string into an Elasticsearch request body.
_available_query_types = dict(
    # Full text queries
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/full-text-queries.html
    match=_match_query,
    simple_query_string=_simple_query_string_query,

    # Term-level queries
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/term-level-queries.html
    term=_term_query,
    terms=_terms_query,

    # Query JSON defined by the instance administrator.
    custom=_custom_query,
)