forked from CottageLabs/facetview2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathes.js
535 lines (459 loc) · 19.7 KB
/
es.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
/******************************************************************
 * ELASTICSEARCH INTEGRATION
 *****************************************************************/

// Every character (or character pair) that elasticsearch reserves in query strings.
// The backslash has to stay first in the list: it is the escape character itself, so
// handling it any later would re-process backslashes introduced for the other entries.
var esSpecialChars = [
    "\\", "+", "-", "=", "&&", "||", ">", "<", "!", "(", ")",
    "{", "}", "[", "]", "^", '"', "~", "*", "?", ":", "/"
];

// The same reserved set, in the same order, without * and " so that users can still
// run wildcard and quoted-phrase searches if they want to
var esSpecialCharsSubSet = esSpecialChars.filter(function (reserved) {
    return reserved !== "*" && reserved !== '"';
});

// Values that must appear an even number of times in the query, otherwise they are escaped
var esPairs = ['"'];

// FIXME: esSpecialChars is not currently used for encoding, but it would be worthwhile giving the facetview an option
// to allow/disallow specific values, but that requires a much better (automated) understanding of the
// query DSL

// Unit suffixes that may be attached to elasticsearch distance values
var elasticsearch_distance_units = [
    "km", "mi", "miles", "in", "inch", "yd", "yards", "kilometers",
    "mm", "millimeters", "cm", "centimeters", "m", "meters"
];
/**
 * Read an elasticsearch query object and turn it back into facetview options.
 *
 * Recognised parts of the query:
 *   - from / size / sort            -> opts.from / opts.page_size / opts.sort
 *   - filtered.filter.bool.must     -> opts._active_filters and opts._selected_operators
 *     (term, terms, range and geo_distance_range clauses)
 *   - query_string / match_all      -> opts.q, opts.searchfield, opts.fields, opts.default_operator
 *
 * @param {Object} query - the elasticsearch query object
 * @returns {Object} the options found in the query; always an object, even when
 *                   the query carries no "query" element
 */
function optionsFromQuery(query) {

    function hasOwn(obj, key) {
        return Object.prototype.hasOwnProperty.call(obj, key);
    }

    // remove a trailing distance unit (one of elasticsearch_distance_units) from a value
    function stripDistanceUnits(val) {
        // only strings can carry a unit suffix
        if (typeof val !== "string") { return val; }
        for (var u = 0; u < elasticsearch_distance_units.length; u++) {
            var unit = elasticsearch_distance_units[u];
            var cut = val.length - unit.length;
            // searching from "cut" can only match when the unit is the suffix
            if (cut >= 0 && val.indexOf(unit, cut) !== -1) {
                return val.substring(0, cut);
            }
        }
        return val;
    }

    // undo the backslash escaping of reserved characters in a query string
    function unescapeQueryString(val) {
        function escapeRegExp(string) {
            return string.replace(/([.*+?^=!:${}()|\[\]\/\\])/g, "\\$1");
        }

        function unReplaceAll(string, find) {
            return string.replace(new RegExp("\\\\(" + escapeRegExp(find) + ")", 'g'), "$1");
        }

        if (typeof val !== "string") { return val; }

        // Note we use the full list of special chars
        for (var c = 0; c < esSpecialChars.length; c++) {
            val = unReplaceAll(val, esSpecialChars[c]);
        }
        return val;
    }

    var opts = {};

    // FIXME: note that fields are not supported here

    // from position
    if (hasOwn(query, "from")) { opts["from"] = query.from; }

    // page size
    if (query.size) { opts["page_size"] = query.size; }

    if (query["sort"]) { opts["sort"] = query["sort"]; }

    // get hold of the bool query if it is there
    // and get hold of the query string and default operator if they have been provided
    if (query.query) {
        var sq = query.query;
        var must = [];
        var qs;

        // if this is a filtered query, pull must and qs out of the filter
        // otherwise the root of the query is the query_string object
        if (sq.filtered) {
            var filter = sq.filtered.filter;
            if (filter && filter.bool && filter.bool.must) {
                must = filter.bool.must;
            }
            qs = sq.filtered.query;
        } else {
            qs = sq;
        }

        // tolerate a single clause that is not wrapped in a list
        if (!Array.isArray(must)) { must = [must]; }

        // go through each clause in the must and pull out the options
        if (must.length > 0) {
            opts["_active_filters"] = {};
            opts["_selected_operators"] = {};
        }

        for (var i = 0; i < must.length; i++) {
            var clause = must[i];

            // could be a term query (implies AND on this field)
            if ("term" in clause) {
                for (var termField in clause.term) {
                    if (hasOwn(clause.term, termField)) {
                        opts["_selected_operators"][termField] = "AND";
                        if (!(termField in opts["_active_filters"])) {
                            opts["_active_filters"][termField] = [];
                        }
                        opts["_active_filters"][termField].push(clause.term[termField]);
                    }
                }
            }

            // could be a terms query (implies OR on this field)
            // clause.terms is an object keyed by field name, so it is walked by key
            if ("terms" in clause) {
                for (var termsField in clause.terms) {
                    if (!hasOwn(clause.terms, termsField)) { continue; }
                    var values = clause.terms[termsField];
                    // only a list of values identifies a field; skip anything else
                    if (!Array.isArray(values)) { continue; }
                    opts["_selected_operators"][termsField] = "OR";
                    if (!(termsField in opts["_active_filters"])) {
                        opts["_active_filters"][termsField] = [];
                    }
                    opts["_active_filters"][termsField] = opts["_active_filters"][termsField].concat(values);
                }
            }

            // could be a range query (which may in turn be a range or a date histogram facet)
            if ("range" in clause) {
                // get the field that we're ranging on
                var rangeFields = Object.keys(clause.range);
                var rangeField = rangeFields.length > 0 ? rangeFields[0] : false;
                if (rangeField) {
                    var rparams = clause.range[rangeField];
                    var range = {};
                    if ("lt" in rparams) { range["to"] = rparams.lt; }
                    if ("gte" in rparams) { range["from"] = rparams.gte; }
                    opts["_active_filters"][rangeField] = range;
                }
            }

            // could be a geo distance query
            if ("geo_distance_range" in clause) {
                var gdr = clause.geo_distance_range;

                // the range is defined at the root of the range filter
                var geoRange = {};
                if ("lt" in gdr) { geoRange["to"] = stripDistanceUnits(gdr.lt); }
                if ("gte" in gdr) { geoRange["from"] = stripDistanceUnits(gdr.gte); }

                // FIXME: at some point we may need to make this smarter, if we start including other data
                // in the geo_distance_range filter definition
                // then we have to go looking for the field name: the first key that is not a range bound
                for (var geoField in gdr) {
                    if (!hasOwn(gdr, geoField)) { continue; }
                    if (geoField === "lt" || geoField === "gte") { continue; }
                    opts["_active_filters"][geoField] = geoRange;
                    break;
                }
            }

            // FIXME: support for statistical facet and terms_stats facet
        }

        if (qs) {
            if (qs.query_string) {
                var string = unescapeQueryString(qs.query_string.query);
                var field = qs.query_string.default_field;
                var multi_field = qs.query_string.fields;
                var op = qs.query_string.default_operator;

                if (string) { opts["q"] = string; }
                if (field) { opts["searchfield"] = field; }
                if (multi_field) { opts["fields"] = multi_field; }
                if (op) { opts["default_operator"] = op; }
            } else if (qs.match_all) {
                opts["q"] = "";
            }
        }
    }

    // returned unconditionally so paging/sort-only queries still yield their options
    return opts;
}
/**
 * Build the list of elasticsearch "must" filters described by the options.
 *
 * Filters are taken, in this order, from options.active_filters,
 * options.predefined_filters (both keyed by facet field and translated via the
 * matching facet definition in options.facets) and options.fixed_filters
 * (appended as-is).
 *
 * @param {Object} params
 * @param {Object} params.options - facetview options
 * @returns {Array} filter clauses to place in a bool "must"
 */
function getFilters(params) {
    var options = params.options;

    // a bound of 0 is a real bound; only missing/empty values are skipped
    function hasBound(bound) {
        return bound === 0 || Boolean(bound);
    }

    // function to get the right facet from the options, based on the name
    // (returns undefined when no facet is defined for that field)
    function selectFacet(name) {
        var facets = options.facets || [];
        for (var i = 0; i < facets.length; i++) {
            var item = facets[i];
            if ('field' in item && item['field'] === name) {
                return item;
            }
        }
        return undefined;
    }

    // AND logic: one "term" filter per value; OR logic: a single "terms" filter
    // NOTE(review): any other logic value yields undefined, which makeFilters
    // concatenates as-is - confirm whether a default operator should apply
    function termsFilter(facet, filter_list) {
        if (facet.logic === "AND") {
            var filters = [];
            for (var i = 0; i < filter_list.length; i++) {
                var tq = {"term" : {}};
                tq["term"][facet.field] = filter_list[i];
                filters.push(tq);
            }
            return filters;
        } else if (facet.logic === "OR") {
            var oq = {"terms" : {}};
            oq["terms"][facet.field] = filter_list;
            return [oq];
        }
    }

    // range filter with "lt" taken from value.to and "gte" from value.from;
    // used for both range and date_histogram facets
    function rangeFilter(facet, value) {
        var rq = {"range" : {}};
        rq["range"][facet.field] = {};
        if (hasBound(value.to)) { rq["range"][facet.field]["lt"] = value.to; }
        if (hasBound(value.from)) { rq["range"][facet.field]["gte"] = value.from; }
        return rq;
    }

    function geoFilter(facet, value) {
        var gq = {"geo_distance_range" : {}};
        if (hasBound(value.to)) { gq["geo_distance_range"]["lt"] = value.to + facet.unit; }
        if (hasBound(value.from)) { gq["geo_distance_range"]["gte"] = value.from + facet.unit; }
        gq["geo_distance_range"][facet.field] = [facet.lon, facet.lat]; // note the order of lon/lat to comply with GeoJSON
        return gq;
    }

    // function to make the relevant filters from the filter definition
    function makeFilters(filter_definition) {
        var filters = [];
        for (var field in filter_definition) {
            if (!Object.prototype.hasOwnProperty.call(filter_definition, field)) { continue; }

            var facet = selectFacet(field);

            // a filter on a field with no facet definition cannot be translated, so skip it
            if (!facet) { continue; }

            // FIXME: is this the right behaviour?
            // ignore any filters from disabled facets
            if (facet.disabled) { continue; }

            var filter_list = filter_definition[field];

            if (facet.type === "terms") {
                filters = filters.concat(termsFilter(facet, filter_list)); // Note this is a concat not a push, unlike the others
            } else if (facet.type === "range" || facet.type === "date_histogram") {
                filters.push(rangeFilter(facet, filter_list));
            } else if (facet.type === "geo_distance") {
                filters.push(geoFilter(facet, filter_list));
            }
        }
        return filters;
    }

    // read any filters out of the options and create an array of "must" queries which
    // will constrain the search results
    var filter_must = [];
    if (options.active_filters) {
        filter_must = filter_must.concat(makeFilters(options.active_filters));
    }
    if (options.predefined_filters) {
        filter_must = filter_must.concat(makeFilters(options.predefined_filters));
    }
    if (options.fixed_filters) {
        filter_must = filter_must.concat(options.fixed_filters);
    }

    return filter_must;
}
/**
 * Build the elasticsearch request body for the given facetview options.
 *
 * Reads from options: q, searchfield, search_fields_multi, default_operator,
 * default_freetext_fuzzify, min_score, sort, fields, partial_fields,
 * script_fields, from, page_size, facets, elasticsearch_facet_inflation and
 * extra_facets; filter constraints come from getFilters.
 *
 * @param {Object} params
 * @param {Object} params.options - facetview options
 * @param {boolean} [params.include_facets=true] - add the "facets" section
 * @param {boolean} [params.include_fields=true] - add fields / partial_fields / script_fields
 * @returns {Object} the query object
 */
function elasticSearchQuery(params) {
    // a bound of 0 is a real bound; only missing/empty values are skipped
    function hasBound(bound) {
        return bound === 0 || Boolean(bound);
    }

    // copy the to/from bounds of each range definition into a fresh list
    function buildRanges(definitions) {
        var ranges = [];
        for (var r = 0; r < definitions.length; r++) {
            var def = definitions[r];
            var robj = {};
            if (hasBound(def.to)) { robj["to"] = def.to; }
            if (hasBound(def.from)) { robj["from"] = def.from; }
            ranges.push(robj);
        }
        return ranges;
    }

    // break open the parameters
    var options = params.options;
    var include_facets = "include_facets" in params ? params.include_facets : true;
    var include_fields = "include_fields" in params ? params.include_fields : true;

    var filter_must = getFilters({"options" : options});

    // search string and search field produce a query_string query element
    var querystring = options.q;
    var searchfield = options.searchfield;
    var default_operator = options.default_operator;
    var search_fields_multi = options.search_fields_multi;
    var min_score = options.min_score;

    var ftq;
    if (querystring) {
        ftq = {'query_string' : { 'query': fuzzify(querystring, options.default_freetext_fuzzify) }};
        // a single search field takes precedence over the multi-field list
        if (searchfield) {
            ftq.query_string["default_field"] = searchfield;
        } else if (search_fields_multi) {
            ftq.query_string["fields"] = search_fields_multi;
        }
        if (default_operator) {
            ftq.query_string["default_operator"] = default_operator;
        }
    } else {
        ftq = {"match_all" : {}};
    }

    // if there are filter constraints (filter_must) then we create a filtered query,
    // otherwise make a normal query
    var qs;
    if (filter_must.length > 0) {
        qs = {"query" : {"filtered" : {"filter" : {"bool" : {"must" : filter_must}}}}};
        qs.query.filtered["query"] = ftq;
    } else {
        qs = {"query" : ftq};
    }

    if (min_score) {
        qs['min_score'] = min_score;
    }

    // sort order and direction
    if (options.sort && options.sort.length > 0) {
        qs['sort'] = options.sort;
    }

    // fields and partial fields
    if (include_fields) {
        if (options.fields) { qs['fields'] = options.fields; }
        if (options.partial_fields) { qs['partial_fields'] = options.partial_fields; }
        if (options.script_fields) { qs["script_fields"] = options.script_fields; }
    }

    // paging (number of results, and start cursor)
    if (options.from !== undefined) {
        qs["from"] = options.from;
    }
    if (options.page_size !== undefined) {
        qs["size"] = options.page_size;
    }

    // facets
    if (include_facets) {
        qs['facets'] = {};
        var facet_defs = options.facets || [];

        // only inflate when a usable number has been configured, otherwise the
        // addition would turn the size into NaN
        var inflation = options.elasticsearch_facet_inflation;
        var can_inflate = typeof inflation === "number" && isFinite(inflation);

        for (var item = 0; item < facet_defs.length; item++) {
            var defn = facet_defs[item];
            if (defn.disabled) { continue; }

            var size = defn.size;

            // add a bunch of extra values to the facets to deal with the shard count issue
            if (can_inflate && typeof size === "number") {
                size += inflation;
            }

            var facet = {};
            if (defn.type === "terms") {
                facet["terms"] = {"field" : defn["field"], "size" : size, "order" : defn["order"]};
            } else if (defn.type === "range") {
                facet["range"] = {};
                facet["range"][defn.field] = buildRanges(defn["range"]);
            } else if (defn.type === "geo_distance") {
                facet["geo_distance"] = {};
                facet["geo_distance"][defn["field"]] = [defn.lon, defn.lat]; // note that the order is lon/lat because of GeoJSON
                facet["geo_distance"]["unit"] = defn.unit;
                facet["geo_distance"]["ranges"] = buildRanges(defn["distance"]);
            } else if (defn.type === "statistical") {
                facet["statistical"] = {"field" : defn["field"]};
            } else if (defn.type === "terms_stats") {
                facet["terms_stats"] = {key_field : defn["field"], value_field: defn["value_field"], size : size, order : defn["order"]};
            } else if (defn.type === "date_histogram") {
                facet["date_histogram"] = {field : defn["field"], interval : defn["interval"]};
            }
            qs["facets"][defn["field"]] = facet;
        }

        // and any extra facets
        // NOTE: this does not include any treatment of the facet size inflation that may be required
        if (options.extra_facets) {
            $.extend(true, qs['facets'], options.extra_facets);
        }
    }

    return qs;
}
/**
 * Add a fuzzy ("~") or wildcard ("*") marker to every term of a free-text query.
 *
 * With "~" each term gets the marker appended; with "*" each term is wrapped in
 * "*" on both sides. Terms are separated on single spaces, empty terms are
 * dropped, and every emitted term is followed by a space. The query is returned
 * untouched when the marker is anything else, or when the query already
 * contains "*", "~" or ":".
 *
 * @param {string} querystr - the user's query string
 * @param {string} [default_freetext_fuzzify] - "*" or "~"
 * @returns {string} the (possibly) fuzzified query string
 */
function fuzzify(querystr, default_freetext_fuzzify) {
    var marker = default_freetext_fuzzify;
    if (marker !== "*" && marker !== "~") {
        return querystr;
    }

    // leave queries alone if the user has written their own wildcards, fuzziness or field prefixes
    if (querystr.indexOf('*') !== -1 || querystr.indexOf('~') !== -1 || querystr.indexOf(':') !== -1) {
        return querystr;
    }

    var parts = querystr.split(' ');
    // declared locally: the accumulator must not leak out as a global
    var fuzzed = "";
    for (var p = 0; p < parts.length; p++) {
        var term = parts[p];
        if (term.length === 0) { continue; }
        term = term + marker;
        if (marker === "*") { term = "*" + term; }
        fuzzed += term + " ";
    }
    return fuzzed;
}
/**
 * JSON.stringify-style replacer that escapes reserved characters in query strings.
 *
 * Only string values stored under the key "query" are changed: every entry of
 * esSpecialCharsSubSet is prefixed with a backslash, and so is any entry of
 * esPairs that occurs an odd number of times in the value. Everything else is
 * handed back untouched.
 * (note that this precludes the use of queries like "name:bob", as the ":" would
 * get escaped)
 *
 * @param {string} key - the property name being serialised
 * @param {*} value - the property value
 * @returns {*} the escaped string, or the original value
 */
function jsonStringEscape(key, value) {
    var isQueryString = key === "query" && typeof(value) === 'string';
    if (!isQueryString) {
        return value;
    }

    // turn arbitrary text into a pattern that matches it literally
    function toLiteralPattern(text) {
        return text.replace(/([.*+?^=!:${}()|\[\]\/\\])/g, "\\$1");
    }

    // number of times the token occurs in the text
    function countOccurrences(text, token) {
        var found = text.match(new RegExp(toLiteralPattern(token), "g"));
        return found ? found.length : 0;
    }

    // start from the standard subset, then add any unbalanced pair characters
    var toEscape = esSpecialCharsSubSet.slice(0);
    esPairs.forEach(function (pairChar) {
        if (countOccurrences(value, pairChar) % 2 !== 0) {
            toEscape.push(pairChar);
        }
    });

    // escape in list order, feeding each result into the next replacement
    return toEscape.reduce(function (escaped, reserved) {
        return escaped.replace(new RegExp(toLiteralPattern(reserved), 'g'), "\\" + reserved);
    }, value);
}
/**
 * Turn a query object into its JSON text, passing every value through
 * jsonStringEscape on the way.
 *
 * @param {Object} qs - the query object
 * @returns {string} the JSON serialisation
 */
function serialiseQueryObject(qs) {
    var serialised = JSON.stringify(qs, jsonStringEscape);
    return serialised;
}
/**
 * Closure for elastic search success, which ultimately calls the user's callback.
 *
 * The returned handler condenses the raw response into a result object
 *   { records: [...], start: "", found: <data.hits.total>, facets: {...} }
 * and then invokes callback(data, resultobj).
 *
 * Records: a hit's "fields" when present, otherwise its "_source"; in the latter
 * case the hit's "_type" (when the hit has one) is copied onto the record as "type".
 * Facets: terms / terms_stats -> the "terms" list, range and geo distance ->
 * the "ranges" list, statistical -> the whole facet, date_histogram -> "entries".
 *
 * @param {function(Object, Object)} callback - receives the raw data and the result object
 * @returns {function(Object)} the success handler
 */
function elasticSearchSuccess(callback) {
    return function(data) {
        var resultobj = {
            "records" : [],
            "start" : "",
            "found" : data.hits.total,
            "facets" : {}
        };

        // load the results into the records part of the result object
        var hits = data.hits.hits;
        for (var h = 0; h < hits.length; h++) {
            var res = hits[h];
            if ("fields" in res) {
                // partial_fields and script_fields are also included here - no special treatment
                resultobj.records.push(res.fields);
            } else { // TODO Make it possible to optionally set add'l fields to append to _source, such as _type.
                var record = res._source;
                resultobj.records.push(record);
                // only copy the type over when the hit really has one, so that a
                // record's own "type" field is not overwritten with undefined
                if (record && "_type" in res) { record["type"] = res._type; }
            }
        }

        for (var name in data.facets) {
            if (!Object.prototype.hasOwnProperty.call(data.facets, name)) { continue; }
            var facet = data.facets[name];
            if ("terms" in facet) {
                // handle any terms facets
                resultobj["facets"][name] = facet["terms"];
            } else if ("ranges" in facet) {
                // handle any range/geo_distance_range facets
                resultobj["facets"][name] = facet["ranges"];
            } else if (facet["_type"] === "statistical") {
                // handle statistical facets
                resultobj["facets"][name] = facet;
            } else if (facet["_type"] === "terms_stats") {
                // handle terms_stats
                resultobj["facets"][name] = facet["terms"];
            } else if (facet["_type"] === "date_histogram") {
                resultobj["facets"][name] = facet["entries"];
            }
        }

        callback(data, resultobj);
    };
}
/**
 * Send a query to the elasticsearch web service.
 *
 * The query object is serialised with serialiseQueryObject and sent as the
 * "source" parameter of a GET request issued through $.ajax.
 *
 * @param {Object} params
 * @param {Function} params.success - wrapped by elasticSearchSuccess and used as the success handler
 * @param {Function} params.complete - passed through as the complete handler
 * @param {string} params.search_url - the url to send the request to
 * @param {Object} params.queryobj - the query object to serialise
 * @param {string} params.datatype - passed through as the request's dataType
 */
function doElasticSearchQuery(params) {
    // describe the whole request up front, then hand it to jQuery
    var request = {
        type: "get",
        url: params.search_url,
        data: {source: serialiseQueryObject(params.queryobj)},
        dataType: params.datatype,
        success: elasticSearchSuccess(params.success),
        complete: params.complete
    };

    $.ajax(request);
}