From e11ab0cee9f9a8fe9794964c772a6a6e5a6241c5 Mon Sep 17 00:00:00 2001 From: Matthias Krueger Date: Mon, 11 Sep 2023 22:47:07 +0200 Subject: [PATCH] PoC to integrate the Vectara search engine (#128) * This introduces Vectara. * Not perfect but it does work. --------- Co-authored-by: Eric Pugh --- README.md | 44 +++++- factories/resolverFactory.js | 2 + factories/settingsValidatorFactory.js | 21 +++ factories/vectaraDocFactory.js | 85 +++++++++++ factories/vectaraSearcherFactory.js | 163 +++++++++++++++++++++ package.json | 2 +- services/searchSvc.js | 4 + services/vectaraSearcherPreprocessorSvc.js | 50 +++++++ services/vectaraUrlSvc.js | 24 +++ test/spec/vectaraSearchSvc.js | 119 +++++++++++++++ values/defaultVectaraConfig.js | 6 + 11 files changed, 517 insertions(+), 3 deletions(-) create mode 100644 factories/vectaraDocFactory.js create mode 100644 factories/vectaraSearcherFactory.js create mode 100644 services/vectaraSearcherPreprocessorSvc.js create mode 100644 services/vectaraUrlSvc.js create mode 100644 test/spec/vectaraSearchSvc.js create mode 100644 values/defaultVectaraConfig.js diff --git a/README.md b/README.md index 7e09cb9..0109383 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,14 @@ # AngularJS Search Service -Splainer Search is an Angular Solr and OpenSearch and Elasticsearch Search library focussed on relevance diagnostics. It's used in the relevancy tuning tools [Quepid](http://quepid.com) and [Splainer](http://splainer.io). Its available for anyone to use (see [license](LICENSE.txt)). +Splainer Search is an Angular [Solr](https://solr.apache.org/), [OpenSearch](https://opensearch.org/) and [Elasticsearch](https://www.elastic.co/) search library +focussed on relevance diagnostics with some experimental support for other search engines, starting with [Vectara](https://www.vectara.com). +It's used in the relevancy tuning tools [Quepid](http://quepid.com) and [Splainer](http://splainer.io). 
It is available for anyone to use (see [license](LICENSE.txt)). -Splainer search utilizes a JSONP wrapper for communication with Solr. Elasticsearch and OpenSearch communicate with simple HTTP and JSON via CORS. All fields are explained and highlighted if requested. A friendly interface is provided to specify the arguments in terms of a Javascript object. See below for basic examples. +Splainer search utilizes a JSONP wrapper for communication with Solr. Elasticsearch, OpenSearch, and Vectara communication +happens with simple HTTP and JSON via CORS. +All fields are explained and highlighted if requested. A friendly interface is provided to specify the arguments in terms of a JavaScript object. See below for basic examples. ## Basic usage @@ -59,6 +63,43 @@ var searcher = searchSvc.createSearcher( ); ``` +### Vectara + +Splainer-search has experimental support for Vectara. You can send queries in the Vectara format but must also pass in +the authorization headers as custom headers, e.g. + +```js +var searcher = searchSvc.createSearcher( + ['id:_id', 'title', 'body', 'author'], + 'https://api.vectara.io:443/v1/query', + { + "query": [ + { + "query": "#$query##", + "numResults": 10, + "corpusKey": [ + { + "customerId": 123456789, + "corpusId": 1 + } + ] + } + ] + }, + 'your query text', + { + 'customHeaders': { + "customer-id": "123456789", + "x-api-key": "api_key" + } + }, + 'vectara' +); +``` + +Please note that the Vectara integration currently does not support explain or other advanced Splainer-search +functionality. + ## Paging Paging is done by asking the original searcher for another searcher. This searcher is already setup to get the next page for the current search results. Tell that searcher to `search()` just like you did above. 
diff --git a/factories/resolverFactory.js b/factories/resolverFactory.js
index 87ca12b..5fc0fa0 100644
--- a/factories/resolverFactory.js
+++ b/factories/resolverFactory.js
@@ -56,6 +56,8 @@
           },
           size: ids.length,
         };
+      } else if ( settings.searchEngine === 'vectara') {
+        // Vectara does not have an endpoint to retrieve per doc metadata directly
       }
 
       self.config = {
diff --git a/factories/settingsValidatorFactory.js b/factories/settingsValidatorFactory.js
index 9279c43..2a30591 100644
--- a/factories/settingsValidatorFactory.js
+++ b/factories/settingsValidatorFactory.js
@@ -37,6 +37,18 @@
         args = { q: ['*:*'] };
       } else if ( self.searchEngine === 'es' || self.searchEngine === 'os') {
         fields = null;
+      } else if ( self.searchEngine === 'vectara') {
+
+        // When we have a caseOptions or engineOptions hash available, then this could look like "corpusId: '#$searchOptions['corpusId']##'"
+        args = { query: [
+          {
+            query: '#$query##',
+            numResults: 10,
+            corpusKey :[{
+              corpusId: 1
+            }]
+          }
+        ]};
       }
 
       self.searcher = searchSvc.createSearcher(
@@ -58,6 +70,15 @@
         return doc.doc;
       } else if (self.searchEngine === 'es' || self.searchEngine === 'os') {
         return doc.doc._source;
+      } else if ( self.searchEngine === 'vectara' ) {
+        // Vectara returns doc properties in a metadata array of objects containing 'name' + 'value' pairs
+        const fieldsFromDocumentMetadata = (doc.doc.metadata || []).reduce(function(map, obj) {
+          map[obj.name] = obj.value;
+          return map;
+        }, {});
+        return Object.assign({}, {
+          'id': doc.doc.id
+        }, fieldsFromDocumentMetadata);
       }
     }
 
diff --git a/factories/vectaraDocFactory.js b/factories/vectaraDocFactory.js
new file mode 100644
index 0000000..be4ecf3
--- /dev/null
+++ b/factories/vectaraDocFactory.js
@@ -0,0 +1,95 @@
+'use strict';
+
+/*jslint latedef:false*/
+
+(function() {
+  angular.module('o19s.splainer-search')
+    .factory('VectaraDocFactory', [
+      'vectaraUrlSvc',
+      'DocFactory',
+      VectaraDocFactory
+    ]);
+
+  // Builds the Doc constructor for documents returned by Vectara.
+  // vectaraUrlSvc is injected but currently unused by this factory.
+  function VectaraDocFactory(vectaraUrlSvc, DocFactory) {
+    // Wraps one raw Vectara document: every metadata field is copied onto the
+    // instance, and a single-element array is unwrapped to its only value.
+    const Doc = function(doc, options) {
+      DocFactory.call(this, doc, options);
+
+      const self = this;
+
+      angular.forEach(self.fieldsProperty(), function(fieldValue, fieldName) {
+        // angular.isArray is also safe for null and undefined values
+        if ( angular.isArray(fieldValue) && fieldValue.length === 1 ) {
+          self[fieldName] = fieldValue[0];
+        } else {
+          self[fieldName] = fieldValue;
+        }
+      });
+    };
+
+    Doc.prototype = Object.create(DocFactory.prototype);
+    Doc.prototype.constructor = Doc; // Reset the constructor
+    Doc.prototype._url = _url;
+    Doc.prototype.origin = origin;
+    Doc.prototype.fieldsProperty = fieldsProperty;
+    Doc.prototype.explain = explain;
+    Doc.prototype.snippet = snippet;
+    Doc.prototype.highlight = highlight;
+
+
+    // No per-document URL is built for Vectara; a fixed placeholder is returned.
+    function _url () {
+      return 'unavailable';
+    }
+
+    // Returns a plain object holding every non-function property of this doc,
+    // without the bookkeeping properties doc, metadata and opts.
+    function origin () {
+      /*jslint validthis:true*/
+      var self = this;
+
+      var src = {};
+      angular.forEach(self, function(value, field) {
+        if (!angular.isFunction(value)) {
+          src[field] = value;
+        }
+      });
+      delete src.doc;
+      delete src.metadata;
+      delete src.opts;
+      return src;
+    }
+
+    // Flattens the metadata array ([{name, value}, ...]) into a name -> value map.
+    // A doc without a metadata array yields an empty map instead of throwing.
+    function fieldsProperty() {
+      /*jslint validthis:true*/
+      const self = this;
+      const metadata = angular.isArray(self.metadata) ? self.metadata : [];
+      return metadata.reduce(function(map, obj) {
+        map[obj.name] = obj.value;
+        return map;
+      }, {});
+    }
+
+    function explain () {
+      // no explain functionality implemented
+      return {};
+    }
+
+    function snippet () {
+      // no snippet functionality implemented
+      return null;
+    }
+
+    function highlight () {
+      // no highlighting functionality implemented
+      return null;
+    }
+
+    return Doc;
+  }
+})();
diff --git a/factories/vectaraSearcherFactory.js b/factories/vectaraSearcherFactory.js
new file mode 100644
index 0000000..9ce89fb
--- /dev/null
+++ b/factories/vectaraSearcherFactory.js
@@ -0,0 +1,174 @@
+'use strict';
+
+/*jslint latedef:false*/
+
+(function() {
+  angular.module('o19s.splainer-search')
+    .factory('VectaraSearcherFactory', [
+      '$http',
+      '$q',
+      '$log',
+      'VectaraDocFactory',
+      'activeQueries',
+      'vectaraSearcherPreprocessorSvc',
+      'vectaraUrlSvc',
+      'SearcherFactory',
+      'transportSvc',
+      VectaraSearcherFactory
+    ]);
+
+  function VectaraSearcherFactory(
+    $http, $q, $log,
+    VectaraDocFactory,
+    activeQueries,
+    vectaraSearcherPreprocessorSvc,
+    vectaraUrlSvc,
+    SearcherFactory,
+    transportSvc
+  ) {
+
+    var Searcher = function(options) {
+      SearcherFactory.call(this, options, vectaraSearcherPreprocessorSvc);
+    };
+
+    Searcher.prototype = Object.create(SearcherFactory.prototype);
+    Searcher.prototype.constructor = Searcher; // Reset the constructor
+
+    Searcher.prototype.addDocToGroup = addDocToGroup;
+    Searcher.prototype.pager = pager;
+    Searcher.prototype.search = search;
+
+
+    // Appends vectaraDoc to the group with the given value under
+    // self.grouped[groupedBy], creating the bucket and the group on demand.
+    function addDocToGroup (groupedBy, group, vectaraDoc) {
+      /*jslint validthis:true*/
+      const self = this;
+
+      if (!self.grouped.hasOwnProperty(groupedBy)) {
+        self.grouped[groupedBy] = [];
+      }
+
+      var found = null;
+      angular.forEach(self.grouped[groupedBy], function(groupedDocs) {
+        if (groupedDocs.value === group && !found) {
+          found = groupedDocs;
+        }
+      });
+
+      if (!found) {
+        found = {docs:[], value:group};
+        self.grouped[groupedBy].push(found);
+      }
+
+      found.docs.push(vectaraDoc);
+    }
+
+    // return a new searcher that will give you
+    // the next page upon search(). To get the subsequent
+    // page, call pager on that searcher. Returns null when there
+    // are no more results or when no page size is configured.
+    function pager (){
+      /*jslint validthis:true*/
+      const self = this;
+      const nextArgs = angular.copy(self.args);
+      let pagerArgs = {};
+
+      if (nextArgs.hasOwnProperty('pager') && nextArgs.pager !== undefined) {
+        pagerArgs = nextArgs.pager;
+      } else if (self.hasOwnProperty('pagerArgs') && self.pagerArgs !== undefined) {
+        // copy, so that advancing the offset does not alter this searcher
+        pagerArgs = angular.copy(self.pagerArgs);
+      }
+
+      const size = parseInt(pagerArgs.size, 10);
+      if (isNaN(size)) {
+        // without a page size the offset of the next page is unknown
+        return null;
+      }
+
+      if (pagerArgs.hasOwnProperty('from') && pagerArgs.from !== undefined) {
+        pagerArgs.from = parseInt(pagerArgs.from, 10) + size;
+
+        if (pagerArgs.from >= self.numFound) {
+          return null; // no more results
+        }
+      } else {
+        pagerArgs.from = size;
+      }
+
+      nextArgs.pager = pagerArgs;
+      var options = {
+        args: nextArgs,
+        config: self.config,
+        fieldList: self.fieldList,
+        queryText: self.queryText,
+        type: self.type,
+        url: self.url,
+      };
+
+      return new Searcher(options);
+    }
+
+    // search (execute the query) and produce results
+    // to the returned future
+    function search () {
+      /*jslint validthis:true*/
+      const self = this;
+      var apiMethod = 'POST';
+      var url = self.url;
+      var transport = transportSvc.getTransport({apiMethod: apiMethod});
+
+      var queryDslWithPagerArgs = angular.copy(self.queryDsl);
+      // NOTE(review): from/size are added at the top level of the request body; confirm that Vectara honours them
+      if (self.pagerArgs) {
+        queryDslWithPagerArgs.from = self.pagerArgs.from;
+        queryDslWithPagerArgs.size = self.pagerArgs.size;
+      }
+
+      self.inError = false;
+
+      const headers = vectaraUrlSvc.getHeaders(self.config.customHeaders);
+
+      activeQueries.count++;
+      return transport.query(url, queryDslWithPagerArgs, headers)
+        .then(function success(httpConfig) {
+          var data = httpConfig.data;
+          activeQueries.count--;
+
+          const documents = (data.responseSet && data.responseSet.length > 0 && data.responseSet[0].document) || [];
+
+          self.numFound = documents.length;
+
+          var parseDoc = function(doc, groupedBy, group) {
+            var options = {
+              groupedBy: groupedBy,
+              group: group,
+              fieldList: self.fieldList,
+              url: self.url
+            };
+
+            return new VectaraDocFactory(doc, options);
+          };
+
+          angular.forEach(documents, function(docFromApi) {
+            const doc = parseDoc(docFromApi);
+            self.docs.push(doc);
+          });
+
+        }, function error(msg) {
+          activeQueries.count--;
+          self.inError = true;
+          msg.searchError = 'Error with Vectara query or server. Review request manually.';
+          return $q.reject(msg);
+        })
+        .catch(function(response) {
+          $log.debug('Failed to execute search');
+          return $q.reject(response);
+        });
+    } // end of search()
+
+    // Return factory object
+    return Searcher;
+  }
+})();
diff --git a/package.json b/package.json
index fc280fb..6fa7245 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "splainer-search",
-  "version": "2.24.0",
+  "version": "2.25.0",
   "main": "splainer-search.js",
   "authors": [
     "Doug Turnbull ",
diff --git a/services/searchSvc.js b/services/searchSvc.js
index cb9d21e..df07fb8 100644
--- a/services/searchSvc.js
+++ b/services/searchSvc.js
@@ -6,11 +6,13 @@ angular.module('o19s.splainer-search')
   .service('searchSvc', [
     'SolrSearcherFactory',
     'EsSearcherFactory',
+    'VectaraSearcherFactory',
     'activeQueries',
     'defaultSolrConfig',
     function searchSvc(
       SolrSearcherFactory,
       EsSearcherFactory,
+      VectaraSearcherFactory,
       activeQueries,
       defaultSolrConfig
     ) {
@@ -52,6 +54,8 @@ angular.module('o19s.splainer-search')
           searcher = new EsSearcherFactory(options);
         } else if ( searchEngine === 'os') {
           searcher = new EsSearcherFactory(options);
+        } else if ( searchEngine === 'vectara') {
+          searcher = new VectaraSearcherFactory(options);
         }
 
         return searcher;
diff --git a/services/vectaraSearcherPreprocessorSvc.js b/services/vectaraSearcherPreprocessorSvc.js
new file mode 100644
index 0000000..0577c4c
--- /dev/null
+++ b/services/vectaraSearcherPreprocessorSvc.js
@@ -0,0 +1,62 @@
+'use strict';
+
+angular.module('o19s.splainer-search')
+  .service('vectaraSearcherPreprocessorSvc', [
+    'queryTemplateSvc',
+    'defaultVectaraConfig',
+    function vectaraSearcherPreprocessorSvc(queryTemplateSvc, defaultVectaraConfig) {
+      const self = this;
+
+      // Functions
+      self.prepare = prepare;
+
+      // Serializes args to JSON, passes it with queryText to queryTemplateSvc.hydrate
+      // (presumably filling placeholders such as #$query##) and parses the result back.
+      const replaceQuery = function(args, queryText) {
+        var replaced = angular.toJson(args, true);
+
+        // Assuming hydrate inserts the text verbatim (encodeURI is false), JSON-escape it
+        // first; otherwise a quote or backslash in the query makes angular.fromJson fail.
+        var escapedQueryText = queryText;
+        if (angular.isString(queryText)) {
+          escapedQueryText = JSON.stringify(queryText).slice(1, -1);
+        }
+
+        replaced = queryTemplateSvc.hydrate(replaced, escapedQueryText, {encodeURI: false, defaultKw: '\\"\\"'});
+        replaced = angular.fromJson(replaced);
+
+        return replaced;
+      };
+
+      // Moves args.pager into searcher.pagerArgs and stores the hydrated
+      // request body in searcher.queryDsl.
+      var preparePostRequest = function (searcher) {
+        var pagerArgs = angular.copy(searcher.args.pager);
+        if ( angular.isUndefined(pagerArgs) || pagerArgs === null ) {
+          pagerArgs = {};
+        }
+
+        var defaultPagerArgs = {};
+
+        searcher.pagerArgs = angular.merge({}, defaultPagerArgs, pagerArgs);
+        delete searcher.args.pager;
+
+        var queryDsl = replaceQuery(searcher.args, searcher.queryText);
+
+        searcher.queryDsl = queryDsl;
+      };
+
+      // Fills in missing config values from defaultVectaraConfig, then builds the request.
+      function prepare (searcher) {
+        if (searcher.config === undefined) {
+          searcher.config = angular.copy(defaultVectaraConfig); // copy: never share the injected default
+        } else {
+          // make sure config params that weren't passed through are set from
+          // the default config object.
+          searcher.config = angular.merge({}, defaultVectaraConfig, searcher.config);
+        }
+
+        preparePostRequest(searcher);
+      }
+    }
+  ]);
diff --git a/services/vectaraUrlSvc.js b/services/vectaraUrlSvc.js
new file mode 100644
index 0000000..be64891
--- /dev/null
+++ b/services/vectaraUrlSvc.js
@@ -0,0 +1,29 @@
+'use strict';
+
+angular.module('o19s.splainer-search')
+  .service('vectaraUrlSvc', [
+
+    function vectaraUrlSvc() {
+      // no real URL manipulation required, all requests go to a fixed endpoint
+
+      const self = this;
+      self.getHeaders = getHeaders;
+
+      // Returns the custom request headers as a plain object.
+      // customHeaders may be a JSON string or an object (the README example passes
+      // an object); empty or missing input yields {}.
+      // Throws a SyntaxError when a non-empty string is not valid JSON.
+      function getHeaders(customHeaders) {
+        if (angular.isString(customHeaders)) {
+          return customHeaders.length > 0 ? JSON.parse(customHeaders) : {};
+        }
+
+        if (angular.isObject(customHeaders) && !angular.isArray(customHeaders)) {
+          return angular.copy(customHeaders);
+        }
+
+        return {};
+      }
+
+    }
+  ]);
diff --git a/test/spec/vectaraSearchSvc.js b/test/spec/vectaraSearchSvc.js
new file mode 100644
index 0000000..1808a8e
--- /dev/null
+++ b/test/spec/vectaraSearchSvc.js
@@ -0,0 +1,137 @@
+'use strict';
+
+/*global describe,beforeEach,inject,it,expect*/
+describe('Service: searchSvc: Vectara', function() {
+
+  // load the service's module
+  beforeEach(module('o19s.splainer-search'));
+
+  let searcher;
+  let searchSvc;
+  let vectaraUrlSvc;
+  let $httpBackend;
+  let fieldSpecSvc = null;
+  let mockVectaraUrl = 'https://api.vectara.io:443/v1/query';
+  let mockFieldSpec = null;
+  const mockQueryText = 'test';
+  const mockVectaraParam = { query: [
+    {
+      query: '#$query##',
+      numResults: 10,
+      corpusKey :[{
+        corpusId: 1
+      }]
+    }
+  ]};
+
+  beforeEach(inject(function($injector) {
+    $httpBackend = $injector.get('$httpBackend');
+  }));
+
+  beforeEach(inject(function (_searchSvc_, _fieldSpecSvc_, _vectaraUrlSvc_) {
+    searchSvc = _searchSvc_;
+    fieldSpecSvc = _fieldSpecSvc_;
+    vectaraUrlSvc = _vectaraUrlSvc_;
+    mockFieldSpec = fieldSpecSvc.createFieldSpec('field1 field2');
+  }));
+
+
+  var mockVectaraResults = {
+    responseSet: [ {
+      response: [
+        // ignored and omitted here: the response contains the extracted matches, but splainer
+        // only uses the information from the documents array below
+      ],
+      status: [],
+      document: [
+        {
+          "id": "1",
+          "metadata": [
+            {
+              "name": "field1",
+              "value": "1--field1 value"
+            },
+            {
+              "name": "field2",
+              "value": "1--field2 value"
+            }
+          ]
+        },
+        {
+          "id": "2",
+          "metadata": [
+            {
+              "name": "field1",
+              "value": "2--field1 value"
+            },
+            {
+              "name": "field2",
+              "value": "2--field2 value"
+            }
+          ]
+        },
+      ],
+      generated: [],
+      summary: [],
+      futureId: 1
+    }],
+    "status": [],
+    "metrics": null
+  };
+
+  describe('vectara search', function () {
+
+    beforeEach(inject(function () {
+      searcher = searchSvc.createSearcher(
+        mockFieldSpec,
+        mockVectaraUrl,
+        mockVectaraParam,
+        mockQueryText,
+        {},
+        'vectara'
+      );
+    }));
+
+    it('returns docs', function () {
+      $httpBackend.expectPOST(mockVectaraUrl).respond(200, mockVectaraResults);
+
+      var called = 0;
+
+      searcher.search()
+        .then(function () {
+          var docs = searcher.docs;
+          expect(docs.length).toEqual(2);
+
+          expect(docs[0].field1).toEqual("1--field1 value");
+          expect(docs[0].field2).toEqual("1--field2 value");
+          expect(docs[1].field1).toEqual("2--field1 value");
+          expect(docs[1].field2).toEqual("2--field2 value");
+          called++;
+        });
+
+      $httpBackend.flush();
+      $httpBackend.verifyNoOutstandingExpectation();
+      expect(called).toEqual(1);
+    });
+
+  });
+
+  describe('custom headers', function () {
+
+    it('accepts headers passed as an object', function () {
+      var headers = { 'customer-id': '123456789', 'x-api-key': 'api_key' };
+      expect(vectaraUrlSvc.getHeaders(headers)).toEqual(headers);
+    });
+
+    it('accepts headers passed as a JSON string', function () {
+      expect(vectaraUrlSvc.getHeaders('{"x-api-key": "api_key"}')).toEqual({ 'x-api-key': 'api_key' });
+    });
+
+    it('returns no headers when none are configured', function () {
+      expect(vectaraUrlSvc.getHeaders(undefined)).toEqual({});
+      expect(vectaraUrlSvc.getHeaders('')).toEqual({});
+    });
+
+  });
+
+});
diff --git a/values/defaultVectaraConfig.js b/values/defaultVectaraConfig.js
new file mode 100644
index 0000000..2d3a44f
--- /dev/null
+++ b/values/defaultVectaraConfig.js
@@ -0,0 +1,6 @@
+'use strict';
+
+angular.module('o19s.splainer-search')
+  .value('defaultVectaraConfig', {
+    apiMethod: 'POST',
+  });