Skip to content

Commit

Permalink
PoC to integrate the Vectara search engine (#128)
Browse files Browse the repository at this point in the history
* This introduces Vectara.
* Not perfect but it does work.
---------

Co-authored-by: Eric Pugh <[email protected]>
  • Loading branch information
mkr and epugh authored Sep 11, 2023
1 parent 0eec831 commit e11ab0c
Show file tree
Hide file tree
Showing 11 changed files with 517 additions and 3 deletions.
44 changes: 42 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,14 @@

# AngularJS Search Service

Splainer Search is an Angular Solr and OpenSearch and Elasticsearch Search library focussed on relevance diagnostics. It's used in the relevancy tuning tools [Quepid](http://quepid.com) and [Splainer](http://splainer.io). Its available for anyone to use (see [license](LICENSE.txt)).
Splainer Search is an Angular [Solr](https://solr.apache.org/), [OpenSearch](https://opensearch.org/) and [Elasticsearch](https://www.elastic.co/) search library
focussed on relevance diagnostics with some experimental support for other search engines, starting with [Vectara](https://www.vectara.com).
It's used in the relevancy tuning tools [Quepid](http://quepid.com) and [Splainer](http://splainer.io). It is available for anyone to use (see [license](LICENSE.txt)).


Splainer search utilizes a JSONP wrapper for communication with Solr. Elasticsearch and OpenSearch communicate with simple HTTP and JSON via CORS. All fields are explained and highlighted if requested. A friendly interface is provided to specify the arguments in terms of a Javascript object. See below for basic examples.
Splainer search utilizes a JSONP wrapper for communication with Solr. Elasticsearch, OpenSearch, and Vectara communication
happens with simple HTTP and JSON via CORS.
All fields are explained and highlighted if requested. A friendly interface is provided to specify the arguments in terms of a Javascript object. See below for basic examples.

## Basic usage

Expand Down Expand Up @@ -59,6 +63,42 @@ var searcher = searchSvc.createSearcher(
);
```
### Vectara
Splainer-search has experimental support for Vectara. You can send queries in the Vectara format but must also pass in
the authorization headers as custom headers, e.g.
```js
var searcher = searchSvc.createSearcher(
['id:_id', 'title', 'body', 'author'],
'https://api.vectara.io:443/v1/query',
{
"query": [
{
"query": "#$query##",
"numResults": 10,
"corpusKey": [
{
"customerId": 123456789,
"corpusId": 1
}
]
}
]
},
{
'customHeaders': {
"customer-id": "123456789",
"x-api-key": "api_key"
}
},
'vectara'
);
```
Please note that the Vectara integration currently does not support explain or other advanced Splainer-search
functionality.
## Paging
Paging is done by asking the original searcher for another searcher. This searcher is already set up to get the next page for the current search results. Tell that searcher to `search()` just like you did above.
Expand Down
2 changes: 2 additions & 0 deletions factories/resolverFactory.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@
},
size: ids.length,
};
} else if ( settings.searchEngine === 'vectara') {
// Vectara does not have an endpoint to retrieve per doc metadata directly
}

self.config = {
Expand Down
21 changes: 21 additions & 0 deletions factories/settingsValidatorFactory.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,18 @@
args = { q: ['*:*'] };
} else if ( self.searchEngine === 'es' || self.searchEngine === 'os') {
fields = null;
} else if ( self.searchEngine === 'vectara') {

// When we have a caseOptions or engineOptions hash available, then this could look like "corpusId: '#$searchOptions['corpusId']##'"
args = { query: [
{
query: '#$query##',
numResults: 10,
corpusKey :[{
corpusId: 1
}]
}
]};
}

self.searcher = searchSvc.createSearcher(
Expand All @@ -58,6 +70,15 @@
return doc.doc;
} else if (self.searchEngine === 'es' || self.searchEngine === 'os') {
return doc.doc._source;
} else if ( self.searchEngine === 'vectara' ) {
// Vectara returns doc properties in a metadata array of objects containing 'name' + 'value' pairs
const fieldsFromDocumentMetadata = doc.doc.metadata.reduce(function(map, obj) {
map[obj.name] = obj.value;
return map;
}, {});
return Object.assign({}, {
'id': doc.doc.id
}, fieldsFromDocumentMetadata);
}
}

Expand Down
85 changes: 85 additions & 0 deletions factories/vectaraDocFactory.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
'use strict';

/*jslint latedef:false*/

(function() {
// Register the Vectara document factory with the splainer-search module.
// The array lists the injectable names followed by the factory function itself.
const vectaraDocFactoryDefinition = [
  'vectaraUrlSvc',
  'DocFactory',
  VectaraDocFactory
];

angular
  .module('o19s.splainer-search')
  .factory('VectaraDocFactory', vectaraDocFactoryDefinition);

function VectaraDocFactory(vectaraUrlSvc, DocFactory) {
// Constructor for a Vectara result document.
//
// doc:     the raw document as returned by the Vectara API
// options: passed through unchanged to the DocFactory base constructor
//
// After the base constructor has run, every field reported by
// fieldsProperty() is copied onto the instance so that it can be read
// as a plain property (doc.title, doc.author, ...).
// NOTE(review): presumably DocFactory copies the raw document's own
// properties (including `metadata`) onto `this` - verify against DocFactory.
const Doc = function(doc, options) {
  DocFactory.call(this, doc, options);

  const self = this;

  angular.forEach(self.fieldsProperty(), function(fieldValue, fieldName) {
    // Single element arrays are unwrapped to their only element.
    // Array.isArray is used instead of inspecting `fieldValue.constructor`
    // so that a metadata entry without a value (undefined) does not throw.
    if ( Array.isArray(fieldValue) && fieldValue.length === 1 ) {
      self[fieldName] = fieldValue[0];
    } else {
      self[fieldName] = fieldValue;
    }
  });
};

// Inherit from DocFactory and attach the Vectara specific implementations.
// `constructor` is set explicitly because replacing the prototype object
// would otherwise leave it pointing at DocFactory.
Doc.prototype = Object.assign(Object.create(DocFactory.prototype), {
  constructor: Doc,
  _url: _url,
  origin: origin,
  fieldsProperty: fieldsProperty,
  explain: explain,
  snippet: snippet,
  highlight: highlight
});


// Always returns the fixed placeholder string 'unavailable'; no per-document
// URL is built for Vectara documents.
function _url () {
  const placeholderUrl = 'unavailable';
  return placeholderUrl;
}

// Returns a plain object holding the document's data fields: every
// non-function property of this document except the internal `doc`,
// `metadata` and `opts` properties.
function origin () {
  /*jslint validthis:true*/
  const document = this;
  const internalProperties = ['doc', 'metadata', 'opts'];
  const fields = {};

  angular.forEach(document, function(propertyValue, propertyName) {
    if (angular.isFunction(propertyValue)) {
      return;
    }
    if (internalProperties.indexOf(propertyName) !== -1) {
      return;
    }
    fields[propertyName] = propertyValue;
  });

  return fields;
}

// Builds a { fieldName: fieldValue } map from this document's `metadata`,
// which is expected to be an array of { name, value } objects.
// A document without a metadata array yields an empty map instead of
// throwing, so such a document can still be constructed.
// When the same name occurs more than once, the last entry wins.
function fieldsProperty() {
  /*jslint validthis:true*/
  const self = this;
  const metadata = Array.isArray(self.metadata) ? self.metadata : [];
  return metadata.reduce(function(map, obj) {
    map[obj.name] = obj.value;
    return map;
  }, {});
}

// Explain is not implemented for Vectara documents:
// every call returns a fresh empty object.
function explain () {
  const emptyExplanation = {};
  return emptyExplanation;
}

// Snippets are not implemented for Vectara documents: always returns null.
function snippet () {
  const noSnippet = null;
  return noSnippet;
}

// Highlighting is not implemented for Vectara documents: always returns null.
function highlight () {
  const noHighlight = null;
  return noHighlight;
}

return Doc;
}
})();
163 changes: 163 additions & 0 deletions factories/vectaraSearcherFactory.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
'use strict';

/*jslint latedef:false*/

(function() {
// Register the Vectara searcher factory with the splainer-search module.
// The array lists the injectable names, in the order of the factory
// function's parameters, followed by the factory function itself.
const vectaraSearcherFactoryDefinition = [
  '$http',
  '$q',
  '$log',
  'VectaraDocFactory',
  'activeQueries',
  'vectaraSearcherPreprocessorSvc',
  'vectaraUrlSvc',
  'SearcherFactory',
  'transportSvc',
  VectaraSearcherFactory
];

angular
  .module('o19s.splainer-search')
  .factory('VectaraSearcherFactory', vectaraSearcherFactoryDefinition);

function VectaraSearcherFactory(
$http, $q, $log,
VectaraDocFactory,
activeQueries,
vectaraSearcherPreprocessorSvc,
vectaraUrlSvc,
SearcherFactory,
transportSvc
) {

// Vectara searcher: delegates construction to SearcherFactory, handing it
// the Vectara specific preprocessor service.
function Searcher (options) {
  SearcherFactory.call(this, options, vectaraSearcherPreprocessorSvc);
}

// Inherit from SearcherFactory and attach the Vectara implementations.
// `constructor` is set explicitly because replacing the prototype object
// would otherwise leave it pointing at SearcherFactory.
Searcher.prototype = Object.assign(Object.create(SearcherFactory.prototype), {
  constructor: Searcher,
  addDocToGroup: addDocToGroup,
  pager: pager,
  search: search
});


// Files vectaraDoc under self.grouped[groupedBy], in the bucket whose
// `value` equals `group`. The list for `groupedBy` and the bucket for
// `group` are created on first use; when several buckets share the same
// value, the first one receives the document.
function addDocToGroup (groupedBy, group, vectaraDoc) {
  /*jslint validthis:true*/
  const searcher = this;

  if (!searcher.grouped.hasOwnProperty(groupedBy)) {
    searcher.grouped[groupedBy] = [];
  }

  let bucket = null;
  angular.forEach(searcher.grouped[groupedBy], function(candidate) {
    const sameGroup = candidate.value === group;
    if (sameGroup && !bucket) {
      bucket = candidate;
    }
  });

  if (!bucket) {
    bucket = {docs: [], value: group};
    searcher.grouped[groupedBy].push(bucket);
  }

  bucket.docs.push(vectaraDoc);
}

// Return a new searcher that will give you the next page upon search().
// To get the subsequent page, call pager() on that returned searcher.
//
// Paging arguments are taken from self.args.pager when present, otherwise
// from self.pagerArgs. Both are copied before the offset is advanced, so
// calling pager() never changes the paging state of the current searcher
// and calling it repeatedly always yields the same next page.
//
// Returns null when the advanced offset is at or beyond self.numFound.
function pager () {
  /*jslint validthis:true*/
  const self = this;
  const nextArgs = angular.copy(self.args);
  let pagerArgs = {};

  if (nextArgs.hasOwnProperty('pager') && nextArgs.pager !== undefined) {
    // nextArgs is already a private copy, so its pager can be changed freely
    pagerArgs = nextArgs.pager;
  } else if (self.hasOwnProperty('pagerArgs') && self.pagerArgs !== undefined) {
    // copy, otherwise advancing `from` below would also move this searcher
    pagerArgs = angular.copy(self.pagerArgs);
  }

  if (pagerArgs.hasOwnProperty('from')) {
    pagerArgs.from = parseInt(pagerArgs.from, 10) + pagerArgs.size;

    if (pagerArgs.from >= self.numFound) {
      return null; // no more results
    }
  } else {
    // no offset yet: the next page starts right after the first `size` results
    pagerArgs.from = pagerArgs.size;
  }

  nextArgs.pager = pagerArgs;
  const options = {
    args: nextArgs,
    config: self.config,
    fieldList: self.fieldList,
    queryText: self.queryText,
    type: self.type,
    url: self.url,
  };

  return new Searcher(options);
}

// search (execute the query) and produce results
// to the returned future
//
// POSTs a copy of self.queryDsl (with `from` / `size` taken from
// self.pagerArgs, when set) to self.url, using the headers derived from
// self.config.customHeaders. On success the documents of the first response
// set are wrapped in VectaraDocFactory instances and appended to self.docs;
// self.numFound is set to the number of documents in that response.
// On failure self.inError is set, a `searchError` message is attached to
// the rejection value and the returned promise is rejected with it.
// activeQueries.count is incremented for the duration of the request.
function search () {
  /*jslint validthis:true*/
  const self = this;
  const apiMethod = 'POST';
  const url = self.url;
  const transport = transportSvc.getTransport({apiMethod: apiMethod});

  const queryDslWithPagerArgs = angular.copy(self.queryDsl);
  if (self.pagerArgs) {
    queryDslWithPagerArgs.from = self.pagerArgs.from;
    queryDslWithPagerArgs.size = self.pagerArgs.size;
  }

  self.inError = false;

  const headers = vectaraUrlSvc.getHeaders(self.config.customHeaders);

  activeQueries.count++;
  return transport.query(url, queryDslWithPagerArgs, headers)
    .then(function success(httpConfig) {
      const data = httpConfig.data;
      activeQueries.count--;

      // Only the first response set is read. A missing response set or a
      // response set without a `document` array counts as "no documents"
      // rather than failing with a TypeError further down.
      const firstResponseSet = data && data.responseSet && data.responseSet.length > 0 ?
        data.responseSet[0] : null;
      const documents = (firstResponseSet && firstResponseSet.document) || [];

      self.numFound = documents.length;

      const parseDoc = function(doc, groupedBy, group) {
        const options = {
          groupedBy: groupedBy,
          group: group,
          fieldList: self.fieldList,
          url: self.url
        };

        return new VectaraDocFactory(doc, options);
      };

      angular.forEach(documents, function(docFromApi) {
        const doc = parseDoc(docFromApi);
        self.docs.push(doc);
      });

    }, function error(msg) {
      activeQueries.count--;
      self.inError = true;
      msg.searchError = 'Error with Vectara query or server. Review request manually.';
      return $q.reject(msg);
    })
    .catch(function(response) {
      $log.debug('Failed to execute search');
      return $q.reject(response);
    });
} // end of search()

// Return factory object
return Searcher;
}
})();
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "splainer-search",
"version": "2.24.0",
"version": "2.25.0",
"main": "splainer-search.js",
"authors": [
"Doug Turnbull <[email protected]>",
Expand Down
4 changes: 4 additions & 0 deletions services/searchSvc.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ angular.module('o19s.splainer-search')
.service('searchSvc', [
'SolrSearcherFactory',
'EsSearcherFactory',
'VectaraSearcherFactory',
'activeQueries',
'defaultSolrConfig',
function searchSvc(
SolrSearcherFactory,
EsSearcherFactory,
VectaraSearcherFactory,
activeQueries,
defaultSolrConfig
) {
Expand Down Expand Up @@ -52,6 +54,8 @@ angular.module('o19s.splainer-search')
searcher = new EsSearcherFactory(options);
} else if ( searchEngine === 'os') {
searcher = new EsSearcherFactory(options);
} else if ( searchEngine === 'vectara') {
searcher = new VectaraSearcherFactory(options);
}

return searcher;
Expand Down
Loading

0 comments on commit e11ab0c

Please sign in to comment.