Skip to content

Commit

Permalink
add validation for vector field aliases
Browse files Browse the repository at this point in the history
- If there are vector field aliases
  (fields of type "vector" that share the same name)
  across defaultMapping and typeMapping, then
  their dimensions and similarity values must
  be the same
  • Loading branch information
moshaad7 committed Dec 14, 2023
1 parent 4f0fa6b commit 4023370
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 43 deletions.
30 changes: 19 additions & 11 deletions mapping/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,16 +50,21 @@ type DocumentMapping struct {
StructTagKey string `json:"struct_tag_key,omitempty"`
}

func (dm *DocumentMapping) Validate(cache *registry.Cache) error {
func (dm *DocumentMapping) Validate(cache *registry.Cache,
parentName string, fieldAliasCtx map[string]*FieldMapping) error {
var err error
if dm.DefaultAnalyzer != "" {
_, err := cache.AnalyzerNamed(dm.DefaultAnalyzer)
if err != nil {
return err
}
}
for _, property := range dm.Properties {
err = property.Validate(cache)
for propertyName, property := range dm.Properties {
newParent := propertyName
if parentName != "" {
newParent = fmt.Sprintf("%s.%s", parentName, propertyName)
}
err = property.Validate(cache, newParent, fieldAliasCtx)
if err != nil {
return err
}
Expand All @@ -78,21 +83,24 @@ func (dm *DocumentMapping) Validate(cache *registry.Cache) error {
}
}

err := validateFieldType(field.Type)
err := validateFieldMapping(field, parentName, fieldAliasCtx)
if err != nil {
return err
}

if field.Type == "vector" {
err := validateVectorField(field)
if err != nil {
return err
}
}
}
return nil
}

// validateFieldType checks that field.Type is one of the field types
// recognized here: text, datetime, number, boolean, geopoint, geoshape or IP.
// It returns nil for a recognized type, and otherwise an error that names
// both the field and the offending type.
func validateFieldType(field *FieldMapping) error {
	switch kind := field.Type; kind {
	case "text", "datetime", "number", "boolean":
		// scalar and text types: accepted
	case "geopoint", "geoshape", "IP":
		// geo and network types: accepted
	default:
		return fmt.Errorf("field: '%s', unknown field type: '%s'",
			field.Name, kind)
	}
	return nil
}

// analyzerNameForPath attempts to first find the field
// described by this path, then returns the analyzer
// configured for that field
Expand Down
6 changes: 4 additions & 2 deletions mapping/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,12 +174,14 @@ func (im *IndexMappingImpl) Validate() error {
if err != nil {
return err
}
err = im.DefaultMapping.Validate(im.cache)

fieldAliasCtx := make(map[string]*FieldMapping)
err = im.DefaultMapping.Validate(im.cache, "", fieldAliasCtx)
if err != nil {
return err
}
for _, docMapping := range im.TypeMapping {
err = docMapping.Validate(im.cache)
err = docMapping.Validate(im.cache, "", fieldAliasCtx)
if err != nil {
return err
}
Expand Down
17 changes: 3 additions & 14 deletions mapping/mapping_no_vectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@

package mapping

import "fmt"

// NewVectorFieldMapping always returns nil in this file
// (mapping_no_vectors.go); no FieldMapping is constructed here.
func NewVectorFieldMapping() *FieldMapping {
return nil
}
Expand All @@ -31,16 +29,7 @@ func (fm *FieldMapping) processVector(propertyMightBeVector interface{},
// -----------------------------------------------------------------------------
// document validation functions

// validateVectorField performs no checks on fieldMapping in this file and
// always returns nil.
func validateVectorField(fieldMapping *FieldMapping) error {
return nil
}

func validateFieldType(fieldType string) error {
switch fieldType {
case "text", "datetime", "number", "boolean", "geopoint", "geoshape", "IP":
default:
return fmt.Errorf("unknown field type: '%s'", fieldType)
}

return nil
// validateFieldMapping validates a single field mapping by delegating to
// validateFieldType and returning its result. parentName and fieldAliasCtx
// are accepted but not used by this implementation.
func validateFieldMapping(field *FieldMapping, parentName string,
fieldAliasCtx map[string]*FieldMapping) error {
return validateFieldType(field)
}
57 changes: 41 additions & 16 deletions mapping/mapping_vectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,22 @@ func (fm *FieldMapping) processVector(propertyMightBeVector interface{},
// -----------------------------------------------------------------------------
// document validation functions

func validateVectorField(field *FieldMapping) error {
if field.Dims <= 0 || field.Dims > 2048 {
return fmt.Errorf("invalid vector dimension,"+
" value should be in range (%d, %d)", 0, 2048)
// validateFieldMapping validates a single field mapping, choosing the
// validator by field type: fields of type "vector" are checked by
// validateVectorFieldAlias (which also receives parentName and
// fieldAliasCtx), and every other field is checked by validateFieldType.
func validateFieldMapping(field *FieldMapping, parentName string,
	fieldAliasCtx map[string]*FieldMapping) error {
	// Anything that is not a vector only needs its type checked.
	if field.Type != "vector" {
		return validateFieldType(field)
	}
	return validateVectorFieldAlias(field, parentName, fieldAliasCtx)
}

func validateVectorFieldAlias(field *FieldMapping, parentName string,
fieldAliasCtx map[string]*FieldMapping) error {

if field.Name == "" {
field.Name = parentName
}
if field.Similarity == "" {
field.Similarity = index.DefaultSimilarityMetric
}
Expand All @@ -103,22 +113,37 @@ func validateVectorField(field *FieldMapping) error {
field.DocValues = false
field.SkipFreqNorm = true

if _, ok := index.SupportedSimilarityMetrics[field.Similarity]; !ok {
return fmt.Errorf("invalid similarity metric: '%s', "+
"valid metrics are: %+v", field.Similarity,
reflect.ValueOf(index.SupportedSimilarityMetrics).MapKeys())
// # If alias is present, validate the field options as per the alias
if fieldAlias, ok := fieldAliasCtx[field.Name]; ok {
if field.Dims != fieldAlias.Dims {
return fmt.Errorf("field: '%s', invalid alias "+
"(different dimensions %d and %d)", fieldAlias.Name, field.Dims,
fieldAlias.Dims)
}

if field.Similarity != fieldAlias.Similarity {
return fmt.Errorf("field: '%s', invalid alias "+
"(different similarity values %s and %s)", fieldAlias.Name,
field.Similarity, fieldAlias.Similarity)
}

return nil
}

return nil
}
// # Validate field options

func validateFieldType(fieldType string) error {
switch fieldType {
case "text", "datetime", "number", "boolean", "geopoint", "geoshape",
"IP", "vector":
default:
return fmt.Errorf("unknown field type: '%s'", fieldType)
if field.Dims <= 0 || field.Dims > 2048 {
return fmt.Errorf("field: '%s', invalid vector dimension: %d,"+
" value should be in range (%d, %d)", field.Name, field.Dims, 0, 2048)
}

if _, ok := index.SupportedSimilarityMetrics[field.Similarity]; !ok {
return fmt.Errorf("field: '%s', invalid similarity "+
"metric: '%s', valid metrics are: %+v", field.Name, field.Similarity,
reflect.ValueOf(index.SupportedSimilarityMetrics).MapKeys())
}

fieldAliasCtx[field.Name] = field

return nil
}
69 changes: 69 additions & 0 deletions mapping/mapping_vectors_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build vectors
// +build vectors

package mapping

import "testing"

// TestVectorFieldAliasValidation feeds index mapping JSON documents through
// UnmarshalJSON and Validate, and checks that the outcome of Validate (nil
// error or not) matches the validity expected for each mapping.
func TestVectorFieldAliasValidation(t *testing.T) {
	tests := []struct {
		// input
		name       string // name of the test
		mappingStr string // index mapping JSON string

		// expected output
		expValidity bool // whether Validate is expected to succeed
	}{
		{
			// Two vector fields under the same property: the first has no
			// explicit name and the second is named "cityVec", but their
			// dims differ (3 vs 4), so the mapping is expected to be invalid.
			name: "vector field aliases with different dims",
			mappingStr: `{
				"default_mapping": {
					"properties": {
						"cityVec": {
							"fields": [
								{
									"type": "vector",
									"dims": 3
								},
								{
									"name": "cityVec",
									"type": "vector",
									"dims": 4
								}
							]
						}
					}
				}
			}`,
			expValidity: false,
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			im := NewIndexMapping()
			err := im.UnmarshalJSON([]byte(test.mappingStr))
			if err != nil {
				t.Fatalf("failed to unmarshal index mapping: %v", err)
			}

			// The validation result is the thing under test; compare it
			// against the expectation instead of discarding it.
			err = im.Validate()
			if isValid := err == nil; isValid != test.expValidity {
				t.Fatalf("validity mismatch, expected: %v, got: %v (err: %v)",
					test.expValidity, isValid, err)
			}
		})
	}
}

0 comments on commit 4023370

Please sign in to comment.