From 402337002440e3cd5b5394aed1062d898a0a09ee Mon Sep 17 00:00:00 2001 From: moshaad7 Date: Tue, 31 Oct 2023 16:17:13 +0530 Subject: [PATCH 1/6] add validation for vector field aliases - If there are vector field Aliases (fields with same name and type as vector) across defaultMapping and typeMapping, Then, their dimensions and similarity value must be same --- mapping/document.go | 30 ++++++++------ mapping/index.go | 6 ++- mapping/mapping_no_vectors.go | 17 ++------ mapping/mapping_vectors.go | 57 +++++++++++++++++++-------- mapping/mapping_vectors_test.go | 69 +++++++++++++++++++++++++++++++++ 5 files changed, 136 insertions(+), 43 deletions(-) create mode 100644 mapping/mapping_vectors_test.go diff --git a/mapping/document.go b/mapping/document.go index 9f5aea581..2a702f323 100644 --- a/mapping/document.go +++ b/mapping/document.go @@ -50,7 +50,8 @@ type DocumentMapping struct { StructTagKey string `json:"struct_tag_key,omitempty"` } -func (dm *DocumentMapping) Validate(cache *registry.Cache) error { +func (dm *DocumentMapping) Validate(cache *registry.Cache, + parentName string, fieldAliasCtx map[string]*FieldMapping) error { var err error if dm.DefaultAnalyzer != "" { _, err := cache.AnalyzerNamed(dm.DefaultAnalyzer) @@ -58,8 +59,12 @@ func (dm *DocumentMapping) Validate(cache *registry.Cache) error { return err } } - for _, property := range dm.Properties { - err = property.Validate(cache) + for propertyName, property := range dm.Properties { + newParent := propertyName + if parentName != "" { + newParent = fmt.Sprintf("%s.%s", parentName, propertyName) + } + err = property.Validate(cache, newParent, fieldAliasCtx) if err != nil { return err } @@ -78,21 +83,24 @@ func (dm *DocumentMapping) Validate(cache *registry.Cache) error { } } - err := validateFieldType(field.Type) + err := validateFieldMapping(field, parentName, fieldAliasCtx) if err != nil { return err } - - if field.Type == "vector" { - err := validateVectorField(field) - if err != nil { - return err - } - } } return nil } +func validateFieldType(field *FieldMapping) error { + switch field.Type { + case "text", "datetime", "number", "boolean", "geopoint", "geoshape", "IP": + return nil + default: + return fmt.Errorf("field: '%s', unknown field type: '%s'", + field.Name, field.Type) + } +} + // analyzerNameForPath attempts to first find the field // described by this path, then returns the analyzer // configured for that field diff --git a/mapping/index.go b/mapping/index.go index 1c08bc589..171ee1a72 100644 --- a/mapping/index.go +++ b/mapping/index.go @@ -174,12 +174,14 @@ func (im *IndexMappingImpl) Validate() error { if err != nil { return err } - err = im.DefaultMapping.Validate(im.cache) + + fieldAliasCtx := make(map[string]*FieldMapping) + err = im.DefaultMapping.Validate(im.cache, "", fieldAliasCtx) if err != nil { return err } for _, docMapping := range im.TypeMapping { - err = docMapping.Validate(im.cache) + err = docMapping.Validate(im.cache, "", fieldAliasCtx) if err != nil { return err } diff --git a/mapping/mapping_no_vectors.go b/mapping/mapping_no_vectors.go index f4987596a..b5e033a62 100644 --- a/mapping/mapping_no_vectors.go +++ b/mapping/mapping_no_vectors.go @@ -17,8 +17,6 @@ package mapping -import "fmt" - func NewVectorFieldMapping() *FieldMapping { return nil } @@ -31,16 +29,7 @@ func (fm *FieldMapping) processVector(propertyMightBeVector interface{}, // ----------------------------------------------------------------------------- // document validation functions -func validateVectorField(fieldMapping *FieldMapping) error { - return nil -} - -func validateFieldType(fieldType string) error { - switch fieldType { - case "text", "datetime", "number", "boolean", "geopoint", "geoshape", "IP": - default: - return fmt.Errorf("unknown field type: '%s'", fieldType) - } - - return nil +func validateFieldMapping(field *FieldMapping, parentName string, + fieldAliasCtx map[string]*FieldMapping) error { + return validateFieldType(field) } diff --git a/mapping/mapping_vectors.go b/mapping/mapping_vectors.go index 3e697f5b4..cf9c24772 100644 --- a/mapping/mapping_vectors.go +++ b/mapping/mapping_vectors.go @@ -85,12 +85,22 @@ func (fm *FieldMapping) processVector(propertyMightBeVector interface{}, // ----------------------------------------------------------------------------- // document validation functions -func validateVectorField(field *FieldMapping) error { - if field.Dims <= 0 || field.Dims > 2048 { - return fmt.Errorf("invalid vector dimension,"+ - " value should be in range (%d, %d)", 0, 2048) +func validateFieldMapping(field *FieldMapping, parentName string, + fieldAliasCtx map[string]*FieldMapping) error { + switch field.Type { + case "vector": + return validateVectorFieldAlias(field, parentName, fieldAliasCtx) + default: // non-vector field + return validateFieldType(field) } +} + +func validateVectorFieldAlias(field *FieldMapping, parentName string, + fieldAliasCtx map[string]*FieldMapping) error { + if field.Name == "" { + field.Name = parentName + } if field.Similarity == "" { field.Similarity = index.DefaultSimilarityMetric } @@ -103,22 +113,37 @@ func validateVectorField(field *FieldMapping) error { field.DocValues = false field.SkipFreqNorm = true - if _, ok := index.SupportedSimilarityMetrics[field.Similarity]; !ok { - return fmt.Errorf("invalid similarity metric: '%s', "+ - "valid metrics are: %+v", field.Similarity, - reflect.ValueOf(index.SupportedSimilarityMetrics).MapKeys()) + // # If alias is present, validate the field options as per the alias + if fieldAlias, ok := fieldAliasCtx[field.Name]; ok { + if field.Dims != fieldAlias.Dims { + return fmt.Errorf("field: '%s', invalid alias "+ + "(different dimensions %d and %d)", fieldAlias.Name, field.Dims, + fieldAlias.Dims) + } + + if field.Similarity != fieldAlias.Similarity { + return fmt.Errorf("field: '%s', invalid alias "+ + "(different similarity values %s and %s)", fieldAlias.Name, + field.Similarity, fieldAlias.Similarity) + } + + return nil } - return nil -} + // # Validate field options -func validateFieldType(fieldType string) error { - switch fieldType { - case "text", "datetime", "number", "boolean", "geopoint", "geoshape", - "IP", "vector": - default: - return fmt.Errorf("unknown field type: '%s'", fieldType) + if field.Dims <= 0 || field.Dims > 2048 { + return fmt.Errorf("field: '%s', invalid vector dimension: %d,"+ + " value should be in range (%d, %d)", field.Name, field.Dims, 0, 2048) } + if _, ok := index.SupportedSimilarityMetrics[field.Similarity]; !ok { + return fmt.Errorf("field: '%s', invalid similarity "+ + "metric: '%s', valid metrics are: %+v", field.Name, field.Similarity, + reflect.ValueOf(index.SupportedSimilarityMetrics).MapKeys()) + } + + fieldAliasCtx[field.Name] = field + return nil } diff --git a/mapping/mapping_vectors_test.go b/mapping/mapping_vectors_test.go new file mode 100644 index 000000000..a5a2e8a56 --- /dev/null +++ b/mapping/mapping_vectors_test.go @@ -0,0 +1,69 @@ +// Copyright (c) 2023 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build vectors +// +build vectors + +package mapping + +import "testing" + +func TestVectorFieldAliasValidation(t *testing.T) { + tests := []struct { + // input + name string // name of the test + mappingStr string //index mapping json string + + // expected output + expValidity bool // validity of the mapping + }{ + { + name: "no vector field alias", + mappingStr: `{ + "default_mapping": { + "properties": { + "cityVec" { + "fields": [ + { + "type": "vector", + "dims": 3 + }, + { + "name": "cityVec", + "type": "vector", + "dims": 4 + } + ] + } + } + + } + }`, + expValidity: false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + im := NewIndexMapping() + err := im.UnmarshalJSON([]byte(test.mappingStr)) + if err != nil { + t.Fatalf("failed to unmarshal index mapping: %v", err) + } + + im.Validate() + + }) + } +} From 50de1e774b4467fa5dc5915a6c691a6030ea5850 Mon Sep 17 00:00:00 2001 From: moshaad7 Date: Thu, 14 Dec 2023 19:30:25 +0530 Subject: [PATCH 2/6] add test cases --- mapping/mapping_vectors_test.go | 153 +++++++++++++++++++++++++++++--- 1 file changed, 141 insertions(+), 12 deletions(-) diff --git a/mapping/mapping_vectors_test.go b/mapping/mapping_vectors_test.go index a5a2e8a56..d5da2fb33 100644 --- a/mapping/mapping_vectors_test.go +++ b/mapping/mapping_vectors_test.go @@ -23,34 +23,154 @@ func TestVectorFieldAliasValidation(t *testing.T) { tests := []struct { // input name string // name of the test - mappingStr string //index mapping json string + mappingStr string // index mapping json string // expected output - expValidity bool // validity of the mapping + expValidity bool // validity of the mapping + errMsg string // error message, given expValidity is false }{ { - name: "no vector field alias", - mappingStr: `{ + name: "test1", + mappingStr: ` + { "default_mapping": { "properties": { - "cityVec" { + "cityVec": { "fields": [ + { + "type": "vector", + "dims": 3 + }, + { + "name": "cityVec", + "type": "vector", + "dims": 4 + } + ] + } + } + } + }`, + expValidity: false, + errMsg: `field: 'cityVec', invalid alias (different dimensions 4 and 3)`, + }, + { + name: "test2", + mappingStr: ` + { + "default_mapping": { + "properties": { + "cityVec": { + "fields": [ + { + "type": "vector", + "dims": 3, + "similarity": "l2_norm" + }, + { + "name": "cityVec", + "type": "vector", + "dims": 3, + "similarity": "dot_product" + } + ] + } + } + } + }`, + expValidity: false, + errMsg: `field: 'cityVec', invalid alias (different similarity values dot_product and l2_norm)`, + }, + { + name: "test3", + mappingStr: ` + { + "default_mapping": { + "properties": { + "cityVec": { + "fields": [ + { + "type": "vector", + "dims": 3 + }, + { + "name": "cityVec", + "type": "vector", + "dims": 3 + } + ] + } + } + } + }`, + expValidity: true, + errMsg: "", + }, + { + name: "test4", + mappingStr: ` + { + "default_mapping": { + "properties": { + "cityVec": { + "fields": [ + { + "name": "vecData", + "type": "vector", + "dims": 4 + } + ] + }, + "countryVec": { + "fields": [ + { + "name": "vecData", + "type": "vector", + "dims": 3 + } + ] + } + } + } + }`, + expValidity: false, + errMsg: `field: 'vecData', invalid alias (different dimensions 3 and 4)`, + }, + { + name: "test5", + mappingStr: ` + { + "default_mapping": { + "properties": { + "cityVec": { + "fields": [ + { + "name": "vecData", + "type": "vector", + "dims": 3 + } + ] + } + } + }, + "types": { + "type1": { + "properties": { + "cityVec": { + "fields": [ { - "type": "vector", - "dims": 3 - }, - { - "name": "cityVec", + "name": "vecData", "type": "vector", "dims": 4 } ] } } - + } } }`, expValidity: false, + errMsg: `field: 'vecData', invalid alias (different dimensions 4 and 3)`, }, } @@ -62,8 +182,17 @@ func TestVectorFieldAliasValidation(t *testing.T) { t.Fatalf("failed to unmarshal index mapping: %v", err) } - im.Validate() + err = im.Validate() + isValid := err == nil + if test.expValidity != isValid { + t.Fatalf("validity mismatch, expected: %v, got: %v", + test.expValidity, isValid) + } + if !isValid && err.Error() != test.errMsg { + t.Fatalf("invalid error message, expected: %v, got: %v", + test.errMsg, err.Error()) + } }) } } From 39bb689bd3fa25febb39ae68005f05882550a760 Mon Sep 17 00:00:00 2001 From: moshaad7 Date: Thu, 14 Dec 2023 19:37:32 +0530 Subject: [PATCH 3/6] handle nil fieldAliasCtx case --- mapping/mapping_vectors.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mapping/mapping_vectors.go b/mapping/mapping_vectors.go index cf9c24772..f5367b994 100644 --- a/mapping/mapping_vectors.go +++ b/mapping/mapping_vectors.go @@ -114,6 +114,7 @@ func validateVectorFieldAlias(field *FieldMapping, parentName string, field.SkipFreqNorm = true // # If alias is present, validate the field options as per the alias + // note: reading from a nil map is safe if fieldAlias, ok := fieldAliasCtx[field.Name]; ok { if field.Dims != fieldAlias.Dims { return fmt.Errorf("field: '%s', invalid alias "+ @@ -143,7 +144,9 @@ func validateVectorFieldAlias(field *FieldMapping, parentName string, reflect.ValueOf(index.SupportedSimilarityMetrics).MapKeys()) } - fieldAliasCtx[field.Name] = field + if fieldAliasCtx != nil { // writing to a nil map is unsafe + fieldAliasCtx[field.Name] = field + } return nil } From d8fb08574d38faf845a30a197dc177d1587f3bc6 Mon Sep 17 00:00:00 2001 From: moshaad7 Date: Thu, 14 Dec 2023 20:37:10 +0530 Subject: [PATCH 4/6] use min and max dims variables from bleve_index_api --- mapping/mapping_vectors.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mapping/mapping_vectors.go b/mapping/mapping_vectors.go index f5367b994..a10f43d4f 100644 --- a/mapping/mapping_vectors.go +++ b/mapping/mapping_vectors.go @@ -133,9 +133,10 @@ func validateVectorFieldAlias(field *FieldMapping, parentName string, // # Validate field options - if field.Dims <= 0 || field.Dims > 2048 { + if field.Dims < index.MinVectorDims || field.Dims > index.MaxVectorDims { return fmt.Errorf("field: '%s', invalid vector dimension: %d,"+ - " value should be in range (%d, %d)", field.Name, field.Dims, 0, 2048) + " value should be in range (%d, %d)", field.Name, field.Dims, + index.MinVectorDims, index.MaxVectorDims) } if _, ok := index.SupportedSimilarityMetrics[field.Similarity]; !ok { From b5ac9c0570c15ad2f9910f877ca64fd9568ee425 Mon Sep 17 00:00:00 2001 From: moshaad7 Date: Thu, 14 Dec 2023 21:05:56 +0530 Subject: [PATCH 5/6] add min and max allowed vector dims values --- mapping/field.go | 6 ++++++ mapping/mapping_vectors.go | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/mapping/field.go b/mapping/field.go index 8190c2ab1..d3a169e67 100644 --- a/mapping/field.go +++ b/mapping/field.go @@ -79,6 +79,12 @@ type FieldMapping struct { Similarity string `json:"similarity,omitempty"` } +// Min and Max allowed dimensions for a vector field +const ( + MinVectorDims = 1 + MaxVectorDims = 2048 +) + // NewTextFieldMapping returns a default field mapping for text func NewTextFieldMapping() *FieldMapping { return &FieldMapping{ diff --git a/mapping/mapping_vectors.go b/mapping/mapping_vectors.go index a10f43d4f..babf1af30 100644 --- a/mapping/mapping_vectors.go +++ b/mapping/mapping_vectors.go @@ -133,10 +133,10 @@ func validateVectorFieldAlias(field *FieldMapping, parentName string, // # Validate field options - if field.Dims < index.MinVectorDims || field.Dims > index.MaxVectorDims { + if field.Dims < MinVectorDims || field.Dims > MaxVectorDims { return fmt.Errorf("field: '%s', invalid vector dimension: %d,"+ " value should be in range (%d, %d)", field.Name, field.Dims, - index.MinVectorDims, index.MaxVectorDims) + MinVectorDims, MaxVectorDims) } if _, ok := index.SupportedSimilarityMetrics[field.Similarity]; !ok { From bb1408eeb2dca3b78287c131e3d3117af2e282ad Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Thu, 14 Dec 2023 11:16:51 -0700 Subject: [PATCH 6/6] Relocate MinVectorDims & MaxVectorDims --- mapping/field.go | 6 ------ mapping/mapping_vectors.go | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/mapping/field.go b/mapping/field.go index d3a169e67..8190c2ab1 100644 --- a/mapping/field.go +++ b/mapping/field.go @@ -79,12 +79,6 @@ type FieldMapping struct { Similarity string `json:"similarity,omitempty"` } -// Min and Max allowed dimensions for a vector field -const ( - MinVectorDims = 1 - MaxVectorDims = 2048 -) - // NewTextFieldMapping returns a default field mapping for text func NewTextFieldMapping() *FieldMapping { return &FieldMapping{ diff --git a/mapping/mapping_vectors.go b/mapping/mapping_vectors.go index babf1af30..502857b71 100644 --- a/mapping/mapping_vectors.go +++ b/mapping/mapping_vectors.go @@ -26,6 +26,12 @@ import ( index "github.com/blevesearch/bleve_index_api" ) +// Min and Max allowed dimensions for a vector field +const ( + MinVectorDims = 1 + MaxVectorDims = 2048 +) + func NewVectorFieldMapping() *FieldMapping { return &FieldMapping{ Type: "vector",