Skip to content

Commit

Permalink
add validation for vector field aliases
Browse files Browse the repository at this point in the history
- Code refactoring
- Handle empty field name
- Add validation for non-vector field alias (field.Type should match)
  • Loading branch information
moshaad7 committed Nov 2, 2023
1 parent 23cdcd4 commit f506d22
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 59 deletions.
40 changes: 31 additions & 9 deletions mapping/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,16 @@ type DocumentMapping struct {
}

func (dm *DocumentMapping) Validate(cache *registry.Cache,
fieldAliasCtx map[string]*FieldMapping) error {
parentName string, fieldAliasCtx map[string]*FieldMapping) error {
var err error
if dm.DefaultAnalyzer != "" {
_, err := cache.AnalyzerNamed(dm.DefaultAnalyzer)
if err != nil {
return err
}
}
for _, property := range dm.Properties {
err = property.Validate(cache, fieldAliasCtx)
for propertyName, property := range dm.Properties {
err = property.Validate(cache, propertyName, fieldAliasCtx)
if err != nil {
return err
}
Expand All @@ -79,18 +79,40 @@ func (dm *DocumentMapping) Validate(cache *registry.Cache,
}
}

err := validateFieldType(field.Type)
err := validateFieldMapping(field, parentName, fieldAliasCtx)
if err != nil {
return err
}
}
return nil
}

if field.Type == "vector" {
err := validateVectorField(field, fieldAliasCtx)
if err != nil {
return err
}
// validateFieldType checks that field.Type names one of the field types
// accepted here: text, datetime, number, boolean, geopoint, geoshape or IP.
// The match is exact (case-sensitive). It returns nil for an accepted type
// and otherwise an error that quotes both field.Name and field.Type.
func validateFieldType(field *FieldMapping) error {
	switch field.Type {
	case "text", "datetime", "number", "boolean", "geopoint", "geoshape", "IP":
		// Recognized type: nothing further to check.
		return nil
	}

	return fmt.Errorf("field: '%s', unknown field type: '%s'",
		field.Name, field.Type)
}

// validateFieldAlias checks field against any mapping already recorded in
// fieldAliasCtx under the same name.
//
// If field.Name is empty it is first set to parentName (the field is
// mutated). When no mapping is recorded under that name, field is stored in
// fieldAliasCtx and nil is returned. When one is recorded, the two must have
// the same Type; otherwise an error naming both types is returned. The
// recorded entry is never replaced.
func validateFieldAlias(field *FieldMapping, parentName string,
	fieldAliasCtx map[string]*FieldMapping) error {
	// An unnamed field takes the name handed in by the caller.
	if field.Name == "" {
		field.Name = parentName
	}

	existing, seen := fieldAliasCtx[field.Name]
	if !seen {
		// First mapping under this name: remember it for later comparisons.
		fieldAliasCtx[field.Name] = field
		return nil
	}

	if existing.Type == field.Type {
		return nil
	}

	return fmt.Errorf("field: '%s', invalid alias "+
		"(different types %s and %s)", field.Name, field.Type,
		existing.Type)
}

Expand Down
6 changes: 3 additions & 3 deletions mapping/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,12 +176,12 @@ func (im *IndexMappingImpl) Validate() error {
}

fieldAliasCtx := make(map[string]*FieldMapping)
err = im.DefaultMapping.Validate(im.cache, fieldAliasCtx)
err = im.DefaultMapping.Validate(im.cache, "", fieldAliasCtx)
if err != nil {
return err
}
for _, docMapping := range im.TypeMapping {
err = docMapping.Validate(im.cache, fieldAliasCtx)
for docType, docMapping := range im.TypeMapping {
err = docMapping.Validate(im.cache, docType, fieldAliasCtx)
if err != nil {
return err
}
Expand Down
17 changes: 5 additions & 12 deletions mapping/mapping_no_vectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@

package mapping

import "fmt"

func NewVectorFieldMapping() *FieldMapping {
return nil
}
Expand All @@ -31,17 +29,12 @@ func (fm *FieldMapping) processVector(propertyMightBeVector interface{},
// -----------------------------------------------------------------------------
// document validation functions

func validateVectorField(fieldMapping *FieldMapping,
func validateFieldMapping(field *FieldMapping, parentName string,
fieldAliasCtx map[string]*FieldMapping) error {
return nil
}

func validateFieldType(fieldType string) error {
switch fieldType {
case "text", "datetime", "number", "boolean", "geopoint", "geoshape", "IP":
default:
return fmt.Errorf("unknown field type: '%s'", fieldType)
err := validateFieldType(field)
if err != nil {
return err
}

return nil
return validateFieldAlias(field, parentName, fieldAliasCtx)
}
76 changes: 41 additions & 35 deletions mapping/mapping_vectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,61 +80,67 @@ func (fm *FieldMapping) processVector(propertyMightBeVector interface{},
// -----------------------------------------------------------------------------
// document validation functions

func validateVectorField(field *FieldMapping,
// validateFieldMapping validates a single field mapping, picking the checks
// from field.Type.
//
// A field whose Type is "vector" is handed straight to
// validateVectorFieldAlias and that result is returned. Any other field is
// first checked with validateFieldType and, if that succeeds, with
// validateFieldAlias. parentName and fieldAliasCtx are passed through
// unchanged to the alias validators.
//
// NOTE(review): validateFieldType formats field.Name into its error, but an
// empty name is only defaulted from parentName later, in validateFieldAlias;
// an unnamed field with an unknown type presumably reports an empty name —
// confirm whether that is intended.
func validateFieldMapping(field *FieldMapping, parentName string,
	fieldAliasCtx map[string]*FieldMapping) error {
	if field.Type == "vector" {
		return validateVectorFieldAlias(field, parentName, fieldAliasCtx)
	}

	// Non-vector field: the type must be known before the alias is checked.
	if err := validateFieldType(field); err != nil {
		return err
	}
	return validateFieldAlias(field, parentName, fieldAliasCtx)
}

func validateVectorFieldAlias(field *FieldMapping, parentName string,
fieldAliasCtx map[string]*FieldMapping) error {

if field.Name == "" {
field.Name = parentName
}
if field.Similarity == "" {
field.Similarity = util.DefaultSimilarityMetric
}

fieldAlias, isAliased := fieldAliasCtx[field.Name]
if isAliased {
// following fields are not applicable for vector
// thus, we set them to default values
field.IncludeInAll = false
field.IncludeTermVectors = false
field.Store = false
field.DocValues = false
field.SkipFreqNorm = true

// If alias is present, validate the field as per the alias
if fieldAlias, ok := fieldAliasCtx[field.Name]; ok {
if field.Dims != fieldAlias.Dims {
return fmt.Errorf("vector field: %s, err: invalid alias "+
return fmt.Errorf("field: '%s', invalid alias "+
"(different dimensions %d and %d)", fieldAlias.Name, field.Dims,
fieldAlias.Dims)
}

if field.Similarity != fieldAlias.Similarity {
return fmt.Errorf("vector field: %s, err: invalid alias "+
"(different similarity value %s and %s)", fieldAlias.Name,
return fmt.Errorf("field: '%s', invalid alias "+
"(different similarity values %s and %s)", fieldAlias.Name,
field.Similarity, fieldAlias.Similarity)
}
} else {
if field.Dims <= 0 || field.Dims > 2048 {
return fmt.Errorf("invalid vector dimension,"+
" value should be in range (%d, %d)", 0, 2048)
}

if _, ok := util.SupportedSimilarityMetrics[field.Similarity]; !ok {
return fmt.Errorf("invalid similarity metric: '%s', "+
"valid metrics are: %+v", field.Similarity,
reflect.ValueOf(util.SupportedSimilarityMetrics).MapKeys())
}
return nil
}

// following fields are not applicable for vector
// thus, we set them to default values
field.IncludeInAll = false
field.IncludeTermVectors = false
field.Store = false
field.DocValues = false
field.SkipFreqNorm = true

if !isAliased {
fieldAliasCtx[field.Name] = field
if field.Dims <= 0 || field.Dims > 2048 {
return fmt.Errorf("field: '%s', invalid vector dimension: %d,"+
" value should be in range (%d, %d)", field.Name, field.Dims, 0, 2048)
}

return nil
}

func validateFieldType(fieldType string) error {
switch fieldType {
case "text", "datetime", "number", "boolean", "geopoint", "geoshape",
"IP", "vector":
default:
return fmt.Errorf("unknown field type: '%s'", fieldType)
if _, ok := util.SupportedSimilarityMetrics[field.Similarity]; !ok {
return fmt.Errorf("field: '%s', invalid similarity "+
"metric: '%s', valid metrics are: %+v", field.Name, field.Similarity,
reflect.ValueOf(util.SupportedSimilarityMetrics).MapKeys())
}

fieldAliasCtx[field.Name] = field

return nil
}

0 comments on commit f506d22

Please sign in to comment.