Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MB-60029: Accommodate vector_index_optimized_for into vector field mapping #1959

Merged
merged 5 commits into from
Jan 17, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 22 additions & 15 deletions document/field_vector.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,13 @@ func init() {
const DefaultVectorIndexingOptions = index.IndexField

type VectorField struct {
name string
dims int // Dimensionality of the vector
similarity string // Similarity metric to use for scoring
options index.FieldIndexingOptions
value []float32
numPlainTextBytes uint64
name string
dims int // Dimensionality of the vector
similarity string // Similarity metric to use for scoring
options index.FieldIndexingOptions
value []float32
numPlainTextBytes uint64
vectorIndexOptimizedFor string // Optimization applied to this index.
}

func (n *VectorField) Size() int {
Expand Down Expand Up @@ -95,25 +96,27 @@ func (n *VectorField) GoString() string {
// For the sake of not polluting the API, we are keeping arrayPositions as a
// parameter, but it is not used.
func NewVectorField(name string, arrayPositions []uint64,
vector []float32, dims int, similarity string) *VectorField {
vector []float32, dims int, similarity, vectorIndexOptimizedFor string) *VectorField {
return NewVectorFieldWithIndexingOptions(name, arrayPositions,
vector, dims, similarity, DefaultVectorIndexingOptions)
vector, dims, similarity, vectorIndexOptimizedFor,
DefaultVectorIndexingOptions)
}

// For the sake of not polluting the API, we are keeping arrayPositions as a
// parameter, but it is not used.
func NewVectorFieldWithIndexingOptions(name string, arrayPositions []uint64,
vector []float32, dims int, similarity string,
vector []float32, dims int, similarity, vectorIndexOptimizedFor string,
options index.FieldIndexingOptions) *VectorField {
options = options | DefaultVectorIndexingOptions

return &VectorField{
name: name,
dims: dims,
similarity: similarity,
options: options,
value: vector,
numPlainTextBytes: numBytesFloat32s(vector),
name: name,
dims: dims,
similarity: similarity,
options: options,
value: vector,
numPlainTextBytes: numBytesFloat32s(vector),
vectorIndexOptimizedFor: vectorIndexOptimizedFor,
}
}

Expand All @@ -136,3 +139,7 @@ func (n *VectorField) Dims() int {
// Similarity returns the similarity metric configured for this vector field,
// i.e. the metric to use for scoring, as stored when the field was constructed.
func (n *VectorField) Similarity() string {
return n.similarity
}

// IndexOptimizedFor returns the optimization setting recorded for this vector
// field's index, unchanged from the value passed to the constructor.
func (n *VectorField) IndexOptimizedFor() string {
return n.vectorIndexOptimizedFor
}
8 changes: 8 additions & 0 deletions mapping/field.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ type FieldMapping struct {
// vector fields.
// See: index.DefaultSimilarityMetric & index.SupportedSimilarityMetrics
Similarity string `json:"similarity,omitempty"`

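// VectorIndexOptimizedFor names what the vector index should be optimized for.
// Used by vector fields; when left empty, validation falls back to
// index.DefaultIndexOptimizedFor.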
VectorIndexOptimizedFor string `json:"vector_index_optimized_for,omitempty"`
}

// NewTextFieldMapping returns a default field mapping for text
Expand Down Expand Up @@ -466,6 +469,11 @@ func (fm *FieldMapping) UnmarshalJSON(data []byte) error {
if err != nil {
return err
}
case "vector_index_optimized_for":
err := json.Unmarshal(v, &fm.VectorIndexOptimizedFor)
if err != nil {
return err
}
default:
invalidKeys = append(invalidKeys, k)
}
Expand Down
14 changes: 13 additions & 1 deletion mapping/mapping_vectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,8 @@ func (fm *FieldMapping) processVector(propertyMightBeVector interface{},
fieldName := getFieldName(pathString, path, fm)
options := fm.Options()
field := document.NewVectorFieldWithIndexingOptions(fieldName,
indexes, vector, fm.Dims, fm.Similarity, options)
indexes, vector, fm.Dims, fm.Similarity, fm.VectorIndexOptimizedFor,
options)
context.doc.AddField(field)

// "_all" composite field is not applicable for vector field
Expand Down Expand Up @@ -162,6 +163,11 @@ func validateVectorFieldAlias(field *FieldMapping, parentName string,
field.Similarity = index.DefaultSimilarityMetric
}

if field.VectorIndexOptimizedFor == "" {
metonymic-smokey marked this conversation as resolved.
Show resolved Hide resolved
fmt.Printf("using the default optimization type \n")
field.VectorIndexOptimizedFor = index.DefaultIndexOptimizedFor
}

// following fields are not applicable for vector
// thus, we set them to default values
field.IncludeInAll = false
Expand Down Expand Up @@ -202,6 +208,12 @@ func validateVectorFieldAlias(field *FieldMapping, parentName string,
reflect.ValueOf(index.SupportedSimilarityMetrics).MapKeys())
}

if _, ok := index.SupportedVectorIndexOptimizations[field.VectorIndexOptimizedFor]; !ok {
return fmt.Errorf("field: '%s', invalid optimization "+
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's not error out here, keeping in mind that we may add more options in the future.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought updating index.SupportedVectorIndexOptimizations in bleve_index_api and its version in bleve's go.mod would take care of this?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you mean!? I'm talking about the case in the future - when we add more optimizations and there's a mixed cluster scenario where some nodes won't support it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah right.
Removed this snippet so there's no check at this point for valid optimisation metrics.

"metric: '%s', valid metrics are: %+v", field.Name, field.VectorIndexOptimizedFor,
reflect.ValueOf(index.SupportedVectorIndexOptimizations).MapKeys())
}

if fieldAliasCtx != nil { // writing to a nil map is unsafe
fieldAliasCtx[field.Name] = field
}
Expand Down
Loading