Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MB-60029: Accommodate vector_index_optimized_for into vector field mapping #1959

Merged
merged 5 commits into from
Jan 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 22 additions & 15 deletions document/field_vector.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,13 @@ func init() {
// DefaultVectorIndexingOptions is the indexing option set used by
// NewVectorField and always OR-ed into the options passed to
// NewVectorFieldWithIndexingOptions.
const DefaultVectorIndexingOptions = index.IndexField

// VectorField is a document field that carries a float32 vector together
// with its dimensionality, the similarity metric used for scoring, the
// field indexing options, and the optimization setting recorded for the
// vector index.
type VectorField struct {
name string
dims int // Dimensionality of the vector
similarity string // Similarity metric to use for scoring
options index.FieldIndexingOptions
value []float32
numPlainTextBytes uint64
name string
dims int // Dimensionality of the vector
similarity string // Similarity metric to use for scoring
options index.FieldIndexingOptions
value []float32
numPlainTextBytes uint64
vectorIndexOptimizedFor string // Optimization applied to this index.
}

func (n *VectorField) Size() int {
Expand Down Expand Up @@ -95,25 +96,27 @@ func (n *VectorField) GoString() string {
// NewVectorField creates a VectorField with DefaultVectorIndexingOptions by
// delegating to NewVectorFieldWithIndexingOptions.
//
// For the sake of not polluting the API, we are keeping arrayPositions as a
// parameter, but it is not used.
func NewVectorField(name string, arrayPositions []uint64,
vector []float32, dims int, similarity string) *VectorField {
vector []float32, dims int, similarity, vectorIndexOptimizedFor string) *VectorField {
return NewVectorFieldWithIndexingOptions(name, arrayPositions,
vector, dims, similarity, DefaultVectorIndexingOptions)
vector, dims, similarity, vectorIndexOptimizedFor,
DefaultVectorIndexingOptions)
}

// NewVectorFieldWithIndexingOptions creates a VectorField from the given
// name, vector, dimensionality, similarity metric, vector index optimization
// setting and indexing options. The stored numPlainTextBytes is computed from
// the vector with numBytesFloat32s.
//
// For the sake of not polluting the API, we are keeping arrayPositions as a
// parameter, but it is not used.
func NewVectorFieldWithIndexingOptions(name string, arrayPositions []uint64,
vector []float32, dims int, similarity string,
vector []float32, dims int, similarity, vectorIndexOptimizedFor string,
options index.FieldIndexingOptions) *VectorField {
// The default vector indexing options are always included, whatever the
// caller passed in.
options = options | DefaultVectorIndexingOptions

return &VectorField{
name: name,
dims: dims,
similarity: similarity,
options: options,
value: vector,
numPlainTextBytes: numBytesFloat32s(vector),
name: name,
dims: dims,
similarity: similarity,
options: options,
value: vector,
numPlainTextBytes: numBytesFloat32s(vector),
vectorIndexOptimizedFor: vectorIndexOptimizedFor,
}
}

Expand All @@ -136,3 +139,7 @@ func (n *VectorField) Dims() int {
// Similarity returns the similarity metric this vector field uses for
// scoring.
func (n *VectorField) Similarity() string {
return n.similarity
}

// IndexOptimizedFor returns the vector index optimization setting that was
// supplied when this field was constructed.
func (n *VectorField) IndexOptimizedFor() string {
return n.vectorIndexOptimizedFor
}
8 changes: 4 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ go 1.20
require (
github.com/RoaringBitmap/roaring v1.2.3
github.com/bits-and-blooms/bitset v1.2.0
github.com/blevesearch/bleve_index_api v1.1.4
github.com/blevesearch/geo v0.1.18
github.com/blevesearch/bleve_index_api v1.1.5
github.com/blevesearch/geo v0.1.19
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475
github.com/blevesearch/go-porterstemmer v1.0.3
github.com/blevesearch/goleveldb v1.0.1
github.com/blevesearch/gtreap v0.1.1
github.com/blevesearch/scorch_segment_api/v2 v2.2.5
github.com/blevesearch/scorch_segment_api/v2 v2.2.6
github.com/blevesearch/segment v0.9.1
github.com/blevesearch/snowball v0.6.1
github.com/blevesearch/snowballstem v0.9.0
Expand All @@ -23,7 +23,7 @@ require (
github.com/blevesearch/zapx/v13 v13.3.10
github.com/blevesearch/zapx/v14 v14.3.10
github.com/blevesearch/zapx/v15 v15.3.13
github.com/blevesearch/zapx/v16 v16.0.1-0.20240112173957-c19e7ab032d8
github.com/blevesearch/zapx/v16 v16.0.1-0.20240117165423-8662175b037b
github.com/couchbase/moss v0.2.0
github.com/golang/protobuf v1.3.2
github.com/spf13/cobra v1.7.0
Expand Down
16 changes: 8 additions & 8 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ github.com/RoaringBitmap/roaring v1.2.3 h1:yqreLINqIrX22ErkKI0vY47/ivtJr6n+kMhVO
github.com/RoaringBitmap/roaring v1.2.3/go.mod h1:plvDsJQpxOC5bw8LRteu/MLWHsHez/3y6cubLI4/1yE=
github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA=
github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA=
github.com/blevesearch/bleve_index_api v1.1.4 h1:n9Ilxlb80g9DAhchR95IcVrzohamDSri0wPnkKnva50=
github.com/blevesearch/bleve_index_api v1.1.4/go.mod h1:PbcwjIcRmjhGbkS/lJCpfgVSMROV6TRubGGAODaK1W8=
github.com/blevesearch/geo v0.1.18 h1:Np8jycHTZ5scFe7VEPLrDoHnnb9C4j636ue/CGrhtDw=
github.com/blevesearch/geo v0.1.18/go.mod h1:uRMGWG0HJYfWfFJpK3zTdnnr1K+ksZTuWKhXeSokfnM=
github.com/blevesearch/bleve_index_api v1.1.5 h1:0q05mzu6GT/kebzqKywCpou/eUea9wTKa7kfqX7QX+k=
github.com/blevesearch/bleve_index_api v1.1.5/go.mod h1:PbcwjIcRmjhGbkS/lJCpfgVSMROV6TRubGGAODaK1W8=
github.com/blevesearch/geo v0.1.19 h1:hlX1YpBZ+X+xfjS8hEpmM/tdPUFbqBME3mdAWKHo2s0=
github.com/blevesearch/geo v0.1.19/go.mod h1:EPyr3iJCcESYa830PnkFhqzJkOP7/daHT/ocun43WRY=
github.com/blevesearch/go-faiss v1.0.5 h1:IWlOZGF3GXFOUdLVW9JkqgWPQ3gEIYqqdp88rbrAcc4=
github.com/blevesearch/go-faiss v1.0.5/go.mod h1:jrxHrbl42X/RnDPI+wBoZU8joxxuRwedrxqswQ3xfU8=
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:kDy+zgJFJJoJYBvdfBSiZYBbdsUL0XcjHYWezpQBGPA=
Expand All @@ -19,8 +19,8 @@ github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgY
github.com/blevesearch/mmap-go v1.0.2/go.mod h1:ol2qBqYaOUsGdm7aRMRrYGgPvnwLe6Y+7LMvAB5IbSA=
github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc=
github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs=
github.com/blevesearch/scorch_segment_api/v2 v2.2.5 h1:5SsNQmR8v1bojtGQ1zFhZravcMg5rdiX8AVu6LwlVtc=
github.com/blevesearch/scorch_segment_api/v2 v2.2.5/go.mod h1:8N2ytOlBCdurlxDgbqsfeR1oTKRN0ZVIKdUUP1VFZNc=
github.com/blevesearch/scorch_segment_api/v2 v2.2.6 h1:rewrzgFaCEjjfWovAB9NubMAd4+aCLxD3RaQcPDaoNo=
github.com/blevesearch/scorch_segment_api/v2 v2.2.6/go.mod h1:0rv+k/OIjtYCT/g7Z45pCOVweFyta+0AdXO8keKfZxo=
github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU=
github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw=
github.com/blevesearch/snowball v0.6.1 h1:cDYjn/NCH+wwt2UdehaLpr2e4BwLIjN4V/TdLsL+B5A=
Expand All @@ -43,8 +43,8 @@ github.com/blevesearch/zapx/v14 v14.3.10 h1:SG6xlsL+W6YjhX5N3aEiL/2tcWh3DO75Bnz7
github.com/blevesearch/zapx/v14 v14.3.10/go.mod h1:qqyuR0u230jN1yMmE4FIAuCxmahRQEOehF78m6oTgns=
github.com/blevesearch/zapx/v15 v15.3.13 h1:6EkfaZiPlAxqXz0neniq35my6S48QI94W/wyhnpDHHQ=
github.com/blevesearch/zapx/v15 v15.3.13/go.mod h1:Turk/TNRKj9es7ZpKK95PS7f6D44Y7fAFy8F4LXQtGg=
github.com/blevesearch/zapx/v16 v16.0.1-0.20240112173957-c19e7ab032d8 h1:dJfTV12JRZ9zkAukNSmKRY8P9iGbETJt2SqW71AvkUM=
github.com/blevesearch/zapx/v16 v16.0.1-0.20240112173957-c19e7ab032d8/go.mod h1:ih5Q8QhWQjgqVCnkSko/zMc5AR1BWyYbRyb/a+trB+Y=
github.com/blevesearch/zapx/v16 v16.0.1-0.20240117165423-8662175b037b h1:jZs0UQurWmVaNlrNolROktccw36F/U/z7+AaNLrvwHU=
github.com/blevesearch/zapx/v16 v16.0.1-0.20240117165423-8662175b037b/go.mod h1:FVQ8/6UMCPWFOJQ2sTJVju4d2XtTyPRhe1A6aQvNM5c=
github.com/couchbase/ghistogram v0.1.0 h1:b95QcQTCzjTUocDXp/uMgSNQi8oj1tGwnJ4bODWZnps=
github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k=
github.com/couchbase/moss v0.2.0 h1:VCYrMzFwEryyhRSeI+/b3tRBSeTpi/8gn5Kf6dxqn+o=
Expand Down
8 changes: 8 additions & 0 deletions mapping/field.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ type FieldMapping struct {
// vector fields.
// See: index.DefaultSimilarityMetric & index.SupportedSimilarityMetrics
Similarity string `json:"similarity,omitempty"`

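// Applicable to vector fields only - names the optimization the vector
// index is built for.
// See: index.DefaultIndexOptimization & index.SupportedVectorIndexOptimizations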
VectorIndexOptimizedFor string `json:"vector_index_optimized_for,omitempty"`
}

// NewTextFieldMapping returns a default field mapping for text
Expand Down Expand Up @@ -466,6 +469,11 @@ func (fm *FieldMapping) UnmarshalJSON(data []byte) error {
if err != nil {
return err
}
case "vector_index_optimized_for":
err := json.Unmarshal(v, &fm.VectorIndexOptimizedFor)
if err != nil {
return err
}
default:
invalidKeys = append(invalidKeys, k)
}
Expand Down
13 changes: 11 additions & 2 deletions mapping/mapping_vectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@ func (fm *FieldMapping) processVector(propertyMightBeVector interface{},

fieldName := getFieldName(pathString, path, fm)
options := fm.Options()
field := document.NewVectorFieldWithIndexingOptions(fieldName,
indexes, vector, fm.Dims, fm.Similarity, options)
field := document.NewVectorFieldWithIndexingOptions(fieldName, indexes, vector,
fm.Dims, fm.Similarity, fm.VectorIndexOptimizedFor, options)
context.doc.AddField(field)

// "_all" composite field is not applicable for vector field
Expand All @@ -158,10 +158,19 @@ func validateVectorFieldAlias(field *FieldMapping, parentName string,
if field.Name == "" {
field.Name = parentName
}

if field.Similarity == "" {
field.Similarity = index.DefaultSimilarityMetric
}

if field.VectorIndexOptimizedFor == "" {
metonymic-smokey marked this conversation as resolved.
Show resolved Hide resolved
field.VectorIndexOptimizedFor = index.DefaultIndexOptimization
}
if _, exists := index.SupportedVectorIndexOptimizations[field.VectorIndexOptimizedFor]; !exists {
// if an unsupported config is provided, override to default
field.VectorIndexOptimizedFor = index.DefaultIndexOptimization
}

// following fields are not applicable for vector
// thus, we set them to default values
field.IncludeInAll = false
Expand Down
Loading