Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Support Int8Vector in go #38990

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions client/entity/field.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@
return "[]byte", ""
case FieldTypeBFloat16Vector:
return "[]byte", ""
case FieldTypeInt8Vector:
return "[]int8", ""

Check warning on line 140 in client/entity/field.go

View check run for this annotation

Codecov / codecov/patch

client/entity/field.go#L139-L140

Added lines #L139 - L140 were not covered by tests
default:
return "undefined", ""
}
Expand Down Expand Up @@ -177,6 +179,8 @@
FieldTypeBFloat16Vector FieldType = 103
// FieldTypeSparseVector field type sparse vector
FieldTypeSparseVector FieldType = 104
// FieldTypeInt8Vector field type int8 vector
FieldTypeInt8Vector FieldType = 105
)

// Field represent field schema in milvus
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ require (
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0
github.com/klauspost/compress v1.17.9
github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d
github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20241211060635-410431d7865b
github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20250102080446-c3ba3d26a90f
github.com/minio/minio-go/v7 v7.0.73
github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81
github.com/prometheus/client_golang v1.14.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -630,8 +630,8 @@ github.com/milvus-io/cgosymbolizer v0.0.0-20240722103217-b7dee0e50119 h1:9VXijWu
github.com/milvus-io/cgosymbolizer v0.0.0-20240722103217-b7dee0e50119/go.mod h1:DvXTE/K/RtHehxU8/GtDs4vFtfw64jJ3PaCnFri8CRg=
github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b h1:TfeY0NxYxZzUfIfYe5qYDBzt4ZYRqzUjTR6CvUzjat8=
github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b/go.mod h1:iwW+9cWfIzzDseEBCCeDSN5SD16Tidvy8cwQ7ZY8Qj4=
github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20241211060635-410431d7865b h1:iPPhnFx+s7FF53UeWj7A4EYhPRMFPL6mHqyQw7qRjeQ=
github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20241211060635-410431d7865b/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs=
github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20250102080446-c3ba3d26a90f h1:So6RKU5wqP/8EaKogicJP8gZ2SrzzS/JprusBaE3RKc=
github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20250102080446-c3ba3d26a90f/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs=
github.com/milvus-io/pulsar-client-go v0.12.1 h1:O2JZp1tsYiO7C0MQ4hrUY/aJXnn2Gry6hpm7UodghmE=
github.com/milvus-io/pulsar-client-go v0.12.1/go.mod h1:dkutuH4oS2pXiGm+Ti7fQZ4MRjrMPZ8IJeEGAWMeckk=
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs=
Expand Down
21 changes: 19 additions & 2 deletions internal/core/src/common/Types.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
using float16 = knowhere::fp16;
using bfloat16 = knowhere::bf16;
using bin1 = knowhere::bin1;
using int8 = knowhere::int8;

// See also: https://github.com/milvus-io/milvus-proto/blob/master/proto/schema.proto
enum class DataType {
Expand Down Expand Up @@ -85,6 +86,7 @@
VECTOR_FLOAT16 = 102,
VECTOR_BFLOAT16 = 103,
VECTOR_SPARSE_FLOAT = 104,
VECTOR_INT8 = 105,
};

using Timestamp = uint64_t; // TODO: use TiKV-like timestamp
Expand Down Expand Up @@ -322,6 +324,11 @@
return data_type == DataType::VECTOR_SPARSE_FLOAT;
}

// Returns true only when `data_type` is DataType::VECTOR_INT8.
inline bool
IsInt8VectorDataType(DataType data_type) {
    return DataType::VECTOR_INT8 == data_type;
}

inline bool
IsFloatVectorDataType(DataType data_type) {
return IsDenseFloatVectorDataType(data_type) ||
Expand All @@ -331,7 +338,7 @@
// Returns true when `data_type` is any vector type: binary, float-family
// (as decided by IsFloatVectorDataType), or int8.
inline bool
IsVectorDataType(DataType data_type) {
    // All three checks must be part of one expression; terminating the
    // return after the float check would leave the int8 check unreachable.
    return IsBinaryVectorDataType(data_type) ||
           IsFloatVectorDataType(data_type) ||
           IsInt8VectorDataType(data_type);
}

inline bool
Expand Down Expand Up @@ -418,7 +425,17 @@

// Returns true when `metric_type` equals one of the metrics listed below:
// HAMMING, JACCARD, SUPERSTRUCTURE or SUBSTRUCTURE.
inline bool
IsBinaryVectorMetricType(const MetricType& metric_type) {
    // Use an explicit allow-list. Defining this as
    // `!IsFloatVectorMetricType(metric_type)` would accept every metric that
    // is merely "not float" and would make the list below unreachable.
    return metric_type == knowhere::metric::HAMMING ||
           metric_type == knowhere::metric::JACCARD ||
           metric_type == knowhere::metric::SUPERSTRUCTURE ||
           metric_type == knowhere::metric::SUBSTRUCTURE;
}

inline bool
IsIntVectorMetricType(const MetricType& metric_type) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is a little bit weird to have this function.
Let's change the metrics check to vector and binary vector

Copy link
Contributor Author

@cydrain cydrain Jan 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I cannot get your point.
FloatVector and Int8Vector support different metric types, so I use different APIs to handle.

return metric_type == knowhere::metric::L2 ||
metric_type == knowhere::metric::IP ||
metric_type == knowhere::metric::COSINE;

Check warning on line 438 in internal/core/src/common/Types.h

View check run for this annotation

Codecov / codecov/patch

internal/core/src/common/Types.h#L435-L438

Added lines #L435 - L438 were not covered by tests
}

// Plus 1 because we can't use greater(>) symbol
Expand Down
4 changes: 4 additions & 0 deletions internal/core/src/index/IndexFactory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,10 @@
return std::make_unique<VectorMemIndex<bfloat16>>(
index_type, metric_type, version, file_manager_context);
}
case DataType::VECTOR_INT8: {
return std::make_unique<VectorMemIndex<int8>>(
index_type, metric_type, version, file_manager_context);

Check warning on line 448 in internal/core/src/index/IndexFactory.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/index/IndexFactory.cpp#L446-L448

Added lines #L446 - L448 were not covered by tests
}
default:
PanicInfo(
DataTypeInvalid,
Expand Down
1 change: 1 addition & 0 deletions internal/core/src/index/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ BIN_List() {
return ret;
}

// TODO caiyd: should list the supported combinations instead
std::vector<std::tuple<IndexType, MetricType>>
unsupported_index_combinations() {
static std::vector<std::tuple<IndexType, MetricType>> ret{
Expand Down
10 changes: 6 additions & 4 deletions internal/core/src/index/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,13 @@
if constexpr (std::is_same_v<T, bin1>) {
AssertInfo(
IsBinaryVectorMetricType(metric_type),
"binary vector does not float vector metric type: " + metric_type);
"binary vector does not support metric type: " + metric_type);
} else if constexpr (std::is_same_v<T, int8>) {
AssertInfo(IsIntVectorMetricType(metric_type),

Check warning on line 110 in internal/core/src/index/Utils.h

View check run for this annotation

Codecov / codecov/patch

internal/core/src/index/Utils.h#L110

Added line #L110 was not covered by tests
"int vector does not support metric type: " + metric_type);
} else {
AssertInfo(
IsFloatVectorMetricType(metric_type),
"float vector does not binary vector metric type: " + metric_type);
AssertInfo(IsFloatVectorMetricType(metric_type),
"float vector does not support metric type: " + metric_type);
}
}

Expand Down
1 change: 1 addition & 0 deletions internal/core/src/index/VectorMemIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -636,5 +636,6 @@ template class VectorMemIndex<float>;
template class VectorMemIndex<bin1>;
template class VectorMemIndex<float16>;
template class VectorMemIndex<bfloat16>;
template class VectorMemIndex<int8>;

} // namespace milvus::index
1 change: 1 addition & 0 deletions internal/core/src/indexbuilder/IndexFactory.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ class IndexFactory {
case DataType::VECTOR_BFLOAT16:
case DataType::VECTOR_BINARY:
case DataType::VECTOR_SPARSE_FLOAT:
case DataType::VECTOR_INT8:
return std::make_unique<VecIndexCreator>(type, config, context);
default:
PanicInfo(DataTypeInvalid,
Expand Down
23 changes: 23 additions & 0 deletions internal/core/src/indexbuilder/index_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,29 @@
return status;
}

CStatus
BuildInt8VecIndex(CIndex index, int64_t int8_value_num, const int8_t* vectors) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know we just follow the pattern. But it will be great if we can merge these Build functions, since they only have a little discrepancy.
This is a non-blocking comment

Copy link
Contributor Author

@cydrain cydrain Jan 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's because this is C code; we cannot use templates here, since they are only supported in C++.

auto status = CStatus();
try {
AssertInfo(index,

Check warning on line 457 in internal/core/src/indexbuilder/index_c.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/indexbuilder/index_c.cpp#L454-L457

Added lines #L454 - L457 were not covered by tests
"failed to build int8 vector index, passed index was null");
auto real_index =

Check warning on line 459 in internal/core/src/indexbuilder/index_c.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/indexbuilder/index_c.cpp#L459

Added line #L459 was not covered by tests
reinterpret_cast<milvus::indexbuilder::IndexCreatorBase*>(index);
auto cIndex =
dynamic_cast<milvus::indexbuilder::VecIndexCreator*>(real_index);
auto dim = cIndex->dim();
auto row_nums = int8_value_num / dim;
auto ds = knowhere::GenDataSet(row_nums, dim, vectors);
cIndex->Build(ds);
status.error_code = Success;
status.error_msg = "";
} catch (std::exception& e) {
status.error_code = UnexpectedError;
status.error_msg = strdup(e.what());
}
return status;

Check warning on line 473 in internal/core/src/indexbuilder/index_c.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/indexbuilder/index_c.cpp#L461-L473

Added lines #L461 - L473 were not covered by tests
}

// field_data:
// 1, serialized proto::schema::BoolArray, if type is bool;
// 2, serialized proto::schema::StringArray, if type is string;
Expand Down
3 changes: 3 additions & 0 deletions internal/core/src/indexbuilder/index_c.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ BuildSparseFloatVecIndex(CIndex index,
int64_t dim,
const uint8_t* vectors);

// Builds an int8 vector index from a flat buffer of int8 values.
// `data_size` is the total number of int8 values in `vectors`, not the row
// count: the definition divides it by the index dimension to get the rows.
// Failures raised while building are reported through the returned CStatus.
CStatus
BuildInt8VecIndex(CIndex index, int64_t data_size, const int8_t* vectors);

// field_data:
// 1, serialized proto::schema::BoolArray, if type is bool;
// 2, serialized proto::schema::StringArray, if type is string;
Expand Down
6 changes: 6 additions & 0 deletions internal/core/src/segcore/vector_index_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,12 @@
knowhere::Version::GetCurrentVersion().VersionNumber(),
json,
error_msg);
} else if (dataType == milvus::DataType::VECTOR_INT8) {
status = knowhere::IndexStaticFaced<knowhere::int8>::ConfigCheck(

Check warning on line 77 in internal/core/src/segcore/vector_index_c.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/segcore/vector_index_c.cpp#L77

Added line #L77 was not covered by tests
index_type,
knowhere::Version::GetCurrentVersion().VersionNumber(),

Check warning on line 79 in internal/core/src/segcore/vector_index_c.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/segcore/vector_index_c.cpp#L79

Added line #L79 was not covered by tests
json,
error_msg);
} else {
status = knowhere::Status::invalid_args;
}
Expand Down
15 changes: 13 additions & 2 deletions internal/proxy/task_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,11 @@
for k, v := range Params.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap() {
indexParamsMap[k] = v
}
} else if typeutil.IsIntVectorType(cit.fieldSchema.DataType) {
// override int vector index params by autoindex
for k, v := range Params.AutoIndexConfig.IndexParams.GetAsJSONMap() {
indexParamsMap[k] = v
}

Check warning on line 268 in internal/proxy/task_index.go

View check run for this annotation

Codecov / codecov/patch

internal/proxy/task_index.go#L264-L268

Added lines #L264 - L268 were not covered by tests
}

if metricTypeExist {
Expand Down Expand Up @@ -320,6 +325,9 @@
} else if typeutil.IsBinaryVectorType(cit.fieldSchema.DataType) {
// override binary vector index params by autoindex
config = Params.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap()
} else if typeutil.IsIntVectorType(cit.fieldSchema.DataType) {
// override int vector index params by autoindex
config = Params.AutoIndexConfig.IndexParams.GetAsJSONMap()

Check warning on line 330 in internal/proxy/task_index.go

View check run for this annotation

Codecov / codecov/patch

internal/proxy/task_index.go#L329-L330

Added lines #L329 - L330 were not covered by tests
}
if !exist {
if err := handle(0, config); err != nil {
Expand Down Expand Up @@ -364,17 +372,20 @@
return merr.WrapErrParameterInvalid("valid index params", "invalid index params", "float vector index does not support metric type: "+metricType)
}
} else if typeutil.IsSparseFloatVectorType(cit.fieldSchema.DataType) {
if metricType != metric.IP && metricType != metric.BM25 {
if !funcutil.SliceContain(indexparamcheck.SparseFloatVectorMetrics, metricType) {
return merr.WrapErrParameterInvalid("valid index params", "invalid index params", "only IP&BM25 is the supported metric type for sparse index")
}

if metricType == metric.BM25 && cit.functionSchema.GetType() != schemapb.FunctionType_BM25 {
return merr.WrapErrParameterInvalid("valid index params", "invalid index params", "only BM25 Function output field support BM25 metric type")
}
} else if typeutil.IsBinaryVectorType(cit.fieldSchema.DataType) {
if !funcutil.SliceContain(indexparamcheck.BinaryVectorMetrics, metricType) {
return merr.WrapErrParameterInvalid("valid index params", "invalid index params", "binary vector index does not support metric type: "+metricType)
}
} else if typeutil.IsIntVectorType(cit.fieldSchema.DataType) {
if !funcutil.SliceContain(indexparamcheck.IntVectorMetrics, metricType) {
return merr.WrapErrParameterInvalid("valid index params", "invalid index params", "int vector index does not support metric type: "+metricType)
}

Check warning on line 388 in internal/proxy/task_index.go

View check run for this annotation

Codecov / codecov/patch

internal/proxy/task_index.go#L385-L388

Added lines #L385 - L388 were not covered by tests
}
}

Expand Down
36 changes: 36 additions & 0 deletions internal/proxy/validate_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@
if err := v.checkSparseFloatFieldData(field, fieldSchema); err != nil {
return err
}
case schemapb.DataType_Int8Vector:
if err := v.checkInt8VectorFieldData(field, fieldSchema); err != nil {
return err
}

Check warning on line 94 in internal/proxy/validate_util.go

View check run for this annotation

Codecov / codecov/patch

internal/proxy/validate_util.go#L91-L94

Added lines #L91 - L94 were not covered by tests
case schemapb.DataType_VarChar:
if err := v.checkVarCharFieldData(field, fieldSchema); err != nil {
return err
Expand Down Expand Up @@ -246,6 +250,29 @@
return errNumRowsMismatch(field.GetFieldName(), n)
}

case schemapb.DataType_Int8Vector:
f, err := schema.GetFieldFromName(field.GetFieldName())
if err != nil {
return err
}

dim, err := typeutil.GetDim(f)
if err != nil {
return err
}

n, err := funcutil.GetNumRowsOfInt8VectorField(field.GetVectors().GetInt8Vector(), dim)
if err != nil {
return err
}
dataDim := field.GetVectors().Dim
if dataDim != dim {
return errDimMismatch(field.GetFieldName(), dataDim, dim)
}

if n != numRows {
return errNumRowsMismatch(field.GetFieldName(), n)
}
default:
// error won't happen here.
n, err := funcutil.GetNumRowOfFieldDataWithSchema(field, schema)
Expand Down Expand Up @@ -609,6 +636,15 @@
return typeutil.ValidateSparseFloatRows(sparseRows...)
}

func (v *validateUtil) checkInt8VectorFieldData(field *schemapb.FieldData, fieldSchema *schemapb.FieldSchema) error {
int8VecArray := field.GetVectors().GetInt8Vector()
if int8VecArray == nil {
msg := fmt.Sprintf("int8 vector field '%v' is illegal, nil Vector_Int8 type", field.GetFieldName())
return merr.WrapErrParameterInvalid("need vector_int8 array", "got nil", msg)
}
return nil

Check warning on line 645 in internal/proxy/validate_util.go

View check run for this annotation

Codecov / codecov/patch

internal/proxy/validate_util.go#L639-L645

Added lines #L639 - L645 were not covered by tests
}

func (v *validateUtil) checkVarCharFieldData(field *schemapb.FieldData, fieldSchema *schemapb.FieldSchema) error {
strArr := field.GetScalars().GetStringData().GetData()
if strArr == nil && fieldSchema.GetDefaultValue() == nil && !fieldSchema.GetNullable() {
Expand Down
Loading
Loading