Skip to content

Commit

Permalink
feat: Support Int8Vector in go
Browse files Browse the repository at this point in the history
Signed-off-by: Cai Yudong <[email protected]>
  • Loading branch information
cydrain committed Jan 7, 2025
1 parent 84f8047 commit 84c8e46
Show file tree
Hide file tree
Showing 56 changed files with 1,583 additions and 183 deletions.
4 changes: 4 additions & 0 deletions client/entity/field.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ func (t FieldType) PbFieldType() (string, string) {
return "[]byte", ""
case FieldTypeBFloat16Vector:
return "[]byte", ""
case FieldTypeInt8Vector:
return "[]int8", ""

Check warning on line 140 in client/entity/field.go

View check run for this annotation

Codecov / codecov/patch

client/entity/field.go#L139-L140

Added lines #L139 - L140 were not covered by tests
default:
return "undefined", ""
}
Expand Down Expand Up @@ -177,6 +179,8 @@ const (
FieldTypeBFloat16Vector FieldType = 103
// FieldTypeSparseVector field type sparse vector
FieldTypeSparseVector FieldType = 104
// FieldTypeInt8Vector field type int8 vector
FieldTypeInt8Vector FieldType = 105
)

// Field represent field schema in milvus
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ require (
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0
github.com/klauspost/compress v1.17.9
github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d
github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20241211060635-410431d7865b
github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20250102080446-c3ba3d26a90f
github.com/minio/minio-go/v7 v7.0.73
github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81
github.com/prometheus/client_golang v1.14.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -630,8 +630,8 @@ github.com/milvus-io/cgosymbolizer v0.0.0-20240722103217-b7dee0e50119 h1:9VXijWu
github.com/milvus-io/cgosymbolizer v0.0.0-20240722103217-b7dee0e50119/go.mod h1:DvXTE/K/RtHehxU8/GtDs4vFtfw64jJ3PaCnFri8CRg=
github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b h1:TfeY0NxYxZzUfIfYe5qYDBzt4ZYRqzUjTR6CvUzjat8=
github.com/milvus-io/gorocksdb v0.0.0-20220624081344-8c5f4212846b/go.mod h1:iwW+9cWfIzzDseEBCCeDSN5SD16Tidvy8cwQ7ZY8Qj4=
github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20241211060635-410431d7865b h1:iPPhnFx+s7FF53UeWj7A4EYhPRMFPL6mHqyQw7qRjeQ=
github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20241211060635-410431d7865b/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs=
github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20250102080446-c3ba3d26a90f h1:So6RKU5wqP/8EaKogicJP8gZ2SrzzS/JprusBaE3RKc=
github.com/milvus-io/milvus-proto/go-api/v2 v2.5.0-beta.0.20250102080446-c3ba3d26a90f/go.mod h1:/6UT4zZl6awVeXLeE7UGDWZvXj3IWkRsh3mqsn0DiAs=
github.com/milvus-io/pulsar-client-go v0.12.1 h1:O2JZp1tsYiO7C0MQ4hrUY/aJXnn2Gry6hpm7UodghmE=
github.com/milvus-io/pulsar-client-go v0.12.1/go.mod h1:dkutuH4oS2pXiGm+Ti7fQZ4MRjrMPZ8IJeEGAWMeckk=
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs=
Expand Down
21 changes: 19 additions & 2 deletions internal/core/src/common/Types.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ using distance_t = float;
using float16 = knowhere::fp16;
using bfloat16 = knowhere::bf16;
using bin1 = knowhere::bin1;
using int8 = knowhere::int8;

// See also: https://github.com/milvus-io/milvus-proto/blob/master/proto/schema.proto
enum class DataType {
Expand Down Expand Up @@ -85,6 +86,7 @@ enum class DataType {
VECTOR_FLOAT16 = 102,
VECTOR_BFLOAT16 = 103,
VECTOR_SPARSE_FLOAT = 104,
VECTOR_INT8 = 105,
};

using Timestamp = uint64_t; // TODO: use TiKV-like timestamp
Expand Down Expand Up @@ -322,6 +324,11 @@ IsSparseFloatVectorDataType(DataType data_type) {
return data_type == DataType::VECTOR_SPARSE_FLOAT;
}

// Reports whether `data_type` is the int8 vector type
// (DataType::VECTOR_INT8).
inline bool
IsInt8VectorDataType(DataType data_type) {
    const bool is_int8_vector = (DataType::VECTOR_INT8 == data_type);
    return is_int8_vector;
}

inline bool
IsFloatVectorDataType(DataType data_type) {
return IsDenseFloatVectorDataType(data_type) ||
Expand All @@ -331,7 +338,7 @@ IsFloatVectorDataType(DataType data_type) {
// Reports whether `data_type` is any vector type: a binary vector, a float
// vector (as decided by IsFloatVectorDataType), or an int8 vector.
//
// All three checks belong to one return expression; a separate trailing
// `... || IsInt8VectorDataType(...)` statement after the return would be
// unreachable and int8 vectors would not be recognised as vectors.
inline bool
IsVectorDataType(DataType data_type) {
    return IsBinaryVectorDataType(data_type) ||
           IsFloatVectorDataType(data_type) || IsInt8VectorDataType(data_type);
}

inline bool
Expand Down Expand Up @@ -418,7 +425,17 @@ IsFloatVectorMetricType(const MetricType& metric_type) {

// Reports whether `metric_type` is one of the binary-vector metrics:
// HAMMING, JACCARD, SUPERSTRUCTURE or SUBSTRUCTURE.
//
// The accepted metrics are enumerated explicitly rather than derived as
// "anything that is not a float vector metric", so an unrelated or unknown
// metric is not classified as binary by default.
inline bool
IsBinaryVectorMetricType(const MetricType& metric_type) {
    return metric_type == knowhere::metric::HAMMING ||
           metric_type == knowhere::metric::JACCARD ||
           metric_type == knowhere::metric::SUPERSTRUCTURE ||
           metric_type == knowhere::metric::SUBSTRUCTURE;
}

// Reports whether `metric_type` is accepted for int8 vectors: L2, IP or
// COSINE.
// NOTE: Codecov reported this function as not covered by tests in this commit.
inline bool
IsIntVectorMetricType(const MetricType& metric_type) {
    return metric_type == knowhere::metric::L2 ||
           metric_type == knowhere::metric::IP ||
           metric_type == knowhere::metric::COSINE;
}

// Plus 1 because we can't use greater(>) symbol
Expand Down
4 changes: 4 additions & 0 deletions internal/core/src/index/IndexFactory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,10 @@ IndexFactory::CreateVectorIndex(
return std::make_unique<VectorMemIndex<bfloat16>>(
index_type, metric_type, version, file_manager_context);
}
case DataType::VECTOR_INT8: {
return std::make_unique<VectorMemIndex<int8>>(
index_type, metric_type, version, file_manager_context);

Check warning on line 448 in internal/core/src/index/IndexFactory.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/index/IndexFactory.cpp#L446-L448

Added lines #L446 - L448 were not covered by tests
}
default:
PanicInfo(
DataTypeInvalid,
Expand Down
1 change: 1 addition & 0 deletions internal/core/src/index/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ BIN_List() {
return ret;
}

// TODO caiyd: should list supported combinations
std::vector<std::tuple<IndexType, MetricType>>
unsupported_index_combinations() {
static std::vector<std::tuple<IndexType, MetricType>> ret{
Expand Down
10 changes: 6 additions & 4 deletions internal/core/src/index/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,13 @@ CheckMetricTypeSupport(const MetricType& metric_type) {
if constexpr (std::is_same_v<T, bin1>) {
AssertInfo(
IsBinaryVectorMetricType(metric_type),
"binary vector does not float vector metric type: " + metric_type);
"binary vector does not support metric type: " + metric_type);
} else if constexpr (std::is_same_v<T, int8>) {
AssertInfo(IsIntVectorMetricType(metric_type),

Check warning on line 110 in internal/core/src/index/Utils.h

View check run for this annotation

Codecov / codecov/patch

internal/core/src/index/Utils.h#L110

Added line #L110 was not covered by tests
"int vector does not support metric type: " + metric_type);
} else {
AssertInfo(
IsFloatVectorMetricType(metric_type),
"float vector does not binary vector metric type: " + metric_type);
AssertInfo(IsFloatVectorMetricType(metric_type),
"float vector does not support metric type: " + metric_type);
}
}

Expand Down
1 change: 1 addition & 0 deletions internal/core/src/index/VectorMemIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -636,5 +636,6 @@ template class VectorMemIndex<float>;
template class VectorMemIndex<bin1>;
template class VectorMemIndex<float16>;
template class VectorMemIndex<bfloat16>;
template class VectorMemIndex<int8>;

} // namespace milvus::index
1 change: 1 addition & 0 deletions internal/core/src/indexbuilder/IndexFactory.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ class IndexFactory {
case DataType::VECTOR_BFLOAT16:
case DataType::VECTOR_BINARY:
case DataType::VECTOR_SPARSE_FLOAT:
case DataType::VECTOR_INT8:
return std::make_unique<VecIndexCreator>(type, config, context);
default:
PanicInfo(DataTypeInvalid,
Expand Down
23 changes: 23 additions & 0 deletions internal/core/src/indexbuilder/index_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,29 @@ BuildSparseFloatVecIndex(CIndex index,
return status;
}

// Builds an int8 vector index from a flat buffer of int8 values.
//
// index:          opaque handle; it is reinterpreted as an IndexCreatorBase*
//                 and must actually be a VecIndexCreator.
// int8_value_num: total number of int8 elements in `vectors`; the row count
//                 is int8_value_num / dim, where dim is read from the index.
// vectors:        the int8 data passed to knowhere::GenDataSet.
//
// Returns a CStatus with error_code Success and an empty message, or
// UnexpectedError with a strdup'ed copy of the exception message when a
// std::exception is caught.
// NOTE: Codecov reported this function as not covered by tests in this commit.
CStatus
BuildInt8VecIndex(CIndex index, int64_t int8_value_num, const int8_t* vectors) {
    auto status = CStatus();
    try {
        AssertInfo(index,
                   "failed to build int8 vector index, passed index was null");
        auto real_index =
            reinterpret_cast<milvus::indexbuilder::IndexCreatorBase*>(index);
        auto cIndex =
            dynamic_cast<milvus::indexbuilder::VecIndexCreator*>(real_index);
        // dynamic_cast yields nullptr when the handle is not a
        // VecIndexCreator; report that instead of dereferencing it.
        AssertInfo(cIndex != nullptr,
                   "failed to build int8 vector index, passed index was not "
                   "a vector index creator");
        auto dim = cIndex->dim();
        // dim is the divisor below; reject zero/negative values up front.
        AssertInfo(dim > 0,
                   "failed to build int8 vector index, dim must be positive");
        auto row_nums = int8_value_num / dim;
        auto ds = knowhere::GenDataSet(row_nums, dim, vectors);
        cIndex->Build(ds);
        status.error_code = Success;
        status.error_msg = "";
    } catch (const std::exception& e) {
        status.error_code = UnexpectedError;
        status.error_msg = strdup(e.what());
    }
    return status;
}

// field_data:
// 1, serialized proto::schema::BoolArray, if type is bool;
// 2, serialized proto::schema::StringArray, if type is string;
Expand Down
3 changes: 3 additions & 0 deletions internal/core/src/indexbuilder/index_c.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ BuildSparseFloatVecIndex(CIndex index,
int64_t dim,
const uint8_t* vectors);

// Builds an int8 vector index for `index` from the int8 buffer `vectors`.
// NOTE(review): the definition names the second parameter `int8_value_num`
// and divides it by the index dimension to obtain the row count, so
// `data_size` is presumably the total number of int8 elements (not the
// number of rows) -- confirm and consider aligning the two names.
CStatus
BuildInt8VecIndex(CIndex index, int64_t data_size, const int8_t* vectors);

// field_data:
// 1, serialized proto::schema::BoolArray, if type is bool;
// 2, serialized proto::schema::StringArray, if type is string;
Expand Down
6 changes: 6 additions & 0 deletions internal/core/src/segcore/vector_index_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,12 @@ ValidateIndexParams(const char* index_type,
knowhere::Version::GetCurrentVersion().VersionNumber(),
json,
error_msg);
} else if (dataType == milvus::DataType::VECTOR_INT8) {
status = knowhere::IndexStaticFaced<knowhere::int8>::ConfigCheck(

Check warning on line 77 in internal/core/src/segcore/vector_index_c.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/segcore/vector_index_c.cpp#L77

Added line #L77 was not covered by tests
index_type,
knowhere::Version::GetCurrentVersion().VersionNumber(),

Check warning on line 79 in internal/core/src/segcore/vector_index_c.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/segcore/vector_index_c.cpp#L79

Added line #L79 was not covered by tests
json,
error_msg);
} else {
status = knowhere::Status::invalid_args;
}
Expand Down
24 changes: 16 additions & 8 deletions internal/distributed/proxy/httpserver/handler_v1_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1270,7 +1270,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) {
"binaryVector": "AQ==",
"float16Vector": [3.0],
"bfloat16Vector": [4.4, 442],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
"sparseFloatVector": {"1": 0.1, "2": 0.44},
"int8Vector": [1, 2]
}
]
}`),
Expand All @@ -1289,7 +1290,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) {
"binaryVector": "AQ==",
"float16Vector": [3, 3.0],
"bfloat16Vector": [4.4, 442],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
"sparseFloatVector": {"1": 0.1, "2": 0.44},
"int8Vector": [1, 2]
}
]
}`),
Expand All @@ -1308,7 +1310,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) {
"binaryVector": "AQ==",
"float16Vector": [3, 3],
"bfloat16Vector": [4.4, 442],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
"sparseFloatVector": {"1": 0.1, "2": 0.44},
"int8Vector": [1, 2]
}
]
}`),
Expand All @@ -1326,7 +1329,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) {
"binaryVector": "AQ==",
"float16Vector": "AQIDBA==",
"bfloat16Vector": "AQIDBA==",
"sparseFloatVector": {"1": 0.1, "2": 0.44}
"sparseFloatVector": {"1": 0.1, "2": 0.44},
"int8Vector": [1, 2]
}
]
}`),
Expand All @@ -1344,7 +1348,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) {
"binaryVector": "AQ==",
"float16Vector": [3, 3.0, 3],
"bfloat16Vector": [4.4, 44],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
"sparseFloatVector": {"1": 0.1, "2": 0.44},
"int8Vector": [1, 2]
}
]
}`),
Expand All @@ -1363,7 +1368,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) {
"binaryVector": "AQ==",
"float16Vector": [3, 3.0],
"bfloat16Vector": [4.4, 442, 44],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
"sparseFloatVector": {"1": 0.1, "2": 0.44},
"int8Vector": [1, 2]
}
]
}`),
Expand All @@ -1382,7 +1388,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) {
"binaryVector": "AQ==",
"float16Vector": "AQIDBA==",
"bfloat16Vector": [4.4, 442],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
"sparseFloatVector": {"1": 0.1, "2": 0.44},
"int8Vector": [1, 2]
},
{
"book_id": 1,
Expand All @@ -1391,7 +1398,8 @@ func TestFp16Bf16VectorsV1(t *testing.T) {
"binaryVector": "AQ==",
"float16Vector": [3.1, 3.1],
"bfloat16Vector": "AQIDBA==",
"sparseFloatVector": {"3": 1.1, "2": 0.44}
"sparseFloatVector": {"3": 1.1, "2": 0.44},
"int8Vector": [1, 2]
}
]
}`),
Expand Down
27 changes: 19 additions & 8 deletions internal/distributed/proxy/httpserver/handler_v2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2000,10 +2000,13 @@ func generateCollectionSchemaWithVectorFields() *schemapb.CollectionSchema {
bfloat16VectorField.Name = "bfloat16Vector"
sparseFloatVectorField := generateVectorFieldSchema(schemapb.DataType_SparseFloatVector)
sparseFloatVectorField.Name = "sparseFloatVector"
int8VectorField := generateVectorFieldSchema(schemapb.DataType_Int8Vector)
int8VectorField.Name = "int8Vector"
collSchema.Fields = append(collSchema.Fields, binaryVectorField)
collSchema.Fields = append(collSchema.Fields, float16VectorField)
collSchema.Fields = append(collSchema.Fields, bfloat16VectorField)
collSchema.Fields = append(collSchema.Fields, sparseFloatVectorField)
collSchema.Fields = append(collSchema.Fields, int8VectorField)
return collSchema
}

Expand Down Expand Up @@ -2031,7 +2034,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) {
"binaryVector": "AQ==",
"float16Vector": [3.0],
"bfloat16Vector": [4.4, 442],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
"sparseFloatVector": {"1": 0.1, "2": 0.44},
"int8Vector": [1, 2]
}
]
}`),
Expand All @@ -2050,7 +2054,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) {
"binaryVector": "AQ==",
"float16Vector": [3, 3.0],
"bfloat16Vector": [4.4, 442],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
"sparseFloatVector": {"1": 0.1, "2": 0.44},
"int8Vector": [1, 2]
}
]
}`),
Expand All @@ -2068,7 +2073,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) {
"binaryVector": "AQ==",
"float16Vector": [3, 3],
"bfloat16Vector": [4.4, 442],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
"sparseFloatVector": {"1": 0.1, "2": 0.44},
"int8Vector": [1, 2]
}
]
}`),
Expand All @@ -2085,7 +2091,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) {
"binaryVector": "AQ==",
"float16Vector": "AQIDBA==",
"bfloat16Vector": "AQIDBA==",
"sparseFloatVector": {"1": 0.1, "2": 0.44}
"sparseFloatVector": {"1": 0.1, "2": 0.44},
"int8Vector": [1, 2]
}
]
}`),
Expand All @@ -2102,7 +2109,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) {
"binaryVector": "AQ==",
"float16Vector": [3, 3.0, 3],
"bfloat16Vector": [4.4, 44],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
"sparseFloatVector": {"1": 0.1, "2": 0.44},
"int8Vector": [1, 2]
}
]
}`),
Expand All @@ -2121,7 +2129,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) {
"binaryVector": "AQ==",
"float16Vector": [3, 3.0],
"bfloat16Vector": [4.4, 442, 44],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
"sparseFloatVector": {"1": 0.1, "2": 0.44},
"int8Vector": [1, 2]
}
]
}`),
Expand All @@ -2140,7 +2149,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) {
"binaryVector": "AQ==",
"float16Vector": "AQIDBA==",
"bfloat16Vector": [4.4, 442],
"sparseFloatVector": {"1": 0.1, "2": 0.44}
"sparseFloatVector": {"1": 0.1, "2": 0.44},
"int8Vector": [1, 2]
},
{
"book_id": 1,
Expand All @@ -2149,7 +2159,8 @@ func TestFp16Bf16VectorsV2(t *testing.T) {
"binaryVector": "AQ==",
"float16Vector": [3.1, 3.1],
"bfloat16Vector": "AQIDBA==",
"sparseFloatVector": {"3": 1.1, "2": 0.44}
"sparseFloatVector": {"3": 1.1, "2": 0.44},
"int8Vector": [1, 2]
}
]
}`),
Expand Down
Loading

0 comments on commit 84c8e46

Please sign in to comment.