diff --git a/internal/core/src/common/protobuf_utils.cpp b/internal/core/src/common/protobuf_utils.cpp index beb62e9322044..14972b0964614 100644 --- a/internal/core/src/common/protobuf_utils.cpp +++ b/internal/core/src/common/protobuf_utils.cpp @@ -1,10 +1,23 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + #include "common/protobuf_utils.h" #include "common/protobuf_utils_c.h" +// Make a static_assert to ensure that the size and alignment of the C++ and C static_assert( sizeof(milvus::ProtoLayout) == sizeof(ProtoLayout), "Size of milvus::ProtoLayout is not equal to size of ProtoLayoutInterface"); +// Make a static_assert to ensure that the size and alignment of the C++ and C static_assert(alignof(milvus::ProtoLayout) == alignof(ProtoLayout), "Alignment of milvus::ProtoLayout is not equal to alignment of " "ProtoLayoutInterface"); @@ -19,3 +32,14 @@ void ReleaseProtoLayout(ProtoLayoutInterface proto) { delete reinterpret_cast(proto); } + +namespace milvus { +ProtoLayout::ProtoLayout() : blob_(nullptr), size_(0) { +} + +ProtoLayout::~ProtoLayout() { + if (blob_ != nullptr) { + delete[] static_cast(blob_); + } +} +} // namespace milvus diff --git a/internal/core/src/common/protobuf_utils.h b/internal/core/src/common/protobuf_utils.h index c3ac410fe7b1d..924a7b90c73a4 100644 --- a/internal/core/src/common/protobuf_utils.h +++ b/internal/core/src/common/protobuf_utils.h @@ -42,33 +42,42 @@ RepeatedKeyValToMap( class ProtoLayout; using ProtoLayoutPtr = std::unique_ptr; +// ProtoLayout is a c++ type for esaier resource management at C-side. +// It's always keep same memory layout with ProtoLayout at C side for cgo call. class ProtoLayout { public: - static ProtoLayoutPtr - CreateEmpty() { - return std::make_unique(); - } + ProtoLayout(); - ProtoLayout() : blob(nullptr), size(0) { - } + ProtoLayout(const ProtoLayout&) = delete; - ~ProtoLayout() { - if (blob != nullptr) { - delete[] static_cast(blob); - blob = nullptr; - } - } + ProtoLayout(ProtoLayout&&) = delete; - void - Initialize(size_t initial_size) { - assert(size == 0); - auto new_blob = new uint8_t[initial_size]; - this->blob = new_blob; - this->size = initial_size; + ProtoLayout& + operator=(const ProtoLayout&) = delete; + + ProtoLayout& + operator=(ProtoLayout&&) = delete; + + ~ProtoLayout(); + + // Serialize the proto into bytes and hold it in the layout. + // Return false if failure. + template + bool + SerializeAndHoldProto(T& proto) { + if (blob_ != nullptr || size_ != 0) { + throw std::runtime_error( + "ProtoLayout should always be empty " + "before calling SerializeAndHoldProto"); + } + size_ = proto.ByteSizeLong(); + blob_ = new uint8_t[size_]; + return proto.SerializeToArray(blob_, size_); } - void* blob; - size_t size; + private: + void* blob_; + size_t size_; }; } //namespace milvus diff --git a/internal/core/src/common/protobuf_utils_c.h b/internal/core/src/common/protobuf_utils_c.h index 3e99491dd80ac..48af575281c8d 100644 --- a/internal/core/src/common/protobuf_utils_c.h +++ b/internal/core/src/common/protobuf_utils_c.h @@ -1,15 +1,33 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + #pragma once #ifdef __cplusplus extern "C" { #endif +// ProtoLayout is a common ffi type for cgo call with serialized protobuf message. +// It's always keep same memory layout with milvus::ProtoLayout at C++ side. typedef struct ProtoLayout { void* blob; size_t size; } ProtoLayout; -typedef ProtoLayout* ProtoLayoutInterface; +// ProtoLayoutInterface is the pointer alias for ProtoLayout. +// It should always created by CreateProtoLayout and released by ReleaseProtoLayout. +typedef struct ProtoLayout* ProtoLayoutInterface; +// CreateProtoLayout is used to create an empty ProtoLayout. +// When you want to create a ProtoLayout at go-side, and return some data from C-side. +// You should use this API. ProtoLayoutInterface CreateProtoLayout(); diff --git a/internal/core/src/index/BitmapIndex.cpp b/internal/core/src/index/BitmapIndex.cpp index c4649c69a93ed..87e597d108de9 100644 --- a/internal/core/src/index/BitmapIndex.cpp +++ b/internal/core/src/index/BitmapIndex.cpp @@ -298,8 +298,8 @@ BitmapIndex::Upload(const Config& config) { for (auto& file : remote_path_to_size) { index_files.emplace_back(file.first, file.second); } - return std::make_unique( - file_manager_->GetAddedTotalMemSize(), std::move(index_files)); + return CreateIndexResult::New(file_manager_->GetAddedTotalMemSize(), + std::move(index_files)); } template diff --git a/internal/core/src/index/CreateIndexResult.cpp b/internal/core/src/index/CreateIndexResult.cpp index cd09a92f3abf3..0615c4af5f4de 100644 --- a/internal/core/src/index/CreateIndexResult.cpp +++ b/internal/core/src/index/CreateIndexResult.cpp @@ -1,6 +1,26 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + #include "index/CreateIndexResult.h" namespace milvus::index { + +CreateIndexResultPtr +CreateIndexResult::New( + int64_t mem_size, + std::vector&& serialized_index_infos) { + return std::unique_ptr( + new CreateIndexResult(mem_size, std::move(serialized_index_infos))); +} + CreateIndexResult::CreateIndexResult( int64_t mem_size, std::vector&& serialized_index_infos) @@ -22,9 +42,8 @@ CreateIndexResult::SerializeAt(milvus::ProtoLayout* layout) { serialized_info->set_file_name(info.file_name); serialized_info->set_file_size(info.file_size); } - layout->Initialize(result.ByteSizeLong()); - AssertInfo(result.SerializeToArray(layout->blob, layout->size), - "unmarshal CreateIndexResult failed"); + AssertInfo(layout->SerializeAndHoldProto(result), + "marshal CreateIndexResult failed"); } std::vector diff --git a/internal/core/src/index/CreateIndexResult.h b/internal/core/src/index/CreateIndexResult.h index cf29f4f08ba81..c9ccee2cbfc17 100644 --- a/internal/core/src/index/CreateIndexResult.h +++ b/internal/core/src/index/CreateIndexResult.h @@ -1,3 +1,14 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + #pragma once #include @@ -17,15 +28,32 @@ class SerializedIndexFileInfo { int64_t file_size; }; +class CreateIndexResult; + +using CreateIndexResultPtr = std::unique_ptr; + class CreateIndexResult { public: - CreateIndexResult( - int64_t mem_size, + // Create a new CreateIndexResult instance. + static CreateIndexResultPtr + New(int64_t mem_size, std::vector&& serialized_index_infos); + CreateIndexResult(const CreateIndexResult&) = delete; + + CreateIndexResult(CreateIndexResult&&) = delete; + + CreateIndexResult& + operator=(const CreateIndexResult&) = delete; + + CreateIndexResult& + operator=(CreateIndexResult&&) = delete; + + // Append a new serialized index file info into the result. void AppendSerializedIndexFileInfo(SerializedIndexFileInfo&& info); + // Serialize the result into the target proto layout. void SerializeAt(milvus::ProtoLayout* layout); @@ -39,10 +67,11 @@ class CreateIndexResult { GetSerializedSize() const; private: + CreateIndexResult( + int64_t mem_size, + std::vector&& serialized_index_infos); + int64_t mem_size_; std::vector serialized_index_infos_; }; - -using CreateIndexResultPtr = std::unique_ptr; - } // namespace milvus::index diff --git a/internal/core/src/index/InvertedIndexTantivy.cpp b/internal/core/src/index/InvertedIndexTantivy.cpp index e63a970df62c2..22468d5a7af84 100644 --- a/internal/core/src/index/InvertedIndexTantivy.cpp +++ b/internal/core/src/index/InvertedIndexTantivy.cpp @@ -167,8 +167,8 @@ InvertedIndexTantivy::Upload(const Config& config) { for (auto& file : remote_mem_path_to_size) { index_files.emplace_back(file.first, file.second); } - return std::make_unique( - mem_file_manager_->GetAddedTotalMemSize(), std::move(index_files)); + return CreateIndexResult::New(mem_file_manager_->GetAddedTotalMemSize(), + std::move(index_files)); } template diff --git a/internal/core/src/index/ScalarIndexSort.cpp b/internal/core/src/index/ScalarIndexSort.cpp index 8d66e2c089328..da2a8001a6e39 100644 --- a/internal/core/src/index/ScalarIndexSort.cpp +++ b/internal/core/src/index/ScalarIndexSort.cpp @@ -162,8 +162,8 @@ ScalarIndexSort::Upload(const Config& config) { for (auto& file : remote_paths_to_size) { index_files.emplace_back(file.first, file.second); } - return std::make_unique( - file_manager_->GetAddedTotalMemSize(), std::move(index_files)); + return CreateIndexResult::New(file_manager_->GetAddedTotalMemSize(), + std::move(index_files)); } template diff --git a/internal/core/src/index/StringIndexMarisa.cpp b/internal/core/src/index/StringIndexMarisa.cpp index 9ba1509ed15e4..29972f54a894a 100644 --- a/internal/core/src/index/StringIndexMarisa.cpp +++ b/internal/core/src/index/StringIndexMarisa.cpp @@ -185,8 +185,8 @@ StringIndexMarisa::Upload(const Config& config) { for (auto& file : remote_paths_to_size) { index_files.emplace_back(file.first, file.second); } - return std::make_unique( - file_manager_->GetAddedTotalMemSize(), std::move(index_files)); + return CreateIndexResult::New(file_manager_->GetAddedTotalMemSize(), + std::move(index_files)); } void diff --git a/internal/core/src/index/TextMatchIndex.cpp b/internal/core/src/index/TextMatchIndex.cpp index fd0fa64654caf..c60d5a30a18a9 100644 --- a/internal/core/src/index/TextMatchIndex.cpp +++ b/internal/core/src/index/TextMatchIndex.cpp @@ -113,8 +113,8 @@ TextMatchIndex::Upload(const Config& config) { for (auto& file : remote_mem_path_to_size) { index_files.emplace_back(file.first, file.second); } - return std::make_unique( - mem_file_manager_->GetAddedTotalMemSize(), std::move(index_files)); + return CreateIndexResult::New(mem_file_manager_->GetAddedTotalMemSize(), + std::move(index_files)); } void diff --git a/internal/core/src/index/VectorDiskIndex.cpp b/internal/core/src/index/VectorDiskIndex.cpp index 4a2746d9e3215..0f18175a966f4 100644 --- a/internal/core/src/index/VectorDiskIndex.cpp +++ b/internal/core/src/index/VectorDiskIndex.cpp @@ -129,8 +129,8 @@ VectorDiskAnnIndex::Upload(const Config& config) { for (auto& file : remote_paths_to_size) { index_files.emplace_back(file.first, file.second); } - return std::make_unique( - file_manager_->GetAddedTotalFileSize(), std::move(index_files)); + return CreateIndexResult::New(file_manager_->GetAddedTotalFileSize(), + std::move(index_files)); } template diff --git a/internal/core/src/index/VectorMemIndex.cpp b/internal/core/src/index/VectorMemIndex.cpp index a25c0951663af..35f65468197bd 100644 --- a/internal/core/src/index/VectorMemIndex.cpp +++ b/internal/core/src/index/VectorMemIndex.cpp @@ -103,8 +103,8 @@ VectorMemIndex::Upload(const Config& config) { for (auto& file : remote_paths_to_size) { index_files.emplace_back(file.first, file.second); } - return std::make_unique( - file_manager_->GetAddedTotalMemSize(), std::move(index_files)); + return CreateIndexResult::New(file_manager_->GetAddedTotalMemSize(), + std::move(index_files)); } template diff --git a/internal/core/unittest/test_indexing.cpp b/internal/core/unittest/test_indexing.cpp index 122479dbebd76..66e38d2f708e4 100644 --- a/internal/core/unittest/test_indexing.cpp +++ b/internal/core/unittest/test_indexing.cpp @@ -548,7 +548,7 @@ TEST_P(IndexTest, Mmap) { milvus::index::IndexBasePtr new_index; milvus::index::VectorIndex* vec_index = nullptr; - auto create_index_result= index->Upload(); + auto create_index_result = index->Upload(); index.reset(); new_index = milvus::index::IndexFactory::GetInstance().CreateIndex( @@ -704,7 +704,7 @@ TEST_P(IndexTest, GetVector_EmptySparseVector) { auto create_index_result = index->Upload(); index.reset(); - auto index_files = create_index_result->GetIndexFiles(); + auto index_files = create_index_result->GetIndexFiles(); new_index = milvus::index::IndexFactory::GetInstance().CreateIndex( create_index_info, file_manager_context); load_conf = generate_load_conf(index_type, metric_type, 0); @@ -962,4 +962,23 @@ TEST(Indexing, SearchDiskAnnWithBFloat16) { SearchResult result; EXPECT_NO_THROW(vec_index->Query(xq_dataset, search_info, nullptr, result)); } -#endif \ No newline at end of file +#endif + +TEST(Indexing, CreateIndexResult) { + using milvus::index::CreateIndexResult; + using milvus::index::SerializedIndexFileInfo; + auto result = + CreateIndexResult::New(16, + std::vector{ + SerializedIndexFileInfo{"file1", 100}, + SerializedIndexFileInfo{"file2", 200}}); + result->AppendSerializedIndexFileInfo( + SerializedIndexFileInfo{"file3", 300}); + auto files = result->GetIndexFiles(); + ASSERT_EQ(files.size(), 3); + ASSERT_EQ(files[0], "file1"); + ASSERT_EQ(files[1], "file2"); + ASSERT_EQ(files[2], "file3"); + ASSERT_EQ(result->GetMemSize(), 16); + ASSERT_EQ(result->GetSerializedSize(), 600); +} \ No newline at end of file diff --git a/internal/core/unittest/test_utils.cpp b/internal/core/unittest/test_utils.cpp index 7861dd9baab58..565c538917028 100644 --- a/internal/core/unittest/test_utils.cpp +++ b/internal/core/unittest/test_utils.cpp @@ -199,3 +199,9 @@ TEST(Util, dis_closer) { EXPECT_FALSE(milvus::query::dis_closer(0.1, 0.2, "IP")); EXPECT_FALSE(milvus::query::dis_closer(0.1, 0.1, "IP")); } + +TEST(Util, ProtoLayout) { + milvus::ProtoLayout layout; + milvus::proto::cgo::CreateIndexResult result; + EXPECT_TRUE(layout.SerializeAndHoldProto(result)); +} \ No newline at end of file diff --git a/internal/datacoord/index_meta.go b/internal/datacoord/index_meta.go index 21a7b63038657..95130d937c27d 100644 --- a/internal/datacoord/index_meta.go +++ b/internal/datacoord/index_meta.go @@ -45,6 +45,7 @@ import ( "github.com/milvus-io/milvus/pkg/util/funcutil" "github.com/milvus-io/milvus/pkg/util/indexparams" "github.com/milvus-io/milvus/pkg/util/metricsinfo" + "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/timerecord" "github.com/milvus-io/milvus/pkg/util/tsoutil" "github.com/milvus-io/milvus/pkg/util/typeutil" @@ -154,6 +155,9 @@ func (m *indexMeta) reloadFromKV() error { return err } for _, segIdx := range segmentIndexes { + if segIdx.IndexMemSize == 0 { + segIdx.IndexMemSize = segIdx.IndexSerializedSize * paramtable.Get().DataCoordCfg.IndexMemSizeEstimateMultiplier.GetAsUint64() + } m.updateSegmentIndex(segIdx) metrics.FlushedSegmentFileNum.WithLabelValues(metrics.IndexFileLabel).Observe(float64(len(segIdx.IndexFileKeys))) } diff --git a/internal/metastore/model/segment_index.go b/internal/metastore/model/segment_index.go index b31bd282456aa..03e70add45816 100644 --- a/internal/metastore/model/segment_index.go +++ b/internal/metastore/model/segment_index.go @@ -4,7 +4,6 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus/internal/proto/indexpb" "github.com/milvus-io/milvus/pkg/common" - "github.com/milvus-io/milvus/pkg/util/paramtable" ) type SegmentIndex struct { @@ -38,10 +37,6 @@ func UnmarshalSegmentIndexModel(segIndex *indexpb.SegmentIndex) *SegmentIndex { if segIndex == nil { return nil } - memSize := segIndex.MemSize - if memSize == 0 { - memSize = segIndex.SerializeSize * paramtable.Get().DataCoordCfg.IndexMemSizeEstimateMultiplier.GetAsUint64() - } return &SegmentIndex{ SegmentID: segIndex.SegmentID, CollectionID: segIndex.CollectionID, @@ -57,7 +52,7 @@ func UnmarshalSegmentIndexModel(segIndex *indexpb.SegmentIndex) *SegmentIndex { CreatedUTCTime: segIndex.CreateTime, IndexFileKeys: common.CloneStringList(segIndex.IndexFileKeys), IndexSerializedSize: segIndex.SerializeSize, - IndexMemSize: memSize, + IndexMemSize: segIndex.MemSize, WriteHandoff: segIndex.WriteHandoff, CurrentIndexVersion: segIndex.GetCurrentIndexVersion(), } diff --git a/internal/util/segcore/cgo_util_test.go b/internal/util/segcore/cgo_util_test.go index 3a517ede69d56..f5e2f8592b69e 100644 --- a/internal/util/segcore/cgo_util_test.go +++ b/internal/util/segcore/cgo_util_test.go @@ -2,9 +2,12 @@ package segcore import ( "context" + "runtime" "testing" + "github.com/milvus-io/milvus/internal/proto/cgopb" "github.com/stretchr/testify/assert" + "google.golang.org/protobuf/proto" ) func TestConsumeCStatusIntoError(t *testing.T) { @@ -17,3 +20,28 @@ func TestGetLocalUsedSize(t *testing.T) { assert.NoError(t, err) assert.NotNil(t, size) } + +func TestProtoLayout(t *testing.T) { + layout := CreateProtoLayout() + testProto := cgopb.CreateIndexResult{ + MemSize: 1024, + SerializedIndexInfos: []*cgopb.SerializedIndexFileInfo{ + { + FileName: "test", + FileSize: 768, + }, + }, + } + msg, err := proto.Marshal(&testProto) + defer runtime.KeepAlive(msg) + assert.NoError(t, err) + SetProtoLayout(layout, msg) + + resultProto := cgopb.CreateIndexResult{} + UnmarshalProtoLayout(layout, &resultProto) + + assert.True(t, proto.Equal(&testProto, &resultProto)) + layout.blob = nil + layout.size = 0 + ReleaseProtoLayout(layout) +} diff --git a/internal/util/segcore/cgo_util_test_only.go b/internal/util/segcore/cgo_util_test_only.go new file mode 100644 index 0000000000000..3af2982b08051 --- /dev/null +++ b/internal/util/segcore/cgo_util_test_only.go @@ -0,0 +1,28 @@ +package segcore + +/* +#cgo pkg-config: milvus_core + +#include "common/protobuf_utils_c.h" +*/ +import "C" + +import ( + "reflect" + "unsafe" +) + +func CreateProtoLayout() *C.ProtoLayout { + ptr := C.CreateProtoLayout() + layout := unsafe.Pointer(reflect.ValueOf(ptr).Pointer()) + return (*C.ProtoLayout)(layout) +} + +func SetProtoLayout(protoLayout *C.ProtoLayout, slice []byte) { + protoLayout.blob = unsafe.Pointer(&slice[0]) + protoLayout.size = C.size_t(len(slice)) +} + +func ReleaseProtoLayout(protoLayout *C.ProtoLayout) { + C.ReleaseProtoLayout((C.ProtoLayoutInterface)(unsafe.Pointer(reflect.ValueOf(protoLayout).Pointer()))) +}