Skip to content

Commit

Permalink
enhance: Accelerate find_first by utilizing bitset simd methods (#39004)
Browse files Browse the repository at this point in the history

Related to #39003

Signed-off-by: Congqi Xia <[email protected]>
  • Loading branch information
congqixia authored Jan 7, 2025
1 parent ee9a279 commit 72f5b85
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 14 deletions.
18 changes: 11 additions & 7 deletions internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1255,20 +1255,24 @@ ChunkedSegmentSealedImpl::find_first(int64_t limit,
std::vector<int64_t> seg_offsets;
seg_offsets.reserve(limit);

// flip bitset since `find_next` is used to find true.
auto flipped = bitset.clone();
flipped.flip();

int64_t offset = 0;
for (; hit_num < limit && offset < num_rows_.value(); offset++) {
std::optional<size_t> result = flipped.find_first();
while (result.has_value() && hit_num < limit) {
hit_num++;
seg_offsets.push_back(result.value());
offset = result.value();
if (offset >= size) {
// In fact, this case won't happen on sealed segments.
continue;
}

if (!bitset[offset]) {
seg_offsets.push_back(offset);
hit_num++;
}
result = flipped.find_next(offset);
}

return {seg_offsets, more_hit_than_limit && offset != num_rows_.value()};
return {seg_offsets, more_hit_than_limit && result.has_value()};
}

ChunkedSegmentSealedImpl::ChunkedSegmentSealedImpl(
Expand Down
19 changes: 12 additions & 7 deletions internal/core/src/segcore/SegmentSealedImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1714,20 +1714,25 @@ SegmentSealedImpl::find_first(int64_t limit, const BitsetType& bitset) const {
std::vector<int64_t> seg_offsets;
seg_offsets.reserve(limit);

// flip bitset since `find_first` & `find_next` is used to find true.
// could be optimized by support find false in bitset.
auto flipped = bitset.clone();
flipped.flip();

int64_t offset = 0;
for (; hit_num < limit && offset < num_rows_.value(); offset++) {
std::optional<size_t> result = flipped.find_first();
while (result.has_value() && hit_num < limit) {
hit_num++;
seg_offsets.push_back(result.value());
offset = result.value();
if (offset >= size) {
// In fact, this case won't happen on sealed segments.
continue;
}

if (!bitset[offset]) {
seg_offsets.push_back(offset);
hit_num++;
}
result = flipped.find_next(offset);
}

return {seg_offsets, more_hit_than_limit && offset != num_rows_.value()};
return {seg_offsets, more_hit_than_limit && result.has_value()};
}

SegcoreError
Expand Down

0 comments on commit 72f5b85

Please sign in to comment.