Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Reduce lock cost in AsciiInfo updates #12363

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 51 additions & 20 deletions velox/vector/SimpleVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,14 @@ struct AsciiInfo {
isAllAscii_ = f;
}

bool asciiComputedRowsEmpty() const {
return asciiComputedRowsEmpty_;
}

void setAsciiComputedRowsEmpty(bool value) {
asciiComputedRowsEmpty_ = value;
}

/// Returns locked for read bit vector with bits set for rows where ascii was
/// processed.
auto readLockedAsciiComputedRows() const {
Expand All @@ -69,12 +77,6 @@ struct AsciiInfo {
return asciiComputedRows_.wlock();
}

/// Returns upgradable locked bit vector with bits set for rows where ascii
/// was processed.
auto upgradableLockedAsciiComputedRows() {
return asciiComputedRows_.ulock();
}

private:
// isAllAscii_ and asciiComputedRows_ are thread-safe because input vectors
// can be shared across threads, hence this make their mutation thread safe.
Expand All @@ -84,6 +86,8 @@ struct AsciiInfo {
// True is all strings in asciiComputedRows_ are ASCII.
std::atomic_bool isAllAscii_{false};

std::atomic_bool asciiComputedRowsEmpty_{true};

// If T is StringView, store set of rows where we have computed asciiness.
// A set bit means the row was processed.
folly::Synchronized<SelectivityVector> asciiComputedRows_;
Expand Down Expand Up @@ -341,14 +345,21 @@ class SimpleVector : public BaseVector {
}

wlockedAsciiComputedRows->select(rows);
asciiInfo.setAsciiComputedRowsEmpty(
!wlockedAsciiComputedRows->hasSelections());
return asciiInfo.isAllAscii();
}

/// Clears asciiness state.
template <typename U = T>
typename std::enable_if_t<std::is_same_v<U, StringView>, void>
invalidateIsAscii() {
asciiInfo.writeLockedAsciiComputedRows()->clearAll();
if (asciiInfo.asciiComputedRowsEmpty()) {
return;
}
auto wlock = asciiInfo.writeLockedAsciiComputedRows();
wlock->clearAll();
asciiInfo.setAsciiComputedRowsEmpty(true);
asciiInfo.setIsAllAscii(false);
}

Expand All @@ -367,14 +378,18 @@ class SimpleVector : public BaseVector {
}

wlockedAsciiComputedRows->select(rows);
asciiInfo.setAsciiComputedRowsEmpty(
!wlockedAsciiComputedRows->hasSelections());
}

template <typename U = T>
typename std::enable_if_t<std::is_same_v<U, StringView>, void> setAllIsAscii(
bool ascii) {
ensureIsAsciiCapacity();
auto wlock = asciiInfo.writeLockedAsciiComputedRows();
wlock->setAll();
asciiInfo.setIsAllAscii(ascii);
asciiInfo.writeLockedAsciiComputedRows()->setAll();
asciiInfo.setAsciiComputedRowsEmpty(!wlock->hasSelections());
}

template <typename U = T>
Expand Down Expand Up @@ -438,26 +453,40 @@ class SimpleVector : public BaseVector {
template <typename U = T>
typename std::enable_if_t<std::is_same_v<U, StringView>, void>
ensureIsAsciiCapacity() {
auto ulockedAsciiComputedRows{
asciiInfo.upgradableLockedAsciiComputedRows()};
if (ulockedAsciiComputedRows->size() < length_) {
ulockedAsciiComputedRows.moveFromUpgradeToWrite()->resize(length_, false);
{
auto rlock = asciiInfo.readLockedAsciiComputedRows();
if (rlock->size() >= length_) {
return;
}
}
auto wlock = asciiInfo.writeLockedAsciiComputedRows();
if (wlock->size() >= length_) {
return;
}
wlock->resize(length_, false);
asciiInfo.setAsciiComputedRowsEmpty(!wlock->hasSelections());
}

/// Ensure asciiInfo is of the correct size. But only if it is not empty.
template <typename U = T>
typename std::enable_if_t<std::is_same_v<U, StringView>, void>
resizeIsAsciiIfNotEmpty(vector_size_t size, bool newAscii) {
auto ulockedAsciiComputedRows{
asciiInfo.upgradableLockedAsciiComputedRows()};
if (ulockedAsciiComputedRows->hasSelections()) {
if (ulockedAsciiComputedRows->size() < size) {
ulockedAsciiComputedRows.moveFromUpgradeToWrite()->resize(
size, newAscii);
asciiInfo.setIsAllAscii(asciiInfo.isAllAscii() & newAscii);
if (asciiInfo.asciiComputedRowsEmpty()) {
return;
}
{
auto rlock = asciiInfo.readLockedAsciiComputedRows();
if (!rlock->hasSelections() || rlock->size() >= size) {
return;
}
}
auto wlock = asciiInfo.writeLockedAsciiComputedRows();
if (!wlock->hasSelections() || wlock->size() >= size) {
return;
}
wlock->resize(size, newAscii);
asciiInfo.setIsAllAscii(asciiInfo.isAllAscii() & newAscii);
asciiInfo.setAsciiComputedRowsEmpty(!wlock->hasSelections());
}

/**
Expand Down Expand Up @@ -493,7 +522,9 @@ class SimpleVector : public BaseVector {

if constexpr (std::is_same_v<T, StringView>) {
if (rows) {
asciiInfo.writeLockedAsciiComputedRows()->deselect(*rows);
auto wlock = asciiInfo.writeLockedAsciiComputedRows();
wlock->deselect(*rows);
asciiInfo.setAsciiComputedRowsEmpty(!wlock->hasSelections());
} else {
invalidateIsAscii();
}
Expand Down
Loading