Skip to content

Commit

Permalink
misc(fuzzer): Add stats about verification against reference DB in ex…
Browse files Browse the repository at this point in the history
…pression fuzzer (#12383)

Summary:

This diffs add stats reported by the expression fuzzer about how many test cases are verified against Presto and how many fail in both Velox and the reference DB.

Differential Revision: D69819014
  • Loading branch information
kagamiori authored and facebook-github-bot committed Feb 20, 2025
1 parent 82e00de commit 848d478
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 28 deletions.
40 changes: 31 additions & 9 deletions velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,11 +293,13 @@ void ExpressionFuzzerVerifier::retryWithTry(
}

std::vector<ResultOrError> tryResults;
std::vector<test::ExpressionVerifier::VerificationState>
tryVerificationStates;

// The function throws if anything goes wrong except
// UNSUPPORTED_INPUT_UNCATCHABLE errors.
try {
tryResults = verifier_.verify(
std::tie(tryResults, tryVerificationStates) = verifier_.verify(
tryPlans,
inputsToRetry,
resultVector ? BaseVector::copy(*resultVector) : nullptr,
Expand Down Expand Up @@ -371,7 +373,10 @@ void ExpressionFuzzerVerifier::go() {

auto startTime = std::chrono::system_clock::now();
size_t i = 0;
size_t totalTestCases = 0;
size_t numFailed = 0;
size_t numVerified = 0;
size_t numReferenceUnsupported = 0;

// TODO: some expression will throw exception for NaN input, eg: IN
// predicate for floating point. remove this constraint once that are fixed
Expand Down Expand Up @@ -402,12 +407,14 @@ void ExpressionFuzzerVerifier::go() {

auto [inputTestCases, inputRowMetadata] =
generateInput(inputType, *vectorFuzzer_, inputGenerators);
totalTestCases += inputTestCases.size();

auto resultVectors = generateResultVectors(plans);
std::vector<fuzzer::ResultOrError> results;
std::vector<test::ExpressionVerifier::VerificationState> verificationStates;

try {
results = verifier_.verify(
std::tie(results, verificationStates) = verifier_.verify(
plans,
inputTestCases,
resultVectors ? BaseVector::copy(*resultVectors) : nullptr,
Expand All @@ -430,12 +437,10 @@ void ExpressionFuzzerVerifier::go() {
// cannot throw. Expressions that throw UNSUPPORTED_INPUT_UNCATCHABLE
// errors are not supported.
std::vector<fuzzer::InputTestCase> inputsToRetry;
bool anyInputsThrew = false;
bool anyInputsThrewButRetryable = false;
for (int j = 0; j < results.size(); j++) {
auto& result = results[j];
if (result.exceptionPtr) {
anyInputsThrew = true;
if (!result.unsupportedInputUncatchableError && options_.retryWithTry) {
anyInputsThrewButRetryable = true;
inputsToRetry.push_back(inputTestCases[j]);
Expand All @@ -446,9 +451,21 @@ void ExpressionFuzzerVerifier::go() {
// executed.
inputsToRetry.push_back(inputTestCases[j]);
}
}
if (anyInputsThrew) {
++numFailed;

auto& verificationState = verificationStates[j];
switch (verificationState) {
case test::ExpressionVerifier::VerificationState::
kVerifiedAgainstReference:
++numVerified;
break;
case test::ExpressionVerifier::VerificationState::
kReferencePathUnsupported:
++numReferenceUnsupported;
break;
case test::ExpressionVerifier::VerificationState::kBothPathsThrow:
++numFailed;
break;
}
}
if (anyInputsThrewButRetryable) {
LOG(INFO)
Expand All @@ -462,8 +479,13 @@ void ExpressionFuzzerVerifier::go() {
}
logStats();

LOG(ERROR) << "Total iterations: " << i;
LOG(ERROR) << "Total failed: " << numFailed;
LOG(ERROR) << "Total test cases: " << totalTestCases;
LOG(ERROR) << "Total test cases verified in the reference DB: "
<< numVerified;
LOG(ERROR) << "Total test cases failed in both Velox and the reference DB: "
<< numFailed;
LOG(ERROR) << "Total test cases unsupported in the reference DB: "
<< numReferenceUnsupported;
}

} // namespace facebook::velox::fuzzer
49 changes: 33 additions & 16 deletions velox/expression/tests/ExpressionVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,10 @@ exec::ExprSet createExprSetCommon(

} // namespace

std::vector<fuzzer::ResultOrError> ExpressionVerifier::verify(
std::pair<
std::vector<fuzzer::ResultOrError>,
std::vector<ExpressionVerifier::VerificationState>>
ExpressionVerifier::verify(
const std::vector<core::TypedExprPtr>& plans,
const std::vector<fuzzer::InputTestCase>& inputTestCases,
VectorPtr&& resultVector,
Expand Down Expand Up @@ -178,6 +181,7 @@ std::vector<fuzzer::ResultOrError> ExpressionVerifier::verify(
}

std::vector<fuzzer::ResultOrError> results;
std::vector<VerificationState> verificationStates;
// Share ExpressionSet between consecutive iterations to simulate its usage in
// FilterProject.
exec::ExprSet exprSetCommon =
Expand Down Expand Up @@ -303,20 +307,23 @@ std::vector<fuzzer::ResultOrError> ExpressionVerifier::verify(
if (exceptionCommonPtr || exceptionReference) {
// Throws in case only one evaluation path throws exception.
// Otherwise, return false to signal that the expression failed.
if (!(defaultNull &&
referenceQueryRunner_->runnerType() ==
ReferenceQueryRunner::RunnerType::kPrestoQueryRunner) &&
!(exceptionCommonPtr && exceptionReference)) {
LOG(ERROR) << "Only "
<< (exceptionCommonPtr ? "common" : "reference")
<< " path threw exception:";
if (exceptionCommonPtr) {
std::rethrow_exception(exceptionCommonPtr);
} else {
auto referenceSql =
referenceQueryRunner_->toSql(projectionPlan);
VELOX_FAIL(
"Reference path throws for query: {}", *referenceSql);
if (exceptionCommonPtr && exceptionReference) {
verificationStates.push_back(VerificationState::kBothPathsThrow);
} else {
if (!(defaultNull &&
referenceQueryRunner_->runnerType() ==
ReferenceQueryRunner::RunnerType::kPrestoQueryRunner)) {
LOG(ERROR) << "Only "
<< (exceptionCommonPtr ? "common" : "reference")
<< " path threw exception:";
if (exceptionCommonPtr) {
std::rethrow_exception(exceptionCommonPtr);
} else {
auto referenceSql =
referenceQueryRunner_->toSql(projectionPlan);
VELOX_FAIL(
"Reference path throws for query: {}", *referenceSql);
}
}
}
} else {
Expand All @@ -343,6 +350,8 @@ std::vector<fuzzer::ResultOrError> ExpressionVerifier::verify(
{commonEvalResultRow}),
"Velox and reference DB results don't match");
LOG(INFO) << "Verified results against reference DB";
verificationStates.push_back(
VerificationState::kVerifiedAgainstReference);
}
} catch (...) {
persistReproInfoIfNeeded(
Expand All @@ -353,6 +362,10 @@ std::vector<fuzzer::ResultOrError> ExpressionVerifier::verify(
complexConstants);
throw;
}
} else {
LOG(INFO) << "Reference DB doesn't support this query";
verificationStates.push_back(
VerificationState::kReferencePathUnsupported);
}
} else {
VLOG(1) << "Execute with simplified expression eval path.";
Expand Down Expand Up @@ -423,6 +436,7 @@ std::vector<fuzzer::ResultOrError> ExpressionVerifier::verify(
{nullptr,
exceptionCommonPtr ? exceptionCommonPtr : exceptionSimplifiedPtr,
unsupportedInputUncatchableError});
verificationStates.push_back(VerificationState::kBothPathsThrow);
continue;
} else {
// Throws in case output is different.
Expand All @@ -436,6 +450,8 @@ std::vector<fuzzer::ResultOrError> ExpressionVerifier::verify(
"simplified path results",
rows);
}
verificationStates.push_back(
VerificationState::kVerifiedAgainstReference);
}
} catch (...) {
persistReproInfoIfNeeded(
Expand Down Expand Up @@ -469,7 +485,8 @@ std::vector<fuzzer::ResultOrError> ExpressionVerifier::verify(
unsupportedInputUncatchableError});
}
}
return results;
VELOX_CHECK_EQ(results.size(), verificationStates.size());
return std::make_pair(results, verificationStates);
}

void ExpressionVerifier::persistReproInfoIfNeeded(
Expand Down
14 changes: 11 additions & 3 deletions velox/expression/tests/ExpressionVerifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ class ExpressionVerifier {
options_(options),
referenceQueryRunner_{referenceQueryRunner} {}

enum class VerificationState {
kVerifiedAgainstReference = 0,
kBothPathsThrow = 1,
kReferencePathUnsupported = 2,
};
// Executes expressions using common path (all evaluation
// optimizations) and compares the result with either the simplified path or a
// reference query runner. This execution is done for each input test cases
Expand All @@ -73,9 +78,12 @@ class ExpressionVerifier {
// - result of evaluating the expressions if both paths succeeded and
// returned the exact same vectors.
// - exception thrown by the common path if both paths failed with compatible
// exceptions.
// - throws otherwise (incompatible exceptions or different results).
std::vector<fuzzer::ResultOrError> verify(
// exceptions. Throws otherwise (incompatible exceptions or different
// results).
// - a verification state indicating if the result was verified against the
// reference DB.
std::pair<std::vector<fuzzer::ResultOrError>, std::vector<VerificationState>>
verify(
const std::vector<core::TypedExprPtr>& plans,
const std::vector<fuzzer::InputTestCase>& inputTestCases,
VectorPtr&& resultVector,
Expand Down

0 comments on commit 848d478

Please sign in to comment.