Skip to content

Commit

Permalink
feat(fuzzer): Add input generator for json_extract in expression fuzzer (#12367)
Browse files Browse the repository at this point in the history

Summary: Pull Request resolved: #12367

Differential Revision: D68923269
  • Loading branch information
kagamiori authored and facebook-github-bot committed Feb 18, 2025
1 parent 6abe0fb commit 14f1de7
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 13 deletions.
21 changes: 13 additions & 8 deletions velox/common/fuzzer/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,40 +26,45 @@ bool coinToss(FuzzerGenerator& rng, double threshold) {
// Returns a random type built from 'scalarTypes'. A scalar type is returned
// when 'maxDepth' <= 1 or on a random coin flip; otherwise a map, array or row
// type is built recursively, selected by rand % 3. 'keyTypes' is forwarded
// unchanged to every nested call.
// NOTE(review): this hunk appears to be a flattened diff, so the pre-change and
// post-change forms of a changed line sit back to back (the two 'int maxDepth'
// lines and each pair of 'return' statements below).
// NOTE(review): 'scalarTypes' must be non-empty; the scalar branch takes a
// modulo by its size.
TypePtr randType(
FuzzerGenerator& rng,
const std::vector<TypePtr>& scalarTypes,
int maxDepth) {
int maxDepth,
const std::vector<TypePtr>& keyTypes) {
const int numScalarTypes = scalarTypes.size();
// Should we generate a scalar type? Always yes once the depth budget is used
// up, otherwise decided by a random boolean.
if (maxDepth <= 1 || rand<bool>(rng)) {
return scalarTypes[rand<uint32_t>(rng) % numScalarTypes];
}
// Pick one of the three container kinds.
switch (rand<uint32_t>(rng) % 3) {
case 0:
// 'maxDepth' is passed as is; randMapType subtracts one for the value type.
return randMapType(rng, scalarTypes, maxDepth);
return randMapType(rng, scalarTypes, maxDepth, keyTypes);
case 1:
return ARRAY(randType(rng, scalarTypes, maxDepth - 1));
return ARRAY(randType(rng, scalarTypes, maxDepth - 1, keyTypes));
default:
return randRowType(rng, scalarTypes, maxDepth - 1);
return randRowType(rng, scalarTypes, maxDepth - 1, keyTypes);
}
}

// Returns a random MAP type. The key is always a scalar (randType is called
// with depth 0) drawn from 'keyTypes', or from 'scalarTypes' when 'keyTypes' is
// empty. The value is a random type with a depth budget of 'maxDepth' - 1.
// NOTE(review): this hunk appears to be a flattened diff, so the pre-change and
// post-change forms of a changed line sit back to back (the two 'int maxDepth'
// lines and the two argument lists of MAP below).
// NOTE(review): the key and value randType() calls are arguments of the same
// MAP() call, so C++ does not fix which of them draws from 'rng' first; the
// generated type for a given seed may differ between compilers.
TypePtr randMapType(
FuzzerGenerator& rng,
const std::vector<TypePtr>& scalarTypes,
int maxDepth) {
int maxDepth,
const std::vector<TypePtr>& keyTypes) {
// Fall back to the scalar types when no dedicated key types were given.
const auto& selectedKeyTypes = keyTypes.empty() ? scalarTypes : keyTypes;
return MAP(
randType(rng, scalarTypes, 0), randType(rng, scalarTypes, maxDepth - 1));
randType(rng, selectedKeyTypes, 0),
randType(rng, scalarTypes, maxDepth - 1, selectedKeyTypes));
}

// Returns a random ROW type with 1 to 7 fields named "f0", "f1", ... Each field
// type is a separate randType() draw using the same 'maxDepth' (not
// decremented here) and the same 'keyTypes'.
// NOTE(review): this hunk appears to be a flattened diff, so the pre-change and
// post-change forms of a changed line sit back to back (the two 'int maxDepth'
// lines and the two 'fields.push_back' lines below).
RowTypePtr randRowType(
FuzzerGenerator& rng,
const std::vector<TypePtr>& scalarTypes,
int maxDepth) {
int maxDepth,
const std::vector<TypePtr>& keyTypes) {
// 1 + (rand % 7) yields a field count in [1, 7].
int numFields = 1 + rand<uint32_t>(rng) % 7;
std::vector<std::string> names;
std::vector<TypePtr> fields;
for (int i = 0; i < numFields; ++i) {
names.push_back(fmt::format("f{}", i));
fields.push_back(randType(rng, scalarTypes, maxDepth));
fields.push_back(randType(rng, scalarTypes, maxDepth, keyTypes));
}
return ROW(std::move(names), std::move(fields));
}
Expand Down
15 changes: 12 additions & 3 deletions velox/common/fuzzer/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,20 +40,29 @@ enum UTF8CharList {

bool coinToss(FuzzerGenerator& rng, double threshold);

/// Generates a random type from the given scalar types, nested up to maxDepth
/// levels. If keyTypes is non-empty, map key types are chosen from keyTypes;
/// otherwise they are chosen from scalarTypes.
// NOTE(review): this hunk appears to be a flattened diff; 'int maxDepth);' is
// the pre-change parameter line and the two lines after it are its
// replacement.
TypePtr randType(
FuzzerGenerator& rng,
const std::vector<TypePtr>& scalarTypes,
int maxDepth);
int maxDepth,
const std::vector<TypePtr>& keyTypes = {});

/// Similar to randType but always generates a random map type. Takes the same
/// scalarTypes, maxDepth and keyTypes parameters as randType.
// NOTE(review): this hunk appears to be a flattened diff; 'int maxDepth);' is
// the pre-change parameter line and the two lines after it are its
// replacement.
TypePtr randMapType(
FuzzerGenerator& rng,
const std::vector<TypePtr>& scalarTypes,
int maxDepth);
int maxDepth,
const std::vector<TypePtr>& keyTypes = {});

/// Similar to randType but always generates a random row type. Takes the same
/// scalarTypes, maxDepth and keyTypes parameters as randType.
// NOTE(review): this hunk appears to be a flattened diff; 'int maxDepth);' is
// the pre-change parameter line and the two lines after it are its
// replacement.
RowTypePtr randRowType(
FuzzerGenerator& rng,
const std::vector<TypePtr>& scalarTypes,
int maxDepth);
int maxDepth,
const std::vector<TypePtr>& keyTypes = {});

struct DataSpec {
bool includeNaN;
Expand Down
72 changes: 71 additions & 1 deletion velox/expression/fuzzer/ArgValuesGenerators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,26 @@

namespace facebook::velox::fuzzer {

namespace {

/// Returns the scalar types that the JSON argument generators in this file
/// pass to randType() when picking the type represented by generated JSON
/// values. The list is built once, on first use, and the same instance is
/// returned to every caller.
const std::vector<TypePtr>& jsonScalarTypes() {
  static const auto kJsonTypes = std::vector<TypePtr>{
      BOOLEAN(),
      TINYINT(),
      SMALLINT(),
      INTEGER(),
      BIGINT(),
      REAL(),
      DOUBLE(),
      VARCHAR(),
      VARBINARY(),
      TIMESTAMP(),
  };
  return kJsonTypes;
}

} // namespace

std::vector<core::TypedExprPtr> JsonParseArgValuesGenerator::generate(
const CallableSignature& signature,
const VectorFuzzer::Options& options,
Expand All @@ -35,7 +55,8 @@ std::vector<core::TypedExprPtr> JsonParseArgValuesGenerator::generate(
state.inputRowNames_.emplace_back(
fmt::format("c{}", state.inputRowTypes_.size() - 1));

const auto representedType = facebook::velox::randType(rng, 3);
const auto representedType =
facebook::velox::randType(rng, jsonScalarTypes(), 3);
const auto seed = rand<uint32_t>(rng);
const auto nullRatio = options.nullRatio;
state.customInputGenerators_.emplace_back(
Expand All @@ -51,4 +72,53 @@ std::vector<core::TypedExprPtr> JsonParseArgValuesGenerator::generate(
return inputExpressions;
}

/// Generates the two arguments of a call as new input columns and registers a
/// custom input generator for each of them.
///
/// For every signature argument, a column named "c<index>" is appended to
/// state.inputRowTypes_ / state.inputRowNames_ and a field-access expression
/// on that column is returned. A JsonInputGenerator (for args[0]) and a
/// JsonPathGenerator (for args[1]) are then appended to
/// state.customInputGenerators_. Both are built from the same seed,
/// represented type, map keys and maximum container size.
///
/// @param signature Signature of the function being fuzzed; must have exactly
/// two arguments.
/// @param options Fuzzer options; only nullRatio is read here.
/// @param rng Random generator used, in this order, to pick the represented
/// type, the maximum container size and the seed.
/// @param state Fuzzer state that receives the new columns and generators.
/// @return Field-access expressions for the new columns, in argument order.
std::vector<core::TypedExprPtr> JsonExtractArgValuesGenerator::generate(
    const CallableSignature& signature,
    const VectorFuzzer::Options& options,
    FuzzerGenerator& rng,
    ExpressionFuzzerState& state) {
  VELOX_CHECK_EQ(signature.args.size(), 2);
  std::vector<core::TypedExprPtr> inputExpressions;
  inputExpressions.reserve(signature.args.size());

  // Add one input column per argument and reference it by name.
  for (const auto& argType : signature.args) {
    state.inputRowTypes_.emplace_back(argType);
    state.inputRowNames_.emplace_back(
        fmt::format("c{}", state.inputRowTypes_.size() - 1));
    inputExpressions.push_back(std::make_shared<core::FieldAccessTypedExpr>(
        argType, state.inputRowNames_.back()));
  }

  // Keep these three draws in this order so that a given rng state keeps
  // producing the same inputs.
  const auto representedType =
      facebook::velox::fuzzer::randType(rng, jsonScalarTypes(), 3, {VARCHAR()});
  const auto maxContainerSize = rand<uint32_t>(rng, 0, 10);
  const auto seed = rand<uint32_t>(rng);
  const auto nullRatio = options.nullRatio;

  // Build the pool of map keys handed to both generators below: two more keys
  // than the maximum container size.
  const size_t numMapKeys = static_cast<size_t>(maxContainerSize) + 2;
  std::vector<variant> mapKeys;
  mapKeys.reserve(numMapKeys);
  // NOTE(review): the trailing '0.0, 4' presumably mean "no nulls" and "max
  // length 4" -- confirm against RandomInputGenerator.
  RandomInputGenerator<StringView> mapKeyGenerator{seed, VARCHAR(), 0.0, 4};
  for (size_t i = 0; i < numMapKeys; ++i) {
    mapKeys.push_back(mapKeyGenerator.generate());
  }

  // Generator for the first argument.
  state.customInputGenerators_.emplace_back(
      std::make_shared<fuzzer::JsonInputGenerator>(
          seed,
          signature.args[0],
          nullRatio,
          fuzzer::getRandomInputGenerator(
              seed, representedType, nullRatio, mapKeys, maxContainerSize),
          true));
  // Generator for the second argument, built from the same represented type
  // and map keys.
  state.customInputGenerators_.emplace_back(
      std::make_shared<fuzzer::JsonPathGenerator>(
          seed,
          signature.args[1],
          nullRatio,
          representedType,
          mapKeys,
          maxContainerSize,
          true));

  return inputExpressions;
}

} // namespace facebook::velox::fuzzer
11 changes: 11 additions & 0 deletions velox/expression/fuzzer/ArgValuesGenerators.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,15 @@ class JsonParseArgValuesGenerator : public ArgValuesGenerator {
ExpressionFuzzerState& state) override;
};

/// ArgValuesGenerator implementation whose generate() override is defined in
/// ArgValuesGenerators.cpp. The expression fuzzer test registers an instance
/// of it under the function name "json_extract".
class JsonExtractArgValuesGenerator : public ArgValuesGenerator {
 public:
  /// Produces the input expressions for a call with the given signature and
  /// may record additional inputs in 'state'.
  std::vector<core::TypedExprPtr> generate(
      const CallableSignature& signature,
      const VectorFuzzer::Options& options,
      FuzzerGenerator& rng,
      ExpressionFuzzerState& state) override;

  ~JsonExtractArgValuesGenerator() override = default;
};

} // namespace facebook::velox::fuzzer
4 changes: 3 additions & 1 deletion velox/expression/fuzzer/ExpressionFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ using facebook::velox::fuzzer::ArgTypesGenerator;
using facebook::velox::fuzzer::ArgValuesGenerator;
using facebook::velox::fuzzer::ExpressionFuzzer;
using facebook::velox::fuzzer::FuzzerRunner;
using facebook::velox::fuzzer::JsonExtractArgValuesGenerator;
using facebook::velox::fuzzer::JsonParseArgValuesGenerator;
using facebook::velox::test::ReferenceQueryRunner;

Expand Down Expand Up @@ -130,7 +131,8 @@ int main(int argc, char** argv) {

std::unordered_map<std::string, std::shared_ptr<ArgValuesGenerator>>
argValuesGenerators = {
{"json_parse", std::make_shared<JsonParseArgValuesGenerator>()}};
{"json_parse", std::make_shared<JsonParseArgValuesGenerator>()},
{"json_extract", std::make_shared<JsonExtractArgValuesGenerator>()}};

std::shared_ptr<facebook::velox::memory::MemoryPool> rootPool{
facebook::velox::memory::memoryManager()->addRootPool()};
Expand Down

0 comments on commit 14f1de7

Please sign in to comment.