Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HIVE-28582: OOM when compiling query with many GROUP BY columns aliased multiple times #5583

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,9 @@ public final class Bug {
* Whether <a href="https://issues.apache.org/jira/browse/CALCITE-6513">CALCITE-6513</a> is fixed.
*/
public static final boolean CALCITE_6513_FIXED = false;

/**
 * Whether <a href="https://issues.apache.org/jira/browse/CALCITE-6704">CALCITE-6704</a> is fixed.
 *
 * <p>While this is {@code false}, {@code HiveRelMdUniqueKeys} caps the number of unique keys it
 * derives for a {@code Project} (see its {@code limit} field) to avoid an OOM when the same key
 * column is aliased many times (HIVE-28582). Flip to {@code true} and remove the workaround once
 * Calcite is upgraded to a release containing the fix.
 */
public static final boolean CALCITE_6704_FIXED = false;
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,28 +21,123 @@
import java.util.List;
import java.util.Set;

import org.apache.calcite.linq4j.Linq4j;
import org.apache.calcite.rel.SingleRel;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.metadata.BuiltInMetadata;
import org.apache.calcite.rel.metadata.MetadataDef;
import org.apache.calcite.rel.metadata.MetadataHandler;
import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.util.BuiltInMethod;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.Util;
import org.apache.hadoop.hive.ql.optimizer.calcite.Bug;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;

import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Multimap;

public class HiveRelMdUniqueKeys implements MetadataHandler<BuiltInMetadata.UniqueKeys> {

/**
 * Reflective metadata provider exposing this handler for
 * {@link BuiltInMethod#UNIQUE_KEYS} queries.
 */
public static final RelMetadataProvider SOURCE =
    ReflectiveRelMetadataProvider.reflectiveSource(
        BuiltInMethod.UNIQUE_KEYS.method, new HiveRelMdUniqueKeys());
/**
 * A limit about the number of unique keys returned by the handler.
 * The limit must be in the range [0, Integer.MAX_VALUE].
 *
 * <p>Workaround for HIVE-28582: without a cap, the cartesian-product expansion in
 * {@code getProjectUniqueKeys} can produce an exponential number of keys and OOM the compiler.
 */
private final int limit;

/**
 * Private constructor; instances are only created for {@link #SOURCE}.
 *
 * <p>Fails fast if {@code CALCITE-6704} is marked fixed, as the {@code limit}
 * workaround must then be removed together with this constructor.
 */
private HiveRelMdUniqueKeys() {
  if (Bug.CALCITE_6704_FIXED) {
    throw new IllegalStateException("Remove constructor and limit once we upgrade to version with CALCITE-6704");
  }
  // 1000 keys is far more than the planner can usefully exploit; caps memory use.
  this.limit = 1000;
}

/** Returns the metadata definition this handler serves (unique-keys metadata). */
@Override
public MetadataDef<BuiltInMetadata.UniqueKeys> getDef() {
  return BuiltInMetadata.UniqueKeys.DEF;
}

/**
 * Derives the unique keys of a {@link Project} by propagating the keys of its
 * input through the projected expressions.
 *
 * @param rel         the project node
 * @param mq          metadata query facade
 * @param ignoreNulls whether nullable columns may participate in keys
 * @return the set of unique keys expressed in the project's output positions
 */
public Set<ImmutableBitSet> getUniqueKeys(Project rel, RelMetadataQuery mq,
    boolean ignoreNulls) {
  List<RexNode> exprs = rel.getProjects();
  return getProjectUniqueKeys(rel, mq, ignoreNulls, exprs);
}

/**
 * Maps the input's unique keys through a list of projected expressions.
 *
 * <p>Uniqueness can only be propagated through expressions that are bare column
 * references (the mapping f(a) =&gt; a); for any other expression we cannot know
 * whether the function preserves uniqueness. Keys containing a column that is
 * not projected as a plain reference are dropped. The number of produced keys
 * is capped at {@code limit} (HIVE-28582 workaround; remove with CALCITE-6704).
 *
 * @param rel         the single-input node whose input supplies the keys
 * @param mq          metadata query facade
 * @param ignoreNulls whether nullable columns may participate in keys
 * @param projExprs   the projected expressions, in output order
 * @return unique keys expressed in output positions, possibly truncated at {@code limit}
 */
private Set<ImmutableBitSet> getProjectUniqueKeys(SingleRel rel, RelMetadataQuery mq,
    boolean ignoreNulls, List<RexNode> projExprs) {
  if (Bug.CALCITE_6704_FIXED) {
    throw new IllegalStateException("Method is redundant once we upgrade to version with CALCITE-6704");

  }
  // Record, for every input column that survives as a bare reference, each
  // output position it appears at. One input column can feed several outputs.
  ImmutableMultimap.Builder<Integer, Integer> outputsPerInput = ImmutableMultimap.builder();
  ImmutableBitSet.Builder referencedInputsBuilder = ImmutableBitSet.builder();

  int outPos = 0;
  for (RexNode expr : projExprs) {
    if (expr instanceof RexInputRef) {
      final int inPos = ((RexInputRef) expr).getIndex();
      outputsPerInput.put(inPos, outPos);
      referencedInputsBuilder.set(inPos);
    }
    outPos++;
  }
  final ImmutableBitSet referencedInputs = referencedInputsBuilder.build();

  if (referencedInputs.isEmpty()) {
    // No projected expression is a plain column reference: nothing can be propagated.
    return ImmutableSet.of();
  }

  final Set<ImmutableBitSet> inputKeys = mq.getUniqueKeys(rel.getInput(), ignoreNulls);
  if (inputKeys == null) {
    return ImmutableSet.of();
  }

  final Multimap<Integer, Integer> inToOut = outputsPerInput.build();

  final Set<ImmutableBitSet> projectedKeys = new HashSet<>();
  keys:
  for (ImmutableBitSet inputKey : inputKeys) {
    if (!referencedInputs.contains(inputKey)) {
      // Some column of this key is not projected as a bare reference,
      // so the key does not survive the projection.
      continue;
    }
    // Every combination of the output positions of the key's columns is itself
    // a unique key, e.g. for "select key1, key1, val1, val2, key2" the keys
    // {0} and {4} become {{0},{4}} and {{1},{4}}. Enumerate the cartesian
    // product, but stop once the cap is reached to avoid an OOM.
    final Iterable<List<Integer>> combos =
        Linq4j.product(Util.transform(inputKey.toList(), inToOut::get));
    for (List<Integer> combo : combos) {
      if (projectedKeys.size() == limit) {
        break keys;
      }
      projectedKeys.add(ImmutableBitSet.of(combo));
    }
  }
  return projectedKeys;
}

public Set<ImmutableBitSet> getUniqueKeys(HiveTableScan rel, RelMetadataQuery mq,
boolean ignoreNulls) {
Expand Down
44 changes: 44 additions & 0 deletions ql/src/test/queries/clientpositive/cbo_unique_keys_oom.q
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
CREATE TABLE t (
c1 string,
c2 string,
c3 string,
c4 string,
c5 string,
c6 string,
c7 string,
c8 string,
c9 string
);

EXPLAIN CBO
SELECT a0
FROM
(SELECT c1 as a0,
c1 as a1,
c1 as a2,
c2 as a3,
c2 as a4,
c2 as a5,
c3 as a6,
c3 as a7,
c3 as a8,
c4 as a9,
c4 as a10,
c4 as a11,
c5 as a12,
c5 as a13,
c5 as a14,
c6 as a15,
c6 as a16,
c6 as a17,
c7 as a18,
c7 as a19,
c7 as a20,
c8 as a21,
c8 as a22,
c8 as a23,
c9 as a24,
c9 as a25,
c9 as a26
FROM t GROUP BY c1,c2,c3,c4,c5,c6,c7,c8,c9) t1
GROUP BY a0, a4
105 changes: 105 additions & 0 deletions ql/src/test/results/clientpositive/llap/cbo_unique_keys_oom.q.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
PREHOOK: query: CREATE TABLE t (
c1 string,
c2 string,
c3 string,
c4 string,
c5 string,
c6 string,
c7 string,
c8 string,
c9 string
)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@t
POSTHOOK: query: CREATE TABLE t (
c1 string,
c2 string,
c3 string,
c4 string,
c5 string,
c6 string,
c7 string,
c8 string,
c9 string
)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@t
PREHOOK: query: EXPLAIN CBO
SELECT a0
FROM
(SELECT c1 as a0,
c1 as a1,
c1 as a2,
c2 as a3,
c2 as a4,
c2 as a5,
c3 as a6,
c3 as a7,
c3 as a8,
c4 as a9,
c4 as a10,
c4 as a11,
c5 as a12,
c5 as a13,
c5 as a14,
c6 as a15,
c6 as a16,
c6 as a17,
c7 as a18,
c7 as a19,
c7 as a20,
c8 as a21,
c8 as a22,
c8 as a23,
c9 as a24,
c9 as a25,
c9 as a26
FROM t GROUP BY c1,c2,c3,c4,c5,c6,c7,c8,c9) t1
GROUP BY a0, a4
PREHOOK: type: QUERY
PREHOOK: Input: default@t
#### A masked pattern was here ####
POSTHOOK: query: EXPLAIN CBO
SELECT a0
FROM
(SELECT c1 as a0,
c1 as a1,
c1 as a2,
c2 as a3,
c2 as a4,
c2 as a5,
c3 as a6,
c3 as a7,
c3 as a8,
c4 as a9,
c4 as a10,
c4 as a11,
c5 as a12,
c5 as a13,
c5 as a14,
c6 as a15,
c6 as a16,
c6 as a17,
c7 as a18,
c7 as a19,
c7 as a20,
c8 as a21,
c8 as a22,
c8 as a23,
c9 as a24,
c9 as a25,
c9 as a26
FROM t GROUP BY c1,c2,c3,c4,c5,c6,c7,c8,c9) t1
GROUP BY a0, a4
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t
#### A masked pattern was here ####
CBO PLAN:
HiveProject(a0=[$0])
HiveAggregate(group=[{0, 1}])
HiveProject(c1=[$0], c2=[$1], c3=[$2], c4=[$3], c5=[$4], c6=[$5], c7=[$6], c8=[$7], c9=[$8])
HiveAggregate(group=[{0, 1, 2, 3, 4, 5, 6, 7, 8}])
HiveTableScan(table=[[default, t]], table:alias=[t])

Loading