Skip to content

Commit

Permalink
HIVE-28732: Sorted dynamic partition optimization does not apply hive.default.nulls.last
Browse files Browse the repository at this point in the history
  • Loading branch information
kasakrisz committed Jan 31, 2025
1 parent 4f22ecd commit f33d1f8
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 51 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@

import java.util.function.Function;
import java.util.stream.Collectors;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
Expand Down Expand Up @@ -78,6 +80,7 @@
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.ql.plan.Statistics;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.util.NullOrdering;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
Expand Down Expand Up @@ -283,9 +286,9 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
List<ColumnInfo> colInfos = fsParent.getSchema().getSignature();
bucketColumns = getPositionsToExprNodes(bucketPositions, colInfos);
}
List<Integer> sortNullOrder = new ArrayList<Integer>();
List<Integer> sortNullOrder = new ArrayList<>();
for (int order : sortOrder) {
sortNullOrder.add(order == 1 ? 0 : 1); // for asc, nulls first; for desc, nulls last
sortNullOrder.add(NullOrdering.defaultNullOrder(order, parseCtx.getConf()).getCode());
}
LOG.debug("Got sort order");
for (int i : sortPositions) {
Expand Down Expand Up @@ -635,34 +638,18 @@ public ReduceSinkOperator getReduceSinkOp(List<Integer> partitionPositions, List
}
}

// if partition and bucket columns are sorted in ascending order, by default
// nulls come first; otherwise nulls come last
Integer nullOrder = order == 1 ? 0 : 1;
char nullOrder = NullOrdering.defaultNullOrder(order, parseCtx.getConf()).getSign();
if (sortNullOrder != null && !sortNullOrder.isEmpty()) {
if (sortNullOrder.get(0) == 0) {
nullOrder = 0;
} else {
nullOrder = 1;
}
}

for (Integer ignored : keyColsPosInVal) {
newSortNullOrder.add(nullOrder);
nullOrder = NullOrdering.fromCode(sortNullOrder.get(0)).getSign();
}

StringBuilder nullOrderStr = new StringBuilder(StringUtils.repeat(nullOrder, keyColsPosInVal.size()));
if (customSortExprPresent) {
for (int i = 0; i < customSortExprs.size() - customSortNullOrder.size(); i++) {
newSortNullOrder.add(nullOrder);
nullOrderStr.append(nullOrder);
}
newSortNullOrder.addAll(customSortNullOrder);
}

String nullOrderStr = "";
for (Integer i : newSortNullOrder) {
if (i == 0) {
nullOrderStr += "a";
} else {
nullOrderStr += "z";
// Append the sign of EACH custom sort expression's own null ordering. Reading
// element 0 on every iteration would give all custom sort keys the first
// expression's null ordering whenever the entries differ.
for (Integer customNullOrderCode : customSortNullOrder) {
  nullOrderStr.append(NullOrdering.fromCode(customNullOrderCode).getSign());
}
}

Expand Down Expand Up @@ -709,7 +696,7 @@ public ReduceSinkOperator getReduceSinkOp(List<Integer> partitionPositions, List
if (parentRSOpOrder != null && !parentRSOpOrder.isEmpty() && sortPositions.isEmpty()) {
keyCols.addAll(parentRSOp.getConf().getKeyCols());
orderStr += parentRSOpOrder;
nullOrderStr += parentRSOpNullOrder;
nullOrderStr.append(parentRSOpNullOrder);
}
}

Expand Down Expand Up @@ -739,7 +726,7 @@ public ReduceSinkOperator getReduceSinkOp(List<Integer> partitionPositions, List
// from Key and Value TableDesc
List<FieldSchema> fields = PlanUtils.getFieldSchemasFromColumnList(keyCols,
keyColNames, 0, "");
TableDesc keyTable = PlanUtils.getReduceKeyTableDesc(fields, orderStr, nullOrderStr);
TableDesc keyTable = PlanUtils.getReduceKeyTableDesc(fields, orderStr, nullOrderStr.toString());
List<FieldSchema> valFields = PlanUtils.getFieldSchemasFromColumnList(valCols,
valColNames, 0, "");
TableDesc valueTable = PlanUtils.getReduceValueTableDesc(valFields);
Expand Down
17 changes: 15 additions & 2 deletions ql/src/java/org/apache/hadoop/hive/ql/util/NullOrdering.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.NullValueOption;

import static org.apache.hadoop.hive.ql.util.DirectionUtils.ASCENDING_CODE;

/**
* Enum for converting different Null ordering description types.
*/
Expand Down Expand Up @@ -82,8 +84,19 @@ public static NullOrdering fromDirection(RelFieldCollation.NullDirection nullDir
}

public static NullOrdering defaultNullOrder(Configuration hiveConf) {
return HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_DEFAULT_NULLS_LAST) ?
NullOrdering.NULLS_LAST : NullOrdering.NULLS_FIRST;
return defaultNullsLast(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_DEFAULT_NULLS_LAST));
}

/**
 * Translates a "nulls last" flag into the corresponding enum constant.
 *
 * @param defaultNullsLast {@code true} to select {@code NULLS_LAST},
 *                         {@code false} to select {@code NULLS_FIRST}
 * @return the null ordering matching the flag
 */
private static NullOrdering defaultNullsLast(boolean defaultNullsLast) {
  if (defaultNullsLast) {
    return NullOrdering.NULLS_LAST;
  }
  return NullOrdering.NULLS_FIRST;
}

/**
 * Resolves the default null ordering for a sort key of the given direction.
 *
 * <p>When {@code order} equals {@code ASCENDING_CODE} the value of
 * {@code HiveConf.ConfVars.HIVE_DEFAULT_NULLS_LAST} is applied as is; for any
 * other {@code order} value the configured flag is negated before it is
 * mapped to an enum constant.
 *
 * @param order    sort direction code, compared against {@code ASCENDING_CODE}
 * @param hiveConf configuration from which the nulls-last flag is read
 * @return the default null ordering for the given direction
 */
public static NullOrdering defaultNullOrder(int order, Configuration hiveConf) {
  boolean configuredNullsLast =
      HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_DEFAULT_NULLS_LAST);
  // Non-ascending keys take the opposite of the configured placement.
  boolean effectiveNullsLast = (order == ASCENDING_CODE) ? configuredNullsLast : !configuredNullsLast;
  return defaultNullsLast(effectiveNullsLast);
}

public int getCode() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col4 (type: tinyint)
null sort order: a
null sort order: z
sort order: +
Map-reduce partition columns: _col4 (type: tinyint)
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Expand Down Expand Up @@ -289,7 +289,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col4 (type: tinyint)
null sort order: a
null sort order: z
sort order: +
Map-reduce partition columns: _col4 (type: tinyint)
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Expand Down Expand Up @@ -410,7 +410,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col4 (type: tinyint), _bucket_number (type: string), _col0 (type: smallint)
null sort order: aaa
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col4 (type: tinyint)
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Expand Down Expand Up @@ -532,7 +532,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col4 (type: tinyint), _bucket_number (type: string), _col3 (type: float)
null sort order: aaa
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col4 (type: tinyint)
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Expand Down Expand Up @@ -723,7 +723,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col4 (type: tinyint)
null sort order: a
null sort order: z
sort order: +
Map-reduce partition columns: _col4 (type: tinyint)
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Expand Down Expand Up @@ -867,7 +867,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col4 (type: tinyint)
null sort order: a
null sort order: z
sort order: +
Map-reduce partition columns: _col4 (type: tinyint)
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Expand Down Expand Up @@ -988,7 +988,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col4 (type: tinyint), _bucket_number (type: string), _col0 (type: smallint)
null sort order: aaa
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col4 (type: tinyint)
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Expand Down Expand Up @@ -1110,7 +1110,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col4 (type: tinyint), _bucket_number (type: string), _col3 (type: float)
null sort order: aaa
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col4 (type: tinyint)
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Expand Down Expand Up @@ -1824,7 +1824,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col4 (type: tinyint), _col1 (type: int)
null sort order: aa
null sort order: zz
sort order: ++
Map-reduce partition columns: _col4 (type: tinyint)
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Expand Down Expand Up @@ -1972,7 +1972,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col4 (type: tinyint)
null sort order: a
null sort order: z
sort order: +
Map-reduce partition columns: _col4 (type: tinyint)
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Expand Down Expand Up @@ -2222,7 +2222,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float)
null sort order: azzzz
null sort order: zzzzz
sort order: +++++
Map-reduce partition columns: _col0 (type: tinyint)
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Expand Down Expand Up @@ -2779,7 +2779,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col4 (type: tinyint), _bucket_number (type: string), _col3 (type: float)
null sort order: aaa
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col4 (type: tinyint)
Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Expand Down Expand Up @@ -3197,7 +3197,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
null sort order: aaa
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
Expand Down Expand Up @@ -3321,7 +3321,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
null sort order: aaa
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
Expand Down Expand Up @@ -3445,7 +3445,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
null sort order: aaa
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
Expand Down Expand Up @@ -3569,7 +3569,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
null sort order: aaa
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
Expand Down Expand Up @@ -3693,7 +3693,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
null sort order: aaa
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
Expand Down Expand Up @@ -3817,7 +3817,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
null sort order: aaa
null sort order: zzz
sort order: +++
Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
Expand Down Expand Up @@ -4459,7 +4459,7 @@ STAGE PLANS:
Statistics: Num rows: 804 Data size: 19288 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col4 (type: tinyint)
null sort order: a
null sort order: z
sort order: +
Map-reduce partition columns: _col4 (type: tinyint)
Statistics: Num rows: 804 Data size: 19288 Basic stats: COMPLETE Column stats: COMPLETE
Expand Down Expand Up @@ -4603,7 +4603,7 @@ STAGE PLANS:
Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col4 (type: tinyint)
null sort order: a
null sort order: z
sort order: +
Map-reduce partition columns: _col4 (type: tinyint)
Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
Expand Down Expand Up @@ -4852,7 +4852,7 @@ STAGE PLANS:
Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col4 (type: tinyint)
null sort order: a
null sort order: z
sort order: +
Map-reduce partition columns: _col4 (type: tinyint)
Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
Expand Down Expand Up @@ -5019,7 +5019,7 @@ STAGE PLANS:
Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: string)
null sort order: a
null sort order: z
sort order: +
Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE
Expand Down Expand Up @@ -5048,7 +5048,7 @@ STAGE PLANS:
Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: string)
null sort order: a
null sort order: z
sort order: +
Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 524 Data size: 52924 Basic stats: COMPLETE Column stats: COMPLETE
Expand Down

0 comments on commit f33d1f8

Please sign in to comment.