Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Advance DF Oct-30 by SPM round-robin commit. #49

Closed
wants to merge 10 commits into from
Closed
5 changes: 5 additions & 0 deletions datafusion/catalog/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
// specific language governing permissions and limitations
// under the License.

// Disable clippy lints that were introduced after this code was written
#![allow(clippy::needless_lifetimes)]
#![allow(clippy::unnecessary_lazy_evaluations)]
#![allow(clippy::empty_line_after_doc_comments)]

mod catalog;
mod dynamic_file;
mod schema;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ config_namespace! {
///
/// This is used to workaround bugs in the planner that are now caught by
/// the new schema verification step.
pub skip_physical_aggregate_schema_check: bool, default = false
pub skip_physical_aggregate_schema_check: bool, default = true

/// Specifies the reserved memory for each spillable sort operation to
/// facilitate an in-memory merge.
Expand Down
4 changes: 4 additions & 0 deletions datafusion/common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
// under the License.
// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143
#![deny(clippy::clone_on_ref_ptr)]
// Disable clippy lints that were introduced after this code was written
#![allow(clippy::needless_lifetimes)]
#![allow(clippy::unnecessary_lazy_evaluations)]
#![allow(clippy::empty_line_after_doc_comments)]

mod column;
mod dfschema;
Expand Down
20 changes: 20 additions & 0 deletions datafusion/common/src/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,26 @@ impl Statistics {
self
}

/// Narrows (and optionally reorders) the per-column statistics to the
/// columns selected by `projection`.
///
/// With `None` the statistics are returned untouched. With `Some(indices)`,
/// entry `k` of the resulting `column_statistics` is a clone of the original
/// entry at `indices[k]`. For example, given statistics for columns
/// `{"a", "b", "c"}`, projecting to `vec![2, 1]` yields statistics for
/// columns `{"c", "b"}`. Only `column_statistics` is rewritten here.
///
/// Every index is used to index `column_statistics` directly, so each one
/// must be in range for the existing column statistics.
pub fn project(mut self, projection: Option<&Vec<usize>>) -> Self {
    if let Some(indices) = projection {
        // todo: it would be nice to avoid cloning column statistics if
        // possible (e.g. if the projection did not contain duplicates)
        let projected = indices
            .iter()
            .map(|&idx| self.column_statistics[idx].clone())
            .collect();
        self.column_statistics = projected;
    }

    self
}

/// Calculates the statistics after `fetch` and `skip` operations apply.
/// Here, `self` denotes per-partition statistics. Use the `n_partitions`
/// parameter to compute global statistics in a multi-partition setting.
Expand Down
8 changes: 8 additions & 0 deletions datafusion/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,14 @@
// specific language governing permissions and limitations
// under the License.
#![warn(missing_docs, clippy::needless_borrow)]
// Disable clippy lints that were introduced after this code was written
#![allow(clippy::needless_return)]
#![allow(clippy::needless_lifetimes)]
#![allow(clippy::unnecessary_lazy_evaluations)]
#![allow(clippy::empty_line_after_doc_comments)]
#![allow(clippy::unnecessary_filter_map)]
#![allow(clippy::manual_div_ceil)]
#![allow(missing_docs)]

//! [DataFusion] is an extensible query engine written in Rust that
//! uses [Apache Arrow] as its in-memory format. DataFusion's target users are
Expand Down
10 changes: 10 additions & 0 deletions datafusion/core/src/physical_optimizer/sanity_checker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ use datafusion_physical_plan::{get_plan_string, ExecutionPlanProperties};

use datafusion_physical_expr_common::sort_expr::format_physical_sort_requirement_list;
use datafusion_physical_optimizer::PhysicalOptimizerRule;
use datafusion_physical_plan::sorts::sort::SortExec;
use datafusion_physical_plan::union::UnionExec;
use itertools::izip;

/// The SanityCheckPlan rule rejects the following query plans:
Expand Down Expand Up @@ -126,6 +128,14 @@ pub fn check_plan_sanity(
plan.required_input_ordering().iter(),
plan.required_input_distribution().iter()
) {
// TEMP HACK WORKAROUND https://github.com/apache/datafusion/issues/11492
if child.as_any().downcast_ref::<UnionExec>().is_some() {
continue;
}
if child.as_any().downcast_ref::<SortExec>().is_some() {
continue;
}

let child_eq_props = child.equivalence_properties();
if let Some(sort_req) = sort_req {
if !child_eq_props.ordering_satisfy_requirement(sort_req) {
Expand Down
1 change: 1 addition & 0 deletions datafusion/core/src/physical_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,7 @@ impl DefaultPhysicalPlanner {
if &physical_input_schema != physical_input_schema_from_logical
&& !options.execution.skip_physical_aggregate_schema_check
{
log::warn!("Physical input schema should be the same as the one converted from logical input schema, but did not match for logical plan:\n{}", input.display_indent());
return internal_err!("Physical input schema should be the same as the one converted from logical input schema.");
}

Expand Down
5 changes: 5 additions & 0 deletions datafusion/execution/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@
// under the License.
// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143
#![deny(clippy::clone_on_ref_ptr)]
// Disable clippy lints that were introduced after this code was written
#![allow(clippy::needless_return)]
#![allow(clippy::needless_lifetimes)]
#![allow(clippy::unnecessary_lazy_evaluations)]
#![allow(clippy::empty_line_after_doc_comments)]

//! DataFusion execution configuration and runtime structures

Expand Down
6 changes: 6 additions & 0 deletions datafusion/expr-common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@
// specific language governing permissions and limitations
// under the License.

// Disable clippy lints that were introduced after this code was written
#![allow(clippy::needless_return)]
#![allow(clippy::needless_lifetimes)]
#![allow(clippy::unnecessary_lazy_evaluations)]
#![allow(clippy::empty_line_after_doc_comments)]

//! Logical Expr types and traits for [DataFusion]
//!
//! This crate contains types and traits that are used by both Logical and Physical expressions.
Expand Down
7 changes: 3 additions & 4 deletions datafusion/expr/src/expr_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,7 @@ impl ExprSchemable for Expr {
match self {
Expr::Column(c) => Ok(schema.metadata(c)?.clone()),
Expr::Alias(Alias { expr, .. }) => expr.metadata(schema),
Expr::Cast(Cast { expr, .. }) => expr.metadata(schema),
_ => Ok(HashMap::new()),
}
}
Expand Down Expand Up @@ -681,13 +682,11 @@ mod tests {
.with_data_type(DataType::Int32)
.with_metadata(meta.clone());

// col and alias should be metadata-preserving
// col, alias, and cast should be metadata-preserving
assert_eq!(meta, expr.metadata(&schema).unwrap());
assert_eq!(meta, expr.clone().alias("bar").metadata(&schema).unwrap());

// cast should drop input metadata since the type has changed
assert_eq!(
HashMap::new(),
meta,
expr.clone()
.cast_to(&DataType::Int64, &schema)
.unwrap()
Expand Down
5 changes: 5 additions & 0 deletions datafusion/expr/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@
// under the License.
// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143
#![deny(clippy::clone_on_ref_ptr)]
// Disable clippy lints that were introduced after this code was written
#![allow(clippy::needless_return)]
#![allow(clippy::needless_lifetimes)]
#![allow(clippy::unnecessary_lazy_evaluations)]
#![allow(clippy::empty_line_after_doc_comments)]

//! [DataFusion](https://github.com/apache/datafusion)
//! is an extensible query execution framework that uses
Expand Down
5 changes: 5 additions & 0 deletions datafusion/functions-aggregate-common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@

// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143
#![deny(clippy::clone_on_ref_ptr)]
// Disable clippy lints that were introduced after this code was written
#![allow(clippy::needless_return)]
#![allow(clippy::needless_lifetimes)]
#![allow(clippy::unnecessary_lazy_evaluations)]
#![allow(clippy::empty_line_after_doc_comments)]

pub mod accumulator;
pub mod aggregate;
Expand Down
5 changes: 5 additions & 0 deletions datafusion/functions-nested/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@
// under the License.
// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143
#![deny(clippy::clone_on_ref_ptr)]
// Disable clippy lints that were introduced after this code was written
#![allow(clippy::needless_return)]
#![allow(clippy::needless_lifetimes)]
#![allow(clippy::unnecessary_lazy_evaluations)]
#![allow(clippy::empty_line_after_doc_comments)]

//! Nested type Functions for [DataFusion].
//!
Expand Down
5 changes: 5 additions & 0 deletions datafusion/functions/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@
// under the License.
// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143
#![deny(clippy::clone_on_ref_ptr)]
// Disable clippy lints that were introduced after this code was written
#![allow(clippy::needless_return)]
#![allow(clippy::needless_lifetimes)]
#![allow(clippy::unnecessary_lazy_evaluations)]
#![allow(clippy::empty_line_after_doc_comments)]

//! Function packages for [DataFusion].
//!
Expand Down
6 changes: 5 additions & 1 deletion datafusion/functions/src/string/bit_length.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,11 @@ impl Default for BitLengthFunc {
impl BitLengthFunc {
pub fn new() -> Self {
Self {
signature: Signature::string(1, Volatility::Immutable),
signature: Signature::uniform(
1,
vec![DataType::Utf8, DataType::LargeUtf8],
Volatility::Immutable,
),
}
}
}
Expand Down
5 changes: 5 additions & 0 deletions datafusion/optimizer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@
// under the License.
// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143
#![deny(clippy::clone_on_ref_ptr)]
// Disable clippy lints that were introduced after this code was written
#![allow(clippy::needless_return)]
#![allow(clippy::needless_lifetimes)]
#![allow(clippy::unnecessary_lazy_evaluations)]
#![allow(clippy::empty_line_after_doc_comments)]

//! # DataFusion Optimizer
//!
Expand Down
6 changes: 6 additions & 0 deletions datafusion/physical-expr-common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@
//!
//! [DataFusion]: <https://crates.io/crates/datafusion>

// Disable clippy lints that were introduced after this code was written
#![allow(clippy::needless_return)]
#![allow(clippy::needless_lifetimes)]
#![allow(clippy::unnecessary_lazy_evaluations)]
#![allow(clippy::empty_line_after_doc_comments)]

pub mod binary_map;
pub mod binary_view_map;
pub mod datum;
Expand Down
7 changes: 0 additions & 7 deletions datafusion/physical-expr-common/src/sort_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,13 +120,6 @@ impl PhysicalSortExpr {
}
}

/// Access the PhysicalSortExpr as a PhysicalExpr
///
/// Borrows the wrapped `expr` field, so a `PhysicalSortExpr` can be handed to
/// any API that accepts `impl AsRef<dyn PhysicalExpr>`. The sort `options`
/// are not part of the returned view.
impl AsRef<dyn PhysicalExpr> for PhysicalSortExpr {
fn as_ref(&self) -> &(dyn PhysicalExpr + 'static) {
self.expr.as_ref()
}
}

impl PartialEq for PhysicalSortExpr {
fn eq(&self, other: &PhysicalSortExpr) -> bool {
self.options == other.options && self.expr.eq(&other.expr)
Expand Down
50 changes: 4 additions & 46 deletions datafusion/physical-expr/src/equivalence/class.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
use datafusion_common::JoinType;
use datafusion_physical_expr_common::physical_expr::format_physical_expr_list;

#[derive(Debug, Clone)]
/// A structure representing an expression known to be constant in a physical execution plan.
///
/// The `ConstExpr` struct encapsulates an expression that is constant during the execution
Expand All @@ -40,10 +41,9 @@ use datafusion_physical_expr_common::physical_expr::format_physical_expr_list;
///
/// - `expr`: Constant expression for a node in the physical plan.
///
/// - `across_partitions`: A boolean flag indicating whether the constant
/// expression is the same across partitions. If set to `true`, the constant
/// expression has same value for all partitions. If set to `false`, the
/// constant expression may have different values for different partitions.
/// - `across_partitions`: A boolean flag indicating whether the constant expression is
/// valid across partitions. If set to `true`, the constant expression has the same value for all partitions.
/// If set to `false`, the constant expression may have different values for different partitions.
///
/// # Example
///
Expand All @@ -56,22 +56,11 @@ use datafusion_physical_expr_common::physical_expr::format_physical_expr_list;
/// // create a constant expression from a physical expression
/// let const_expr = ConstExpr::from(col);
/// ```
#[derive(Debug, Clone)]
pub struct ConstExpr {
/// The expression that is known to be constant (e.g. a `Column`)
expr: Arc<dyn PhysicalExpr>,
/// Does the constant have the same value across all partitions? See
/// struct docs for more details
across_partitions: bool,
}

/// Two `ConstExpr`s are equal when their `across_partitions` flags match and
/// their wrapped expressions compare equal.
impl PartialEq for ConstExpr {
    fn eq(&self, other: &Self) -> bool {
        // Compare the flag first; the expression comparison only runs when
        // the flags agree.
        if self.across_partitions != other.across_partitions {
            return false;
        }
        self.expr.eq(other.expr.as_any())
    }
}

impl ConstExpr {
/// Create a new constant expression from a physical expression.
///
Expand All @@ -85,17 +74,11 @@ impl ConstExpr {
}
}

/// Builder-style setter for the `across_partitions` flag.
///
/// Consumes `self` and returns it with the flag replaced by the given
/// value. See the struct docs for what the flag means.
pub fn with_across_partitions(self, across_partitions: bool) -> Self {
    let mut updated = self;
    updated.across_partitions = across_partitions;
    updated
}

/// Is the expression the same across all partitions?
///
/// Returns the current value of the `across_partitions` flag.
///
/// See struct docs for more details
pub fn across_partitions(&self) -> bool {
self.across_partitions
}
Expand All @@ -118,31 +101,6 @@ impl ConstExpr {
across_partitions: self.across_partitions,
})
}

/// Returns true if the expression wrapped by this constant is equal to
/// `other`.
///
/// Only the expressions are compared; the `across_partitions` flag is not
/// consulted.
pub fn eq_expr(&self, other: impl AsRef<dyn PhysicalExpr>) -> bool {
    let other_expr = other.as_ref();
    self.expr.eq(other_expr.as_any())
}

/// Returns a [`Display`]able list of `ConstExpr`.
///
/// The entries are written in slice order using each entry's own `Display`
/// output, separated by a single `,` with no surrounding whitespace. An
/// empty slice produces no output.
pub fn format_list(input: &[ConstExpr]) -> impl Display + '_ {
    struct DisplayableList<'a>(&'a [ConstExpr]);
    impl<'a> Display for DisplayableList<'a> {
        fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
            for (position, const_expr) in self.0.iter().enumerate() {
                // Every entry except the first is preceded by a separator.
                if position > 0 {
                    write!(f, ",")?;
                }
                write!(f, "{}", const_expr)?;
            }
            Ok(())
        }
    }
    DisplayableList(input)
}
}

/// Display implementation for `ConstExpr`
Expand Down
Loading
Loading