[dnr][tables] Move read-then-write plans into clusterd #31189

Draft: wants to merge 4 commits into main
50 changes: 48 additions & 2 deletions Cargo.lock

(Generated file; diff not rendered.)

11 changes: 11 additions & 0 deletions WORKSPACE
@@ -486,6 +486,17 @@ crates_repository(
# Note: This is a target we add from the additive build file above.
deps = [":zlib"],
)],
# TODO(parkmycar): Refactor this to build the version of zlib from the `bzip2-sys` crate.
"bzip2-sys": [crate.annotation(
gen_build_script = False,
deps = ["@bzip2"],
)],
"lzma-sys": [crate.annotation(
additive_build_file = "@//misc/bazel/c_deps:rust-sys/BUILD.lzma-sys.bazel",
gen_build_script = False,
# Note: This is a target we add from the additive build file above.
deps = [":xz"],
)],
"openssl-sys": [crate.annotation(
build_script_data = [
"@openssl//:openssl_lib",
68 changes: 68 additions & 0 deletions misc/bazel/c_deps/rust-sys/BUILD.lzma-sys.bazel
@@ -0,0 +1,68 @@
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License in the LICENSE file at the
# root of this repository, or online at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Builds xz from the lzma-sys crate."""

cc_library(
name = "xz",
srcs = glob(
include = [
"xz-5.2/src/liblzma/api/**/*.c",
"xz-5.2/src/liblzma/lzma/**/*.c",
"xz-5.2/src/liblzma/lzma/**/*.h",
"xz-5.2/src/liblzma/lz/**/*.c",
"xz-5.2/src/liblzma/lz/**/*.h",
"xz-5.2/src/liblzma/check/**/*.c",
"xz-5.2/src/liblzma/check/**/*.h",
"xz-5.2/src/liblzma/simple/**/*.c",
"xz-5.2/src/liblzma/simple/**/*.h",
"xz-5.2/src/liblzma/delta/**/*.c",
"xz-5.2/src/liblzma/delta/**/*.h",
"xz-5.2/src/liblzma/common/**/*.c",
"xz-5.2/src/liblzma/common/**/*.h",
"xz-5.2/src/liblzma/rangecoder/**/*.c",
"xz-5.2/src/liblzma/rangecoder/**/*.h",
"xz-5.2/src/common/**/*.h",
],
exclude = [
"**/*crc32_small*",
"**/*crc64_small*",
"**/*tablegen*",
],
) + [
"config.h",
"xz-5.2/src/common/tuklib_cpucores.c",
"xz-5.2/src/common/tuklib_physmem.c",
],
hdrs = glob(["xz-5.2/src/liblzma/api/**/*.h"]),
copts = [
"-std=c99",
"-pthread",
],
includes = [
"xz-5.2/src/common",
"xz-5.2/src/liblzma/api",
"xz-5.2/src/liblzma/check",
"xz-5.2/src/liblzma/common",
"xz-5.2/src/liblzma/delta",
"xz-5.2/src/liblzma/lz",
"xz-5.2/src/liblzma/lzma",
"xz-5.2/src/liblzma/rangecoder",
"xz-5.2/src/liblzma/simple",
# The current working directory.
"",
],
local_defines = ["HAVE_CONFIG_H=1"],
)
1 change: 1 addition & 0 deletions src/adapter/src/client.rs
@@ -384,6 +384,7 @@ Issue a SQL query to get started. Need help?
{
(ExecuteResponse::SendingRows { future, .. }, _) => match future.await {
PeekResponseUnary::Rows(rows) => Ok(rows),
PeekResponseUnary::Batches(_) => bail!("unexpected staged result"),
PeekResponseUnary::Canceled => bail!("query canceled"),
PeekResponseUnary::Error(e) => bail!(e),
},
31 changes: 24 additions & 7 deletions src/adapter/src/coord/peek.rs
@@ -36,6 +36,7 @@ use mz_expr::{
use mz_ore::cast::CastFrom;
use mz_ore::str::{separated, StrExt};
use mz_ore::tracing::OpenTelemetryContext;
use mz_persist_client::batch::ProtoBatch;
use mz_repr::explain::text::DisplayText;
use mz_repr::explain::{CompactScalars, IndexUsageType, PlanRenderingContext, UsedIndexes};
use mz_repr::{Diff, GlobalId, IntoRowIterator, RelationType, Row, RowIterator};
@@ -44,6 +45,7 @@ use timely::progress::Timestamp;
use uuid::Uuid;

use crate::coord::timestamp_selection::TimestampDetermination;
use crate::optimize::peek::PeekOutput;
use crate::optimize::OptimizerError;
use crate::statement_logging::{StatementEndedExecutionReason, StatementExecutionStrategy};
use crate::util::ResultExt;
@@ -72,14 +74,15 @@ pub(crate) struct PendingPeek {
#[derive(Debug)]
pub enum PeekResponseUnary {
Rows(Box<dyn RowIterator + Send + Sync>),
Batches(Vec<ProtoBatch>),
Error(String),
Canceled,
}

#[derive(Clone, Debug)]
pub struct PeekDataflowPlan<T = mz_repr::Timestamp> {
pub(crate) desc: DataflowDescription<mz_compute_types::plan::Plan<T>, (), T>,
pub(crate) id: GlobalId,
pub(crate) output: PeekOutput,
key: Vec<MirScalarExpr>,
permutation: Vec<usize>,
thinned_arity: usize,
@@ -88,7 +91,7 @@ pub struct PeekDataflowPlan<T = mz_repr::Timestamp> {
impl<T> PeekDataflowPlan<T> {
pub fn new(
desc: DataflowDescription<mz_compute_types::plan::Plan<T>, (), T>,
id: GlobalId,
output: PeekOutput,
typ: &RelationType,
) -> Self {
let arity = typ.arity();
@@ -100,7 +103,7 @@ impl<T> PeekDataflowPlan<T> {
let (permutation, thinning) = permutation_for_arrangement(&key, arity);
Self {
desc,
id,
output,
key,
permutation,
thinned_arity: thinning.len(),
@@ -559,12 +562,14 @@ impl crate::coord::Coordinator {
// n.b. this index_id identifies a transient index the
// caller created, so it is guaranteed to be on
// `compute_instance`.
id: index_id,
//
// TODO(parkmycar): Update this comment.
output,
key: index_key,
permutation: index_permutation,
thinned_arity: index_thinned_arity,
}) => {
let output_ids = dataflow.export_ids().collect();
let output_ids = dataflow.exported_index_ids().collect();

// Very important: actually create the dataflow (here, so we can destructure).
self.controller
@@ -579,6 +584,15 @@
)
.await;

let (target, index_id) = match output {
PeekOutput::Index {
transient_id: index_id,
} => (PeekTarget::Index { id: index_id }, Some(index_id)),
PeekOutput::ReadThenWrite { sink_id, .. } => {
(PeekTarget::Sinked { select_id: sink_id }, None)
}
};

// Create an identity MFP operator.
let mut map_filter_project = mz_expr::MapFilterProject::new(source_arity);
map_filter_project.permute_fn(
@@ -588,9 +602,9 @@
let map_filter_project = mfp_to_safe_plan(map_filter_project)?;
(
(None, timestamp, map_filter_project),
Some(index_id),
index_id,
false,
PeekTarget::Index { id: index_id },
target,
StatementExecutionStrategy::Standard,
)
}
@@ -658,6 +672,9 @@ impl crate::coord::Coordinator {
Err(e) => PeekResponseUnary::Error(e),
}
}
PeekResponse::Staged(response) => {
PeekResponseUnary::Batches(response.staged_batches)
}
PeekResponse::Canceled => PeekResponseUnary::Canceled,
PeekResponse::Error(e) => PeekResponseUnary::Error(e),
},
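
For context, the peek.rs changes above replace the transient index id in `PeekDataflowPlan` with a `PeekOutput` value from `crate::optimize::peek`, whose definition is not included in this diff. The sketch below is only an inference from the two variants matched in peek.rs (`PeekOutput::Index { transient_id }` and `PeekOutput::ReadThenWrite { sink_id, .. }`); any further fields are unknown here and marked as such.

use mz_repr::GlobalId;

// Sketch, not the PR's actual definition: where a peek dataflow delivers its results.
#[derive(Clone, Debug)]
pub enum PeekOutput {
    // Arrange the results in a transient index and read them back as rows.
    Index {
        // Transient index id created for this peek.
        transient_id: GlobalId,
    },
    // Stage the results as persist batches for a read-then-write plan; the
    // cluster replies with `PeekResponse::Staged` instead of rows.
    ReadThenWrite {
        // Transient sink id the staged batches are written under.
        sink_id: GlobalId,
        // The diff matches this variant with `..`, so it carries additional
        // fields that are not visible in this hunk.
    },
}
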
2 changes: 1 addition & 1 deletion src/adapter/src/coord/sequencer.rs
@@ -373,7 +373,7 @@ impl Coordinator {
session,
);
}
CopyFromSource::Url(_) => {
CopyFromSource::Url(_) | CopyFromSource::AwsS3 { .. } => {
self.sequence_copy_from(ctx, plan, target_cluster).await;
}
},
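
Taken together, the adapter-side changes suggest the intended read-then-write flow: the cluster stages the peek's results as persist batches and returns them via `PeekResponse::Staged`, and the coordinator surfaces them as the new `PeekResponseUnary::Batches` variant instead of streaming rows back. A rough sketch of a consumer of that variant follows; `append_staged_batches` is a hypothetical placeholder, not an API introduced by this PR, and the real work of linking batches into the target table happens elsewhere.

use anyhow::bail;
use mz_persist_client::batch::ProtoBatch;

use crate::coord::peek::PeekResponseUnary;

// Hypothetical consumer of a read-then-write peek result.
async fn handle_read_then_write(response: PeekResponseUnary) -> Result<(), anyhow::Error> {
    match response {
        // New path: the cluster already wrote the data as persist batches, so
        // the coordinator only has to append/link them at the write timestamp.
        PeekResponseUnary::Batches(batches) => append_staged_batches(batches).await,
        // The remaining variants mirror the handling in client.rs above.
        PeekResponseUnary::Rows(_) => bail!("expected staged batches, got rows"),
        PeekResponseUnary::Canceled => bail!("query canceled"),
        PeekResponseUnary::Error(e) => bail!(e),
    }
}

// Placeholder: in the real PR the batches would be handed to persist (for
// example through a write handle on the target table), not dropped.
async fn append_staged_batches(_batches: Vec<ProtoBatch>) -> Result<(), anyhow::Error> {
    Ok(())
}
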