-
Notifications
You must be signed in to change notification settings - Fork 170
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Move shuffle block decompression and decoding to native code and add LZ4 & Snappy support #1192
feat: Move shuffle block decompression and decoding to native code and add LZ4 & Snappy support #1192
Changes from 34 commits
8ce9bb5
a9a0593
11320a5
e2f28f9
c97eb58
4ffe47d
68d2331
a3fb105
b593e80
4078551
f286309
fbc2124
bed543a
e13d72f
f1ed927
45b020e
587feee
ea03a3d
b56089b
f66bced
0b2f0e9
ad1adc1
3e15b12
1c08a4b
7dd2ff6
1fc3d49
5a3fb2e
2b88bbd
b4b6aff
78340a1
69b54d9
f41180d
9f176c9
dd4f259
7e4ddc9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,6 +52,9 @@ serde = { version = "1", features = ["derive"] } | |
lazy_static = "1.4.0" | ||
prost = "0.12.1" | ||
jni = "0.21" | ||
snap = "1.1" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. awesome, its really challenging to find a well maintained snappy Rust library. |
||
# we disable default features in lz4_flex to force the use of the faster unsafe encoding and decoding implementation | ||
lz4_flex = { version = "0.11.3", default-features = false } | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a somewhat confusingly named feature flag. Can we leave a comment here that we're enabling unsafe encode and decode for performance? |
||
zstd = "0.11" | ||
rand = { workspace = true} | ||
num = { workspace = true } | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,23 +35,52 @@ fn criterion_benchmark(c: &mut Criterion) { | |
group.bench_function("shuffle_writer: encode (no compression))", |b| { | ||
let batch = create_batch(8192, true); | ||
let mut buffer = vec![]; | ||
let mut cursor = Cursor::new(&mut buffer); | ||
let ipc_time = Time::default(); | ||
b.iter(|| write_ipc_compressed(&batch, &mut cursor, &CompressionCodec::None, &ipc_time)); | ||
b.iter(|| { | ||
buffer.clear(); | ||
let mut cursor = Cursor::new(&mut buffer); | ||
write_ipc_compressed(&batch, &mut cursor, &CompressionCodec::None, &ipc_time) | ||
}); | ||
}); | ||
group.bench_function("shuffle_writer: encode and compress (snappy)", |b| { | ||
let batch = create_batch(8192, true); | ||
let mut buffer = vec![]; | ||
let ipc_time = Time::default(); | ||
b.iter(|| { | ||
buffer.clear(); | ||
let mut cursor = Cursor::new(&mut buffer); | ||
write_ipc_compressed(&batch, &mut cursor, &CompressionCodec::Snappy, &ipc_time) | ||
}); | ||
}); | ||
group.bench_function("shuffle_writer: encode and compress (lz4)", |b| { | ||
let batch = create_batch(8192, true); | ||
let mut buffer = vec![]; | ||
let ipc_time = Time::default(); | ||
b.iter(|| { | ||
buffer.clear(); | ||
let mut cursor = Cursor::new(&mut buffer); | ||
write_ipc_compressed(&batch, &mut cursor, &CompressionCodec::Lz4Frame, &ipc_time) | ||
}); | ||
}); | ||
group.bench_function("shuffle_writer: encode and compress (zstd level 1)", |b| { | ||
let batch = create_batch(8192, true); | ||
let mut buffer = vec![]; | ||
let mut cursor = Cursor::new(&mut buffer); | ||
let ipc_time = Time::default(); | ||
b.iter(|| write_ipc_compressed(&batch, &mut cursor, &CompressionCodec::Zstd(1), &ipc_time)); | ||
b.iter(|| { | ||
buffer.clear(); | ||
let mut cursor = Cursor::new(&mut buffer); | ||
write_ipc_compressed(&batch, &mut cursor, &CompressionCodec::Zstd(1), &ipc_time) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think zstd should have faster negative levels as well (-4 or -5 might come close), would be interesting to see how it compares. Not sure if it is available in the rust bindings. |
||
}); | ||
}); | ||
group.bench_function("shuffle_writer: encode and compress (zstd level 6)", |b| { | ||
let batch = create_batch(8192, true); | ||
let mut buffer = vec![]; | ||
let mut cursor = Cursor::new(&mut buffer); | ||
let ipc_time = Time::default(); | ||
b.iter(|| write_ipc_compressed(&batch, &mut cursor, &CompressionCodec::Zstd(6), &ipc_time)); | ||
b.iter(|| { | ||
buffer.clear(); | ||
let mut cursor = Cursor::new(&mut buffer); | ||
write_ipc_compressed(&batch, &mut cursor, &CompressionCodec::Zstd(6), &ipc_time) | ||
}); | ||
}); | ||
group.bench_function("shuffle_writer: end to end", |b| { | ||
let ctx = SessionContext::new(); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit since the config name now has
zstd
, the constant name should ideally reflect it, but optional