-
Notifications
You must be signed in to change notification settings - Fork 57
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve performance of decoding #62
base: main
Are you sure you want to change the base?
Changes from all commits
870590e
3b75b32
ff1d115
4fd0ed8
3e2aa25
f3251bd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -40,9 +40,9 @@ | |||||||||||||||||
#[cfg(feature = "alloc")] | ||||||||||||||||||
extern crate alloc; | ||||||||||||||||||
#[cfg(feature = "alloc")] | ||||||||||||||||||
use alloc::{string::String, vec::Vec}; | ||||||||||||||||||
use alloc::{string::String, vec, vec::Vec}; | ||||||||||||||||||
|
||||||||||||||||||
use core::iter; | ||||||||||||||||||
use core::{iter, u8}; | ||||||||||||||||||
|
||||||||||||||||||
mod error; | ||||||||||||||||||
pub use crate::error::FromHexError; | ||||||||||||||||||
|
@@ -172,16 +172,47 @@ pub trait FromHex: Sized { | |||||||||||||||||
fn from_hex<T: AsRef<[u8]>>(hex: T) -> Result<Self, Self::Error>; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
const fn val(c: u8, idx: usize) -> Result<u8, FromHexError> { | ||||||||||||||||||
match c { | ||||||||||||||||||
b'A'..=b'F' => Ok(c - b'A' + 10), | ||||||||||||||||||
b'a'..=b'f' => Ok(c - b'a' + 10), | ||||||||||||||||||
b'0'..=b'9' => Ok(c - b'0'), | ||||||||||||||||||
_ => Err(FromHexError::InvalidHexCharacter { | ||||||||||||||||||
c: c as char, | ||||||||||||||||||
const __: u8 = u8::MAX; | ||||||||||||||||||
|
||||||||||||||||||
// Lookup table for ascii to hex decoding. | ||||||||||||||||||
#[rustfmt::skip] | ||||||||||||||||||
static DECODE_TABLE: [u8; 256] = [ | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this
Suggested change
If this table were There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The lookup table is relatively large, so using const may bloat the binary size by inlining. IIRC, the standard library uses static for that use-case. For example: https://github.com/rust-lang/rust/blob/1.54.0/library/core/src/unicode/unicode_data.rs There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. By the way, benchmarking (on my machine) shows that changing this to const does not change the performance. |
||||||||||||||||||
// 1 2 3 4 5 6 7 8 9 a b c d e f | ||||||||||||||||||
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0 | ||||||||||||||||||
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 1 | ||||||||||||||||||
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2 | ||||||||||||||||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, __, __, __, __, __, __, // 3 | ||||||||||||||||||
__, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 4 | ||||||||||||||||||
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 5 | ||||||||||||||||||
__, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 6 | ||||||||||||||||||
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7 | ||||||||||||||||||
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8 | ||||||||||||||||||
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9 | ||||||||||||||||||
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // a | ||||||||||||||||||
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // b | ||||||||||||||||||
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // c | ||||||||||||||||||
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // d | ||||||||||||||||||
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // e | ||||||||||||||||||
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // f | ||||||||||||||||||
]; | ||||||||||||||||||
|
||||||||||||||||||
#[inline] | ||||||||||||||||||
fn val(bytes: &[u8], idx: usize) -> Result<u8, FromHexError> { | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The new implementation of val function cannot be const There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the table were |
||||||||||||||||||
let upper = DECODE_TABLE[bytes[0] as usize]; | ||||||||||||||||||
let lower = DECODE_TABLE[bytes[1] as usize]; | ||||||||||||||||||
if upper == u8::MAX { | ||||||||||||||||||
return Err(FromHexError::InvalidHexCharacter { | ||||||||||||||||||
c: bytes[0] as char, | ||||||||||||||||||
index: idx, | ||||||||||||||||||
}), | ||||||||||||||||||
}); | ||||||||||||||||||
} | ||||||||||||||||||
if lower == u8::MAX { | ||||||||||||||||||
return Err(FromHexError::InvalidHexCharacter { | ||||||||||||||||||
c: bytes[1] as char, | ||||||||||||||||||
index: idx + 1, | ||||||||||||||||||
}); | ||||||||||||||||||
} | ||||||||||||||||||
Ok((upper << 4) | lower) | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What does this bitshift + or do?
Suggested change
(does not hurt to explain?) |
||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
#[cfg(feature = "alloc")] | ||||||||||||||||||
|
@@ -194,10 +225,9 @@ impl FromHex for Vec<u8> { | |||||||||||||||||
return Err(FromHexError::OddLength); | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
hex.chunks(2) | ||||||||||||||||||
.enumerate() | ||||||||||||||||||
.map(|(i, pair)| Ok(val(pair[0], 2 * i)? << 4 | val(pair[1], 2 * i + 1)?)) | ||||||||||||||||||
.collect() | ||||||||||||||||||
let mut out = vec![0; hex.len() / 2]; | ||||||||||||||||||
decode_to_slice(hex, &mut out)?; | ||||||||||||||||||
Ok(out) | ||||||||||||||||||
} | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
|
@@ -309,6 +339,7 @@ pub fn decode<T: AsRef<[u8]>>(data: T) -> Result<Vec<u8>, FromHexError> { | |||||||||||||||||
/// assert_eq!(hex::decode_to_slice("6b697769", &mut bytes as &mut [u8]), Ok(())); | ||||||||||||||||||
/// assert_eq!(&bytes, b"kiwi"); | ||||||||||||||||||
/// ``` | ||||||||||||||||||
#[inline] | ||||||||||||||||||
pub fn decode_to_slice<T: AsRef<[u8]>>(data: T, out: &mut [u8]) -> Result<(), FromHexError> { | ||||||||||||||||||
let data = data.as_ref(); | ||||||||||||||||||
|
||||||||||||||||||
|
@@ -319,8 +350,8 @@ pub fn decode_to_slice<T: AsRef<[u8]>>(data: T, out: &mut [u8]) -> Result<(), Fr | |||||||||||||||||
return Err(FromHexError::InvalidStringLength); | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
for (i, byte) in out.iter_mut().enumerate() { | ||||||||||||||||||
*byte = val(data[2 * i], 2 * i)? << 4 | val(data[2 * i + 1], 2 * i + 1)?; | ||||||||||||||||||
for (i, (data, byte)) in data.chunks_exact(2).zip(out).enumerate() { | ||||||||||||||||||
*byte = val(data, 2 * i)?; | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
Ok(()) | ||||||||||||||||||
|
@@ -340,7 +371,7 @@ fn generate_iter(len: usize) -> impl Iterator<Item = (usize, usize)> { | |||||||||||||||||
// the inverse of `val`. | ||||||||||||||||||
#[inline] | ||||||||||||||||||
#[must_use] | ||||||||||||||||||
const fn byte2hex(byte: u8, table: &[u8; 16]) -> (u8, u8) { | ||||||||||||||||||
fn byte2hex(byte: u8, table: &[u8; 16]) -> (u8, u8) { | ||||||||||||||||||
taiki-e marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||
let high = table[((byte & 0xf0) >> 4) as usize]; | ||||||||||||||||||
let low = table[(byte & 0x0f) as usize]; | ||||||||||||||||||
|
||||||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
uhm I do not think that you should import the
u8 module
, whereu8::MAX
is deprecated.The
u8
type itself has an associated constantu8::MAX
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I used u8 module because the u8::MAX associated constant requires a relatively new compiler.
See also: #55
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You have a point. Personally, I do not really care about MSRV (because I want to use the new features and updating the compiler is just a simple
rustup update
) so I did not think much about it.#58 (comment)