From 519906b25951862b492c4b3fff8fa376573e3b55 Mon Sep 17 00:00:00 2001 From: chrysn Date: Fri, 16 Nov 2018 10:52:59 +0100 Subject: [PATCH 1/6] tests: Add case with multiple infinite (byte) strings This should trigger if anything goes wrong with the management of buffers for infinite lists during the re-initialization (eg. truncating the buffer is missed). --- tests/de.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/de.rs b/tests/de.rs index e28bfa36..8fbec056 100644 --- a/tests/de.rs +++ b/tests/de.rs @@ -113,6 +113,16 @@ fn test_indefinite_byte_string() { assert_eq!(value.unwrap(), Value::Bytes(b"\x01#Eg".to_vec())); } +#[test] +fn test_multiple_indefinite_strings() { + // This assures that buffer rewinding in infinite buffers works as intended. + let value: error::Result = de::from_slice(b"\x82\x7f\x65Mary \x64Had \x62a \x67Little \x64Lamb\xff\x5f\x42\x01\x23\x42\x45\x67\xff"); + assert_eq!(value.unwrap(), Value::Array(vec![ + Value::String("Mary Had a Little Lamb".to_owned()), + Value::Bytes(b"\x01#Eg".to_vec()) + ])); +} + #[test] fn test_float() { let value: error::Result = de::from_slice(b"\xfa\x47\xc3\x50\x00"); From 1107667702faa19d4f1037f98e68ccdc8644859a Mon Sep 17 00:00:00 2001 From: chrysn Date: Fri, 16 Nov 2018 11:11:22 +0100 Subject: [PATCH 2/6] tests: Add a case for various EOFs inside indefinite strings --- tests/de.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/de.rs b/tests/de.rs index 8fbec056..491a9475 100644 --- a/tests/de.rs +++ b/tests/de.rs @@ -243,6 +243,20 @@ fn stream_deserializer_eof() { assert!(it.next().unwrap().unwrap_err().is_eof()); } +#[test] +fn stream_deserializer_eof_in_indefinite() { + let slice = b"\x7f\x65Mary \x64Had \x62a \x67Little \x64Lamb\xff"; + let indices: &[usize] = &[ + 2, // announcement but no data + 10, // mid-buffer EOF + 12, // neither new element nor end marker + ]; + for end_of_slice in indices { + let mut it = Deserializer::from_slice(&slice[..*end_of_slice]).into_iter::(); + assert!(it.next().unwrap().unwrap_err().is_eof()); + } +} + #[test] fn test_large_bytes() { let expected = (0..2 * 1024 * 1024).map(|i| (i * 7) as u8).collect::>(); From 9cca031dc88f5a147c78375ec7db6010e7b0bfef Mon Sep 17 00:00:00 2001 From: chrysn Date: Fri, 16 Nov 2018 09:25:15 +0100 Subject: [PATCH 3/6] IO Reader: Use provided read_exact function The provided read_exact method already does exactly what read_into would do, including looping while ErrorKind::Interrupted is returned. --- src/read.rs | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/src/read.rs b/src/read.rs index e30b84a0..5ca50c50 100644 --- a/src/read.rs +++ b/src/read.rs @@ -133,26 +133,14 @@ where Ok(Reference::Copied) } - fn read_into(&mut self, mut buf: &mut [u8]) -> Result<()> { - while !buf.is_empty() { - match self.reader.read(buf) { - Ok(0) => { - return Err(Error::syntax( - ErrorCode::EofWhileParsingValue, - self.offset(), - )) - } - Ok(count) => { - buf = &mut { - buf - }[count..] - } - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} - Err(e) => return Err(Error::io(e)), + fn read_into(&mut self, buf: &mut [u8]) -> Result<()> { + self.reader.read_exact(buf).map_err(|e| { + if e.kind() == io::ErrorKind::UnexpectedEof { + Error::syntax(ErrorCode::EofWhileParsingValue, self.offset()) + } else { + Error::io(e) } - } - - Ok(()) + }) } #[inline] From 708d981e577494d26a15568dae27542fe83bb1e6 Mon Sep 17 00:00:00 2001 From: chrysn Date: Fri, 16 Nov 2018 10:13:27 +0100 Subject: [PATCH 4/6] IO Reader: Avoid clearing the scratch buffer before writing The IoRead::read implementation used to manage the length of the vector and its content independently, necessitating a zero-initialization. In this change, only the capacity of the vec is set in advance, and the vec is fed the input data from a taken read slice's read_to_end method, which populates the vector and updates its length in (optimized) lockstep. As a result, the buffer has the nice property of always being the right length, so scratch_offset has become a bit redundant -- but not yet been removed, because there's two directions to go: Either * do away with scratch_offset (as the information is already transfered in the length of the vec), or * to move the buf (whose management is currently shared responsibility between deserializer and reader) completely into the reader, so `.read(n, scratch, scratch_offset)` becomes `.clear_buffer(); .read_into_buffer(n)`, and the fully populated buffer can then be fetched as `.read_buffer<'a>() -> &'a [u8]`. Such a scheme would be helpful in implementations for no_std environments (either without alloc or with fallible allocation), and help reduce the number of #[cfg()] around the deserializer's infinite string handling there. --- src/read.rs | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/src/read.rs b/src/read.rs index 5ca50c50..77e97335 100644 --- a/src/read.rs +++ b/src/read.rs @@ -110,24 +110,42 @@ where &mut self, mut n: usize, scratch: &mut Vec, - mut scratch_offset: usize, + scratch_offset: usize, ) -> Result> { + assert!(scratch.len() == scratch_offset); while n > 0 { // defend against malicious input pretending to be huge strings by limiting growth - let to_read = cmp::min(n, 16 * 1024); + let mut to_read = cmp::min(n, 16 * 1024); n -= to_read; - if to_read > scratch.len() - scratch_offset { - scratch.resize(scratch_offset + to_read, 0); - } + scratch.reserve(to_read); if let Some(ch) = self.ch.take() { - scratch[scratch_offset] = ch; - scratch_offset += 1; + scratch.push(ch); + to_read -= 1; } - self.read_into(&mut scratch[scratch_offset..])?; - scratch_offset = scratch.len(); + let transfer_result = { + // Prepare for take() (which consumes its reader) by creating a reference adaptor + // that'll only live in this block + let reference = self.reader.by_ref(); + // Append the first to_read bytes of the reader to the scratch vector (or up to + // an error or EOF indicated by a shorter read) + let mut taken = reference.take(to_read as u64); + taken.read_to_end(scratch) + }; + match transfer_result { + Ok(r) if r == to_read => (), + Ok(_) => { + return Err(Error::syntax( + ErrorCode::EofWhileParsingValue, + self.offset(), + )); + }, + Err(e) => { + return Err(Error::io(e)) + } + } } Ok(Reference::Copied) From 67c3b49bddcbf71abb0e6752d63510629cb8d503 Mon Sep 17 00:00:00 2001 From: chrysn Date: Sat, 17 Nov 2018 13:47:41 +0100 Subject: [PATCH 5/6] IO Reader: Don't loop in 16k chunks The splitting of reads into 16k chunks to avoid resource exhaustion has become obsolete since .take() / .read_to_end() is used, as its implementation already takes care of allocating in sane chunk sizes. The .reserve() call for the small values ensures that we use the available knowledge in the typical cases. (Given indenting changes, this commit is best viewed with --ignore-all-space). --- src/read.rs | 55 ++++++++++++++++++++++------------------------------- 1 file changed, 23 insertions(+), 32 deletions(-) diff --git a/src/read.rs b/src/read.rs index 77e97335..2a837a64 100644 --- a/src/read.rs +++ b/src/read.rs @@ -113,42 +113,33 @@ where scratch_offset: usize, ) -> Result> { assert!(scratch.len() == scratch_offset); - while n > 0 { - // defend against malicious input pretending to be huge strings by limiting growth - let mut to_read = cmp::min(n, 16 * 1024); - n -= to_read; - scratch.reserve(to_read); + // defend against malicious input pretending to be huge strings by limiting growth + scratch.reserve(cmp::min(n, 16 * 1024)); - if let Some(ch) = self.ch.take() { - scratch.push(ch); - to_read -= 1; - } - - let transfer_result = { - // Prepare for take() (which consumes its reader) by creating a reference adaptor - // that'll only live in this block - let reference = self.reader.by_ref(); - // Append the first to_read bytes of the reader to the scratch vector (or up to - // an error or EOF indicated by a shorter read) - let mut taken = reference.take(to_read as u64); - taken.read_to_end(scratch) - }; - match transfer_result { - Ok(r) if r == to_read => (), - Ok(_) => { - return Err(Error::syntax( - ErrorCode::EofWhileParsingValue, - self.offset(), - )); - }, - Err(e) => { - return Err(Error::io(e)) - } - } + if let Some(ch) = self.ch.take() { + scratch.push(ch); + n -= 1; } - Ok(Reference::Copied) + let transfer_result = { + // Prepare for take() (which consumes its reader) by creating a reference adaptor + // that'll only live in this block + let reference = self.reader.by_ref(); + // Append the first n bytes of the reader to the scratch vector (or up to + // an error or EOF indicated by a shorter read) + let mut taken = reference.take(n as u64); + taken.read_to_end(scratch) + }; + + match transfer_result { + Ok(r) if r == n => Ok(Reference::Copied), + Ok(_) => Err(Error::syntax( + ErrorCode::EofWhileParsingValue, + self.offset(), + )), + Err(e) => Err(Error::io(e)), + } } fn read_into(&mut self, buf: &mut [u8]) -> Result<()> { From 6814cb9cc26aef3d14d533ff3a2f121f29167918 Mon Sep 17 00:00:00 2001 From: chrysn Date: Wed, 21 Nov 2018 16:43:14 +0100 Subject: [PATCH 6/6] tests: Add zero-length byte strings into indefinite sequences Those are pointless to add in real data, but trigger code paths otherwise less trod. --- tests/de.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/de.rs b/tests/de.rs index 491a9475..a78c97b3 100644 --- a/tests/de.rs +++ b/tests/de.rs @@ -103,7 +103,7 @@ fn test_indefinite_list() { #[test] fn test_indefinite_string() { - let value: error::Result = de::from_slice(b"\x7f\x65Mary \x64Had \x62a \x67Little \x64Lamb\xff"); + let value: error::Result = de::from_slice(b"\x7f\x65Mary \x64Had \x62a \x67Little \x60\x64Lamb\xff"); assert_eq!(value.unwrap(), Value::String("Mary Had a Little Lamb".to_owned())); } @@ -116,7 +116,7 @@ fn test_indefinite_byte_string() { #[test] fn test_multiple_indefinite_strings() { // This assures that buffer rewinding in infinite buffers works as intended. - let value: error::Result = de::from_slice(b"\x82\x7f\x65Mary \x64Had \x62a \x67Little \x64Lamb\xff\x5f\x42\x01\x23\x42\x45\x67\xff"); + let value: error::Result = de::from_slice(b"\x82\x7f\x65Mary \x64Had \x62a \x67Little \x60\x64Lamb\xff\x5f\x42\x01\x23\x42\x45\x67\xff"); assert_eq!(value.unwrap(), Value::Array(vec![ Value::String("Mary Had a Little Lamb".to_owned()), Value::Bytes(b"\x01#Eg".to_vec()) @@ -245,7 +245,7 @@ fn stream_deserializer_eof() { #[test] fn stream_deserializer_eof_in_indefinite() { - let slice = b"\x7f\x65Mary \x64Had \x62a \x67Little \x64Lamb\xff"; + let slice = b"\x7f\x65Mary \x64Had \x62a \x60\x67Little \x60\x64Lamb\xff"; let indices: &[usize] = &[ 2, // announcement but no data 10, // mid-buffer EOF