From 519906b25951862b492c4b3fff8fa376573e3b55 Mon Sep 17 00:00:00 2001
From: chrysn <chrysn@fsfe.org>
Date: Fri, 16 Nov 2018 10:52:59 +0100
Subject: [PATCH 1/6] tests: Add case with multiple infinite (byte) strings

This should trigger if anything goes wrong with the management of
buffers for infinite lists during the re-initialization (eg. truncating
the buffer is missed).
---
 tests/de.rs | 10 ++++++++++
 1 file changed, 10 insertions(+)
diff --git a/tests/de.rs b/tests/de.rs
index e28bfa36..8fbec056 100644
--- a/tests/de.rs
+++ b/tests/de.rs
@@ -113,6 +113,16 @@ fn test_indefinite_byte_string() {
     assert_eq!(value.unwrap(), Value::Bytes(b"\x01#Eg".to_vec()));
 }
 
+#[test]
+fn test_multiple_indefinite_strings() {
+    // This assures that buffer rewinding in infinite buffers works as intended.
+    let value: error::Result<Value> = de::from_slice(b"\x82\x7f\x65Mary \x64Had \x62a \x67Little \x64Lamb\xff\x5f\x42\x01\x23\x42\x45\x67\xff");
+    assert_eq!(value.unwrap(), Value::Array(vec![
+        Value::String("Mary Had a Little Lamb".to_owned()),
+        Value::Bytes(b"\x01#Eg".to_vec())
+        ]));
+}
+
 #[test]
 fn test_float() {
     let value: error::Result<Value> = de::from_slice(b"\xfa\x47\xc3\x50\x00");

From 1107667702faa19d4f1037f98e68ccdc8644859a Mon Sep 17 00:00:00 2001
From: chrysn <chrysn@fsfe.org>
Date: Fri, 16 Nov 2018 11:11:22 +0100
Subject: [PATCH 2/6] tests: Add a case for various EOFs inside indefinite
 strings

---
 tests/de.rs | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tests/de.rs b/tests/de.rs
index 8fbec056..491a9475 100644
--- a/tests/de.rs
+++ b/tests/de.rs
@@ -243,6 +243,20 @@ fn stream_deserializer_eof() {
     assert!(it.next().unwrap().unwrap_err().is_eof());
 }
 
+#[test]
+fn stream_deserializer_eof_in_indefinite() {
+    let slice = b"\x7f\x65Mary \x64Had \x62a \x67Little \x64Lamb\xff";
+    let indices: &[usize] = &[
+        2, // announcement but no data
+        10, // mid-buffer EOF
+        12, // neither new element nor end marker
+    ];
+    for end_of_slice in indices {
+        let mut it = Deserializer::from_slice(&slice[..*end_of_slice]).into_iter::<Value>();
+        assert!(it.next().unwrap().unwrap_err().is_eof());
+    }
+}
+
 #[test]
 fn test_large_bytes() {
     let expected = (0..2 * 1024 * 1024).map(|i| (i * 7) as u8).collect::<Vec<_>>();

From 9cca031dc88f5a147c78375ec7db6010e7b0bfef Mon Sep 17 00:00:00 2001
From: chrysn <chrysn@fsfe.org>
Date: Fri, 16 Nov 2018 09:25:15 +0100
Subject: [PATCH 3/6] IO Reader: Use provided read_exact function

The provided read_exact method already does exactly what read_into would
do, including looping while ErrorKind::Interrupted is returned.
---
 src/read.rs | 26 +++++++-------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/src/read.rs b/src/read.rs
index e30b84a0..5ca50c50 100644
--- a/src/read.rs
+++ b/src/read.rs
@@ -133,26 +133,14 @@ where
         Ok(Reference::Copied)
     }
 
-    fn read_into(&mut self, mut buf: &mut [u8]) -> Result<()> {
-        while !buf.is_empty() {
-            match self.reader.read(buf) {
-                Ok(0) => {
-                    return Err(Error::syntax(
-                        ErrorCode::EofWhileParsingValue,
-                        self.offset(),
-                    ))
-                }
-                Ok(count) => {
-                    buf = &mut {
-                        buf
-                    }[count..]
-                }
-                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
-                Err(e) => return Err(Error::io(e)),
+    fn read_into(&mut self, buf: &mut [u8]) -> Result<()> {
+        self.reader.read_exact(buf).map_err(|e| {
+            if e.kind() == io::ErrorKind::UnexpectedEof {
+                Error::syntax(ErrorCode::EofWhileParsingValue, self.offset())
+            } else {
+                Error::io(e)
             }
-        }
-
-        Ok(())
+        })
     }
 
     #[inline]

From 708d981e577494d26a15568dae27542fe83bb1e6 Mon Sep 17 00:00:00 2001
From: chrysn <chrysn@fsfe.org>
Date: Fri, 16 Nov 2018 10:13:27 +0100
Subject: [PATCH 4/6] IO Reader: Avoid clearing the scratch buffer before
 writing

The IoRead::read implementation used to manage the length of the vector
and its content independently, necessitating a zero-initialization. In
this change, only the capacity of the vec is set in advance, and the vec
is fed the input data from a taken read slice's read_to_end method,
which populates the vector and updates its length in (optimized)
lockstep.

As a result, the buffer has the nice property of always being the right
length, so scratch_offset has become a bit redundant -- but not yet been
removed, because there's two directions to go: Either

* do away with scratch_offset (as the information is already transfered
  in the length of the vec), or

* to move the buf (whose management is currently shared responsibility
  between deserializer and reader) completely into the reader, so
  `.read(n, scratch, scratch_offset)` becomes `.clear_buffer();
  .read_into_buffer(n)`, and the fully populated buffer can then be
  fetched as `.read_buffer<'a>() -> &'a [u8]`.

  Such a scheme would be helpful in implementations for no_std
  environments (either without alloc or with fallible allocation), and
  help reduce the number of #[cfg()] around the deserializer's infinite
  string handling there.
---
 src/read.rs | 36 +++++++++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/src/read.rs b/src/read.rs
index 5ca50c50..77e97335 100644
--- a/src/read.rs
+++ b/src/read.rs
@@ -110,24 +110,42 @@ where
         &mut self,
         mut n: usize,
         scratch: &mut Vec<u8>,
-        mut scratch_offset: usize,
+        scratch_offset: usize,
     ) -> Result<Reference<'de>> {
+        assert!(scratch.len() == scratch_offset);
         while n > 0 {
             // defend against malicious input pretending to be huge strings by limiting growth
-            let to_read = cmp::min(n, 16 * 1024);
+            let mut to_read = cmp::min(n, 16 * 1024);
             n -= to_read;
 
-            if to_read > scratch.len() - scratch_offset {
-                scratch.resize(scratch_offset + to_read, 0);
-            }
+            scratch.reserve(to_read);
 
             if let Some(ch) = self.ch.take() {
-                scratch[scratch_offset] = ch;
-                scratch_offset += 1;
+                scratch.push(ch);
+                to_read -= 1;
             }
 
-            self.read_into(&mut scratch[scratch_offset..])?;
-            scratch_offset = scratch.len();
+            let transfer_result = {
+                // Prepare for take() (which consumes its reader) by creating a reference adaptor
+                // that'll only live in this block
+                let reference = self.reader.by_ref();
+                // Append the first to_read bytes of the reader to the scratch vector (or up to
+                // an error or EOF indicated by a shorter read)
+                let mut taken = reference.take(to_read as u64);
+                taken.read_to_end(scratch)
+            };
+            match transfer_result {
+                Ok(r) if r == to_read => (),
+                Ok(_) => {
+                    return Err(Error::syntax(
+                        ErrorCode::EofWhileParsingValue,
+                        self.offset(),
+                    ));
+                },
+                Err(e) => {
+                    return Err(Error::io(e))
+                }
+            }
         }
 
         Ok(Reference::Copied)

From 67c3b49bddcbf71abb0e6752d63510629cb8d503 Mon Sep 17 00:00:00 2001
From: chrysn <chrysn@fsfe.org>
Date: Sat, 17 Nov 2018 13:47:41 +0100
Subject: [PATCH 5/6] IO Reader: Don't loop in 16k chunks

The splitting of reads into 16k chunks to avoid resource exhaustion has
become obsolete since .take() / .read_to_end() is used, as its
implementation already takes care of allocating in sane chunk sizes.

The .reserve() call for the small values ensures that we use the
available knowledge in the typical cases.

(Given indenting changes, this commit is best viewed with
--ignore-all-space).
---
 src/read.rs | 55 ++++++++++++++++++++++-------------------------------
 1 file changed, 23 insertions(+), 32 deletions(-)

diff --git a/src/read.rs b/src/read.rs
index 77e97335..2a837a64 100644
--- a/src/read.rs
+++ b/src/read.rs
@@ -113,42 +113,33 @@ where
         scratch_offset: usize,
     ) -> Result<Reference<'de>> {
         assert!(scratch.len() == scratch_offset);
-        while n > 0 {
-            // defend against malicious input pretending to be huge strings by limiting growth
-            let mut to_read = cmp::min(n, 16 * 1024);
-            n -= to_read;
 
-            scratch.reserve(to_read);
+        // defend against malicious input pretending to be huge strings by limiting growth
+        scratch.reserve(cmp::min(n, 16 * 1024));
 
-            if let Some(ch) = self.ch.take() {
-                scratch.push(ch);
-                to_read -= 1;
-            }
-
-            let transfer_result = {
-                // Prepare for take() (which consumes its reader) by creating a reference adaptor
-                // that'll only live in this block
-                let reference = self.reader.by_ref();
-                // Append the first to_read bytes of the reader to the scratch vector (or up to
-                // an error or EOF indicated by a shorter read)
-                let mut taken = reference.take(to_read as u64);
-                taken.read_to_end(scratch)
-            };
-            match transfer_result {
-                Ok(r) if r == to_read => (),
-                Ok(_) => {
-                    return Err(Error::syntax(
-                        ErrorCode::EofWhileParsingValue,
-                        self.offset(),
-                    ));
-                },
-                Err(e) => {
-                    return Err(Error::io(e))
-                }
-            }
+        if let Some(ch) = self.ch.take() {
+            scratch.push(ch);
+            n -= 1;
         }
 
-        Ok(Reference::Copied)
+        let transfer_result = {
+            // Prepare for take() (which consumes its reader) by creating a reference adaptor
+            // that'll only live in this block
+            let reference = self.reader.by_ref();
+            // Append the first n bytes of the reader to the scratch vector (or up to
+            // an error or EOF indicated by a shorter read)
+            let mut taken = reference.take(n as u64);
+            taken.read_to_end(scratch)
+        };
+
+        match transfer_result {
+            Ok(r) if r == n => Ok(Reference::Copied),
+            Ok(_) => Err(Error::syntax(
+                    ErrorCode::EofWhileParsingValue,
+                    self.offset(),
+                )),
+            Err(e) => Err(Error::io(e)),
+        }
     }
 
     fn read_into(&mut self, buf: &mut [u8]) -> Result<()> {

From 6814cb9cc26aef3d14d533ff3a2f121f29167918 Mon Sep 17 00:00:00 2001
From: chrysn <chrysn@fsfe.org>
Date: Wed, 21 Nov 2018 16:43:14 +0100
Subject: [PATCH 6/6] tests: Add zero-length byte strings into indefinite
 sequences

Those are pointless to add in real data, but trigger code paths
otherwise less trod.
---
 tests/de.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/de.rs b/tests/de.rs
index 491a9475..a78c97b3 100644
--- a/tests/de.rs
+++ b/tests/de.rs
@@ -103,7 +103,7 @@ fn test_indefinite_list() {
 
 #[test]
 fn test_indefinite_string() {
-    let value: error::Result<Value> = de::from_slice(b"\x7f\x65Mary \x64Had \x62a \x67Little \x64Lamb\xff");
+    let value: error::Result<Value> = de::from_slice(b"\x7f\x65Mary \x64Had \x62a \x67Little \x60\x64Lamb\xff");
     assert_eq!(value.unwrap(), Value::String("Mary Had a Little Lamb".to_owned()));
 }
 
@@ -116,7 +116,7 @@ fn test_indefinite_byte_string() {
 #[test]
 fn test_multiple_indefinite_strings() {
     // This assures that buffer rewinding in infinite buffers works as intended.
-    let value: error::Result<Value> = de::from_slice(b"\x82\x7f\x65Mary \x64Had \x62a \x67Little \x64Lamb\xff\x5f\x42\x01\x23\x42\x45\x67\xff");
+    let value: error::Result<Value> = de::from_slice(b"\x82\x7f\x65Mary \x64Had \x62a \x67Little \x60\x64Lamb\xff\x5f\x42\x01\x23\x42\x45\x67\xff");
     assert_eq!(value.unwrap(), Value::Array(vec![
         Value::String("Mary Had a Little Lamb".to_owned()),
         Value::Bytes(b"\x01#Eg".to_vec())
@@ -245,7 +245,7 @@ fn stream_deserializer_eof() {
 
 #[test]
 fn stream_deserializer_eof_in_indefinite() {
-    let slice = b"\x7f\x65Mary \x64Had \x62a \x67Little \x64Lamb\xff";
+    let slice = b"\x7f\x65Mary \x64Had \x62a \x60\x67Little \x60\x64Lamb\xff";
     let indices: &[usize] = &[
         2, // announcement but no data
         10, // mid-buffer EOF