Skip to content
This repository has been archived by the owner on Aug 15, 2021. It is now read-only.

IoReader enhancements #80

Merged
merged 6 commits into from
Nov 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 32 additions & 35 deletions src/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,49 +110,46 @@ where
&mut self,
mut n: usize,
scratch: &mut Vec<u8>,
mut scratch_offset: usize,
scratch_offset: usize,
) -> Result<Reference<'de>> {
while n > 0 {
// defend against malicious input pretending to be huge strings by limiting growth
let to_read = cmp::min(n, 16 * 1024);
n -= to_read;

if to_read > scratch.len() - scratch_offset {
scratch.resize(scratch_offset + to_read, 0);
}
assert!(scratch.len() == scratch_offset);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this always the case? If it is, we should just remove the scratch_offset parameter.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, at least while the Deserializer uses it as it does now.

I haven't changed the internal API part yet because there are two ways of going forward here (drop scratch_offset, or drop the buf.clear() in the reader and make scratch_offset a boolean "start anew"), see d1903ff -- but I think those are better explored in a separate change.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok cool - it seems fine to follow up in a separate change.


if let Some(ch) = self.ch.take() {
scratch[scratch_offset] = ch;
scratch_offset += 1;
}
// defend against malicious input pretending to be huge strings by limiting growth
scratch.reserve(cmp::min(n, 16 * 1024));

self.read_into(&mut scratch[scratch_offset..])?;
scratch_offset = scratch.len();
if let Some(ch) = self.ch.take() {
scratch.push(ch);
n -= 1;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we ever call this method with n == 0?

}

Ok(Reference::Copied)
let transfer_result = {
// Prepare for take() (which consumes its reader) by creating a reference adaptor
// that'll only live in this block
let reference = self.reader.by_ref();
// Append the first n bytes of the reader to the scratch vector (or up to
// an error or EOF indicated by a shorter read)
let mut taken = reference.take(n as u64);
taken.read_to_end(scratch)
};

match transfer_result {
Ok(r) if r == n => Ok(Reference::Copied),
Ok(_) => Err(Error::syntax(
ErrorCode::EofWhileParsingValue,
self.offset(),
)),
Err(e) => Err(Error::io(e)),
}
}

fn read_into(&mut self, mut buf: &mut [u8]) -> Result<()> {
while !buf.is_empty() {
match self.reader.read(buf) {
Ok(0) => {
return Err(Error::syntax(
ErrorCode::EofWhileParsingValue,
self.offset(),
))
}
Ok(count) => {
buf = &mut {
buf
}[count..]
}
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
Err(e) => return Err(Error::io(e)),
fn read_into(&mut self, buf: &mut [u8]) -> Result<()> {
self.reader.read_exact(buf).map_err(|e| {
if e.kind() == io::ErrorKind::UnexpectedEof {
Error::syntax(ErrorCode::EofWhileParsingValue, self.offset())
} else {
Error::io(e)
}
}

Ok(())
})
}

#[inline]
Expand Down
26 changes: 25 additions & 1 deletion tests/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ fn test_indefinite_list() {

#[test]
fn test_indefinite_string() {
let value: error::Result<Value> = de::from_slice(b"\x7f\x65Mary \x64Had \x62a \x67Little \x64Lamb\xff");
let value: error::Result<Value> = de::from_slice(b"\x7f\x65Mary \x64Had \x62a \x67Little \x60\x64Lamb\xff");
assert_eq!(value.unwrap(), Value::String("Mary Had a Little Lamb".to_owned()));
}

Expand All @@ -113,6 +113,16 @@ fn test_indefinite_byte_string() {
assert_eq!(value.unwrap(), Value::Bytes(b"\x01#Eg".to_vec()));
}

#[test]
fn test_multiple_indefinite_strings() {
// This assures that buffer rewinding in infinite buffers works as intended.
let value: error::Result<Value> = de::from_slice(b"\x82\x7f\x65Mary \x64Had \x62a \x67Little \x60\x64Lamb\xff\x5f\x42\x01\x23\x42\x45\x67\xff");
assert_eq!(value.unwrap(), Value::Array(vec![
Value::String("Mary Had a Little Lamb".to_owned()),
Value::Bytes(b"\x01#Eg".to_vec())
]));
}

#[test]
fn test_float() {
let value: error::Result<Value> = de::from_slice(b"\xfa\x47\xc3\x50\x00");
Expand Down Expand Up @@ -233,6 +243,20 @@ fn stream_deserializer_eof() {
assert!(it.next().unwrap().unwrap_err().is_eof());
}

#[test]
fn stream_deserializer_eof_in_indefinite() {
let slice = b"\x7f\x65Mary \x64Had \x62a \x60\x67Little \x60\x64Lamb\xff";
let indices: &[usize] = &[
2, // announcement but no data
10, // mid-buffer EOF
12, // neither new element nor end marker
];
for end_of_slice in indices {
let mut it = Deserializer::from_slice(&slice[..*end_of_slice]).into_iter::<Value>();
assert!(it.next().unwrap().unwrap_err().is_eof());
}
}

#[test]
fn test_large_bytes() {
let expected = (0..2 * 1024 * 1024).map(|i| (i * 7) as u8).collect::<Vec<_>>();
Expand Down