diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 0000000..df99c69 --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1 @@ +max_width = 80 diff --git a/Cargo.toml b/Cargo.toml index 1e45a7c..fe1cb87 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,10 @@ flate2 = "1.0.31" serde = { version = "1.0", features = ["derive"] } num_enum = "0.7.3" +[features] +default = [] +restrictive = [] + [[bin]] name = "idb-tools" path = "src/tools/tools.rs" diff --git a/src/id0.rs b/src/id0.rs index 4fdd705..10ab02f 100644 --- a/src/id0.rs +++ b/src/id0.rs @@ -26,7 +26,12 @@ pub struct IDBFileRegions { } impl IDBFileRegions { - fn read(_key: &[u8], data: &[u8], version: u16, is_64: bool) -> Result { + fn read( + _key: &[u8], + data: &[u8], + version: u16, + is_64: bool, + ) -> Result { let mut input = IdaUnpacker::new(data, is_64); // TODO detect versions with more accuracy let (start, end, eva) = match version { @@ -38,9 +43,9 @@ impl IDBFileRegions { } 700.. => { let start = input.unpack_usize()?; - let end = start - .checked_add(input.unpack_usize()?) - .ok_or_else(|| anyhow!("Overflow address in File Regions"))?; + let end = start.checked_add(input.unpack_usize()?).ok_or_else( + || anyhow!("Overflow address in File Regions"), + )?; let rva = input.unpack_usize()?; // TODO some may include an extra 0 byte at the end? 
if let Ok(_unknown) = input.unpack_usize() { @@ -73,9 +78,13 @@ impl<'a> FunctionsAndComments<'a> { ensure!(parse_maybe_cstr(value) == Some(&b"$ funcs"[..])); Ok(Self::Name) } - b'S' => IDBFunction::read(sub_key, value, is_64).map(Self::Function), + b'S' => { + IDBFunction::read(sub_key, value, is_64).map(Self::Function) + } // some kind of style setting, maybe setting font and background color - b'R' | b'C' if value.starts_with(&[4, 3, 2, 1]) => Ok(Self::Unknown { key, value }), + b'R' | b'C' if value.starts_with(&[4, 3, 2, 1]) => { + Ok(Self::Unknown { key, value }) + } b'C' => { let address = parse_number(sub_key, true, is_64) .ok_or_else(|| anyhow!("Invalid Comment address"))?; @@ -87,8 +96,10 @@ impl<'a> FunctionsAndComments<'a> { .ok_or_else(|| anyhow!("Invalid Comment string")) } b'R' => { - let address = parse_number(sub_key, true, is_64) - .ok_or_else(|| anyhow!("Invalid Repetable Comment address"))?; + let address = + parse_number(sub_key, true, is_64).ok_or_else(|| { + anyhow!("Invalid Repetable Comment address") + })?; parse_maybe_cstr(value) .map(|value| Self::Comment { address, @@ -145,7 +156,9 @@ impl IDBFunction { }) } - fn read_extra_regular(mut input: impl IdaUnpack) -> Result { + fn read_extra_regular( + mut input: impl IdaUnpack, + ) -> Result { // TODO Undertand the sub operation at InnerRef 5c1b89aa-5277-4c98-98f6-cec08e1946ec 0x28f98f let frame = input.unpack_usize_ext_max()?; let _unknown4 = input.unpack_dw()?; @@ -155,7 +168,10 @@ impl IDBFunction { Ok(IDBFunctionExtra::NonTail { frame }) } - fn read_extra_tail(mut input: impl IdaUnpack, address_start: u64) -> Result { + fn read_extra_tail( + mut input: impl IdaUnpack, + address_start: u64, + ) -> Result { // offset of the function owner in relation to the function start let owner_offset = input.unpack_usize()? 
as i64; let owner = match address_start.checked_add_signed(owner_offset) { @@ -240,12 +256,18 @@ pub struct EntryPoint { pub entry_type: Option, } -pub(crate) fn parse_number(data: &[u8], big_endian: bool, is_64: bool) -> Option { +pub(crate) fn parse_number( + data: &[u8], + big_endian: bool, + is_64: bool, +) -> Option { Some(match (data.len(), is_64, big_endian) { (8, true, true) => u64::from_be_bytes(data.try_into().unwrap()), (8, true, false) => u64::from_le_bytes(data.try_into().unwrap()), (4, false, true) => u32::from_be_bytes(data.try_into().unwrap()).into(), - (4, false, false) => u32::from_le_bytes(data.try_into().unwrap()).into(), + (4, false, false) => { + u32::from_le_bytes(data.try_into().unwrap()).into() + } _ => return None, }) } diff --git a/src/id0/address_info.rs b/src/id0/address_info.rs index 114181a..85b0878 100644 --- a/src/id0/address_info.rs +++ b/src/id0/address_info.rs @@ -32,13 +32,18 @@ impl<'a> Comments<'a> { } } -pub(crate) struct SectionAddressInfoIter<'a, I: Iterator>> { +pub(crate) struct SectionAddressInfoIter< + 'a, + I: Iterator>, +> { all_entries: &'a [ID0Entry], regions: I, current_region: AddressInfoIter<'a>, } -impl<'a, I: Iterator>> SectionAddressInfoIter<'a, I> { +impl<'a, I: Iterator>> + SectionAddressInfoIter<'a, I> +{ pub fn new(all_entries: &'a [ID0Entry], regions: I, is_64: bool) -> Self { Self { all_entries, @@ -64,8 +69,10 @@ impl<'a, I: Iterator> + 'a> Iterator Some(Err(err)) => return Some(Err(err)), }; let is_64 = self.current_region.is_64; - let start_key: Vec = crate::id0::key_from_address(region.start, is_64).collect(); - let end_key: Vec = crate::id0::key_from_address(region.end, is_64).collect(); + let start_key: Vec = + crate::id0::key_from_address(region.start, is_64).collect(); + let end_key: Vec = + crate::id0::key_from_address(region.end, is_64).collect(); let start = self .all_entries .binary_search_by_key(&&start_key[..], |b| &b.key[..]) @@ -105,9 +112,12 @@ impl<'a> Iterator for AddressInfoIter<'a> { 
// 1.. because it starts with '.' let addr_len = if self.is_64 { 8 } else { 4 }; let key_start = addr_len + 1; - let address = super::parse_number(¤t.key[1..key_start], true, self.is_64).unwrap(); + let address = + super::parse_number(¤t.key[1..key_start], true, self.is_64) + .unwrap(); let key = ¤t.key[key_start..]; - let Some((sub_type, id_value)) = id_subkey_from_idx(key, self.is_64) else { + let Some((sub_type, id_value)) = id_subkey_from_idx(key, self.is_64) + else { return Some(Err(anyhow!("Missing SubType"))); }; diff --git a/src/id0/btree.rs b/src/id0/btree.rs index d82f7ba..ba08385 100644 --- a/src/id0/btree.rs +++ b/src/id0/btree.rs @@ -42,7 +42,10 @@ struct ID0Header { } impl ID0Header { - pub(crate) fn read(input: &mut impl IdaGenericUnpack, buf: &mut Vec) -> Result { + pub(crate) fn read( + input: &mut impl IdaGenericUnpack, + buf: &mut Vec, + ) -> Result { buf.resize(64, 0); input.read_exact(buf)?; // TODO handle the 15 version of the header: @@ -57,7 +60,8 @@ impl ID0Header { // } let mut buf_current = &buf[..]; - let next_free_offset: u32 = bincode::deserialize_from(&mut buf_current)?; + let next_free_offset: u32 = + bincode::deserialize_from(&mut buf_current)?; let page_size: u16 = bincode::deserialize_from(&mut buf_current)?; let root_page: u32 = bincode::deserialize_from(&mut buf_current)?; let record_count: u32 = bincode::deserialize_from(&mut buf_current)?; @@ -141,7 +145,8 @@ impl ID0Section { }; buf.resize(header.page_size.into(), 0); - let mut pages = HashMap::with_capacity(header.page_count.try_into().unwrap()); + let mut pages = + HashMap::with_capacity(header.page_count.try_into().unwrap()); let mut pending_pages = vec![root_page]; loop { if pending_pages.is_empty() { @@ -154,8 +159,10 @@ impl ID0Section { } // read the full page ensure!((page_idx.get() as usize) < pages_in_section); - let page_offset = page_idx.get() as usize * header.page_size as usize; - let page_raw = &input[page_offset..page_offset + header.page_size as usize]; + let 
page_offset = + page_idx.get() as usize * header.page_size as usize; + let page_raw = + &input[page_offset..page_offset + header.page_size as usize]; let page = ID0Page::read(page_raw, &header)?; // put in the queue the pages that need parsing, AKA children of this page match &page { @@ -179,7 +186,8 @@ impl ID0Section { ensure!(pages.len() <= header.page_count.try_into().unwrap()); // put it all in order on the vector - let mut entries = Vec::with_capacity(header.record_count.try_into().unwrap()); + let mut entries = + Vec::with_capacity(header.record_count.try_into().unwrap()); Self::tree_to_vec(root_page, &mut pages, &mut entries); // make sure the vector is sorted @@ -223,7 +231,10 @@ impl ID0Section { self.entries.iter() } - pub(crate) fn binary_search(&self, key: impl AsRef<[u8]>) -> Result { + pub(crate) fn binary_search( + &self, + key: impl AsRef<[u8]>, + ) -> Result { let key = key.as_ref(); self.entries.binary_search_by_key(&key, |b| &b.key[..]) } @@ -255,7 +266,10 @@ impl ID0Section { self.entries[start..end].iter() } - pub fn sub_values(&self, key: impl AsRef<[u8]>) -> impl Iterator { + pub fn sub_values( + &self, + key: impl AsRef<[u8]>, + ) -> impl Iterator { let key = key.as_ref(); let start = self.binary_search(key).unwrap_or_else(|start| start); let end = self.binary_search_end(key).unwrap_or_else(|end| end); @@ -264,7 +278,9 @@ impl ID0Section { } /// read the `$ segs` entries of the database - pub fn segments(&self) -> Result> + '_> { + pub fn segments( + &self, + ) -> Result> + '_> { let entry = self .get("N$ segs") .ok_or_else(|| anyhow!("Unable to find entry segs"))?; @@ -275,9 +291,9 @@ impl ID0Section { .copied() .collect(); let names = self.segment_strings()?; - Ok(self - .sub_values(key) - .map(move |e| Segment::read(&e.value, self.is_64, names.as_ref(), self))) + Ok(self.sub_values(key).map(move |e| { + Segment::read(&e.value, self.is_64, names.as_ref(), self) + })) } /// read the `$ segstrings` entries of the database @@ -301,7 +317,8 
@@ impl ID0Section { ensure!(start <= end); for i in start..end { let name = value_current.unpack_ds()?; - if let Some(_old) = entries.insert(i.try_into().unwrap(), name) { + if let Some(_old) = entries.insert(i.try_into().unwrap(), name) + { return Err(anyhow!("Duplicated id in segstrings {start}")); } } @@ -332,7 +349,8 @@ impl ID0Section { let name = self .get(key) .ok_or_else(|| anyhow!("Not found name for segment {idx}"))?; - parse_maybe_cstr(&name.value).ok_or_else(|| anyhow!("Invalid segment name {idx}")) + parse_maybe_cstr(&name.value) + .ok_or_else(|| anyhow!("Invalid segment name {idx}")) } /// read the `$ loader name` entries of the database @@ -353,7 +371,9 @@ impl ID0Section { } /// read the `Root Node` entries of the database - pub fn root_info(&self) -> Result>> { + pub fn root_info( + &self, + ) -> Result>> { let entry = self .get("NRoot Node") .ok_or_else(|| anyhow!("Unable to find entry Root Node"))?; @@ -371,7 +391,8 @@ impl ID0Section { match (sub_type, sub_key.len()) { (b'N', 1) => { ensure!( - parse_maybe_cstr(&entry.value) == Some(&b"Root Node"[..]), + parse_maybe_cstr(&entry.value) + == Some(&b"Root Node"[..]), "Invalid Root Node Name" ); return Ok(IDBRootInfo::RootNodeName); @@ -380,7 +401,8 @@ impl ID0Section { (b'V', 1) => return Ok(IDBRootInfo::InputFile(&entry.value)), _ => {} } - let Some(value) = parse_number(&sub_key[1..], true, self.is_64) else { + let Some(value) = parse_number(&sub_key[1..], true, self.is_64) + else { return Ok(IDBRootInfo::Unknown(entry)); }; match (sub_type, value as i64) { @@ -407,7 +429,9 @@ impl ID0Section { .map_err(|_| anyhow!("Value Md5 with invalid len")), (b'S', 1303) => parse_maybe_cstr(&entry.value) .and_then(|version| core::str::from_utf8(version).ok()) - .ok_or_else(|| anyhow!("Unable to parse VersionString string")) + .ok_or_else(|| { + anyhow!("Unable to parse VersionString string") + }) .map(IDBRootInfo::VersionString), (b'S', 1349) => entry .value @@ -441,10 +465,9 @@ impl ID0Section { 
.chain(sub_key.iter()) .copied() .collect(); - let description = self - .sub_values(key) - .next() - .ok_or_else(|| anyhow!("Unable to find id_params inside Root Node"))?; + let description = self.sub_values(key).next().ok_or_else(|| { + anyhow!("Unable to find id_params inside Root Node") + })?; IDBParam::read(&description.value, self.is_64) } @@ -498,7 +521,9 @@ impl ID0Section { // TODO implement $ hidden_ranges // TODO the address_info for 0xff00_00XX (or 0xff00_0000__0000_00XX for 64bits) seesm to be reserved, what happens if there is data at that page? - fn entry_points_raw(&self) -> Result>> { + fn entry_points_raw( + &self, + ) -> Result>> { let entry = self .get("N$ entry points") .ok_or_else(|| anyhow!("Unable to find functions"))?; @@ -516,7 +541,8 @@ impl ID0Section { /// read the `$ entry points` entries of the database pub fn entry_points(&self) -> Result> { - type RawEntryPoint<'a> = HashMap, Option<&'a str>, Option<&'a str>)>; + type RawEntryPoint<'a> = + HashMap, Option<&'a str>, Option<&'a str>)>; let mut entry_points: RawEntryPoint = HashMap::new(); for entry_point in self.entry_points_raw()? { match entry_point? { @@ -524,34 +550,49 @@ impl ID0Section { | EntryPointRaw::Name | EntryPointRaw::Ordinal { .. 
} => {} EntryPointRaw::Address { key, address } => { - if let Some(_old) = entry_points.entry(key).or_default().0.replace(address) { - return Err(anyhow!("Duplicated function address for {key}")); + if let Some(_old) = + entry_points.entry(key).or_default().0.replace(address) + { + return Err(anyhow!( + "Duplicated function address for {key}" + )); } } EntryPointRaw::ForwardedSymbol { key, symbol } => { - if let Some(_old) = entry_points.entry(key).or_default().1.replace(symbol) { - return Err(anyhow!("Duplicated function symbol for {key}")); + if let Some(_old) = + entry_points.entry(key).or_default().1.replace(symbol) + { + return Err(anyhow!( + "Duplicated function symbol for {key}" + )); } } EntryPointRaw::FunctionName { key, name } => { - if let Some(_old) = entry_points.entry(key).or_default().2.replace(name) { - return Err(anyhow!("Duplicated function name for {key}")); + if let Some(_old) = + entry_points.entry(key).or_default().2.replace(name) + { + return Err(anyhow!( + "Duplicated function name for {key}" + )); } } } } let mut result: Vec<_> = entry_points .into_iter() - .filter_map( - |(key, (address, symbol, name))| match (address, symbol, name) { + .filter_map(|(key, (address, symbol, name))| { + match (address, symbol, name) { // Function without name or address is possible, this is // probably some label that got deleted - (Some(_), _, None) | (None, _, Some(_)) | (None, _, None) => None, + (Some(_), _, None) + | (None, _, Some(_)) + | (None, _, None) => None, (Some(address), forwarded, Some(name)) => { - let entry = match self.find_entry_point_type(key, address) { - Ok(entry) => entry, - Err(error) => return Some(Err(error)), - }; + let entry = + match self.find_entry_point_type(key, address) { + Ok(entry) => entry, + Err(error) => return Some(Err(error)), + }; Some(Ok(EntryPoint { name: name.to_owned(), address, @@ -559,25 +600,37 @@ impl ID0Section { entry_type: entry, })) } - }, - ) + } + }) .collect::>()?; result.sort_by_key(|entry| 
entry.address); Ok(result) } - fn find_entry_point_type(&self, key: u64, address: u64) -> Result> { - if let Some(key_entry) = self.find_entry_point_type_value(key, 0x3000)? { + fn find_entry_point_type( + &self, + key: u64, + address: u64, + ) -> Result> { + if let Some(key_entry) = + self.find_entry_point_type_value(key, 0x3000)? + { return Ok(Some(key_entry)); } // TODO some times it uses the address as key, it's based on the version? - if let Some(key_entry) = self.find_entry_point_type_value(address, 0x3000)? { + if let Some(key_entry) = + self.find_entry_point_type_value(address, 0x3000)? + { return Ok(Some(key_entry)); } Ok(None) } - fn find_entry_point_type_value(&self, value: u64, key_find: u64) -> Result> { + fn find_entry_point_type_value( + &self, + value: u64, + key_find: u64, + ) -> Result> { let key: Vec = b"." .iter() .copied() @@ -594,7 +647,8 @@ impl ID0Section { let key = parse_number(key, true, self.is_64).unwrap(); // TODO handle other values for the key if key == key_find { - return til::Type::new_from_id0(&entry.value, vec![]).map(Option::Some); + return til::Type::new_from_id0(&entry.value, vec![]) + .map(Option::Some); } } Ok(None) @@ -631,7 +685,10 @@ impl ID0Section { } /// read the label set at address, if any - pub fn label_at(&self, id0_addr: impl Id0AddressKey) -> Result> { + pub fn label_at( + &self, + id0_addr: impl Id0AddressKey, + ) -> Result> { let key: Vec = key_from_address(id0_addr.as_u64(), self.is_64) .chain(Some(b'N')) .collect(); @@ -643,8 +700,8 @@ impl ID0Section { let key_len = key.len(); let key = &entry.key[key_len..]; ensure!(key.is_empty(), "Label ID0 entry with key"); - let label = - parse_maybe_cstr(&entry.value).ok_or_else(|| anyhow!("Label is not valid CStr"))?; + let label = parse_maybe_cstr(&entry.value) + .ok_or_else(|| anyhow!("Label is not valid CStr"))?; Ok(Some(label)) } @@ -863,7 +920,8 @@ impl ID0Page { let reused_key = last_key .get(..indent.into()) .ok_or_else(|| anyhow!("key indent is too small"))?; 
- let key: Vec = reused_key.iter().copied().chain(ext_key).collect(); + let key: Vec = + reused_key.iter().copied().chain(ext_key).collect(); // update the last key last_key.clear(); @@ -967,7 +1025,10 @@ impl ID0Page { } } -pub(crate) fn key_from_address(address: u64, is_64: bool) -> impl Iterator { +pub(crate) fn key_from_address( + address: u64, + is_64: bool, +) -> impl Iterator { b".".iter().copied().chain(if is_64 { address.to_be_bytes().to_vec() } else { diff --git a/src/id0/dirtree.rs b/src/id0/dirtree.rs index 287e0ed..45f06bf 100644 --- a/src/id0/dirtree.rs +++ b/src/id0/dirtree.rs @@ -16,7 +16,10 @@ impl DirTreeRoot { Self::inner_visit_leafs(&mut handle, &self.entries); } - fn inner_visit_leafs(handle: &mut impl FnMut(&T), entries: &[DirTreeEntry]) { + fn inner_visit_leafs( + handle: &mut impl FnMut(&T), + entries: &[DirTreeEntry], + ) { for entry in entries { match entry { DirTreeEntry::Leaf(entry) => handle(entry), @@ -92,7 +95,10 @@ impl FromDirTreeNumber for Id0TilOrd { /// "\x2e\xff\x00\x00\x31\x53\x00\x02\x00\x00":"\x01\x62\x00\x00\x00\x0d\x90\x20\x80\x88\x08\x10\x80\xe9\x04\x80\xe7\x82\x36\x06\xff\xff\xff\xfc\xd0\xff\xff\xff\xff\x60\x50\x83\x0a\x00\x0d" /// ... /// "N$ dirtree/funcs":"\x31\x00\x00\xff" -pub(crate) fn parse_dirtree<'a, T, I>(entries_iter: I, is_64: bool) -> Result> +pub(crate) fn parse_dirtree<'a, T, I>( + entries_iter: I, + is_64: bool, +) -> Result> where T: FromDirTreeNumber, I: IntoIterator>, @@ -158,7 +164,11 @@ fn dirtree_directory_from_raw( .get_mut(&number) .ok_or_else(|| anyhow!("Invalid dirtree subfolder index"))? 
.take() - .ok_or_else(|| anyhow!("Same entry in dirtree is owned by multiple parents"))?; + .ok_or_else(|| { + anyhow!( + "Same entry in dirtree is owned by multiple parents" + ) + })?; let DirTreeEntryRaw { name, parent, @@ -183,7 +193,9 @@ struct DirTreeEntryRaw { } impl DirTreeEntryRaw { - fn from_raw(data: &mut I) -> Result { + fn from_raw( + data: &mut I, + ) -> Result { // TODO It's unclear if this value is a version, it seems so match data.read_u8()? { 0 => Self::from_raw_v0(data), @@ -192,7 +204,9 @@ impl DirTreeEntryRaw { } } - fn from_raw_v0(data: &mut I) -> Result { + fn from_raw_v0( + data: &mut I, + ) -> Result { // part 1: header let name = data.read_c_string_raw()?; // TODO maybe just a unpack_dd followed by \x00 @@ -203,8 +217,7 @@ impl DirTreeEntryRaw { let mut entries = vec![]; for is_value in core::iter::successors(Some(false), |x| Some(!(*x))) { // TODO unpack_dw/u8? - // TODO diferenciate an error from EoF - let Ok(entries_len) = data.unpack_dd() else { + let Some(entries_len) = data.unpack_dd_or_eof()? else { break; }; parse_entries(&mut *data, &mut entries, entries_len, is_value)?; @@ -256,7 +269,9 @@ impl DirTreeEntryRaw { /// | entries folder | \x00 | 0..0 are folders | /// | entries values | \x0c | from 0..12 are values | /// - fn from_raw_v1(data: &mut I) -> Result { + fn from_raw_v1( + data: &mut I, + ) -> Result { // part 1: header let name = data.read_c_string_raw()?; // TODO maybe just a unpack_dd followed by \x00 @@ -264,6 +279,7 @@ impl DirTreeEntryRaw { // this value had known values of 0 and 4, as long it's smaller then 0x80 there no // much of a problem, otherwise this could be a unpack_dw/unpack_dd let _unknown: u8 = bincode::deserialize_from(&mut *data)?; + #[cfg(feature = "restrictive")] ensure!(_unknown < 0x80); // TODO unpack_dw/u8? 
let entries_len = data.unpack_dd()?; @@ -280,13 +296,20 @@ impl DirTreeEntryRaw { // NOTE in case the folder have 0 elements, there will be a 0 value, but don't take that for granted for is_value in core::iter::successors(Some(false), |x| Some(!(*x))) { // TODO unpack_dw/u8? - let num = match data.unpack_dd() { - Ok(num) => num, - // this is an empty folder, so the last value is optional - Err(_) if entries_len == 0 => break, - Err(e) => return Err(e), + let Some(num) = data.unpack_dd_or_eof()? else { + if entries_len == 0 { + // this is an empty folder, so the last value is optional + break; + } else { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Unexpected EoF while reading dirtree entries", + ) + .into()); + } }; - let num = usize::try_from(num).map_err(|_| anyhow!("Invalid number of entries"))?; + let num = usize::try_from(num) + .map_err(|_| anyhow!("Invalid number of entries"))?; ensure!( current_entry.len() >= num, "Invalid number of entry of type in dirtree" @@ -348,10 +371,12 @@ where // no more entries return Ok(None); }; - let (idx, sub_idx, entry) = - next_entry.map_err(|_| anyhow!("Missing expected dirtree entry"))?; + let (idx, sub_idx, entry) = next_entry + .map_err(|_| anyhow!("Missing expected dirtree entry"))?; if sub_idx != 0 { - return Err(anyhow!("Non zero sub_idx for dirtree folder entry")); + return Err(anyhow!( + "Non zero sub_idx for dirtree folder entry" + )); } (idx, sub_idx, entry) } @@ -380,12 +405,13 @@ where let Some(next_entry) = self.iter.next() else { return Ok(false); }; - let (next_idx, next_sub_idx, next_entry) = next_entry.map_err(|_| { - std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Missing part of dirtree entry", - ) - })?; + let (next_idx, next_sub_idx, next_entry) = next_entry + .map_err(|_| { + std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Missing part of dirtree entry", + ) + })?; if next_idx != *idx { // found a EoF for this entry if next_sub_idx != 0 { @@ -465,7 
+491,9 @@ where fn fill_buf(&mut self) -> std::io::Result<&[u8]> { match self.state { DirtreeEntryState::Next { .. } => Ok(&[]), - DirtreeEntryState::Reading { entry, .. } if !entry.is_empty() => Ok(entry), + DirtreeEntryState::Reading { entry, .. } if !entry.is_empty() => { + Ok(entry) + } DirtreeEntryState::Reading { .. } => { if !self.next_sub_entry()? { return Ok(&[]); @@ -497,7 +525,8 @@ fn parse_entries( None => rel_value, // other are relative from the previous Some(last_value_old) => { - let mut value = last_value_old.wrapping_add_signed(rel_value as i64); + let mut value = + last_value_old.wrapping_add_signed(rel_value as i64); // NOTE that in 32bits it wrapps using the u32 limit if !data.is_64() { value &= u32::MAX as u64; diff --git a/src/id0/root_info.rs b/src/id0/root_info.rs index 08e40e8..9fcba13 100644 --- a/src/id0/root_info.rs +++ b/src/id0/root_info.rs @@ -211,8 +211,8 @@ impl IDBParam { let mut cpu = vec![0; cpu_len]; input.read_exact(&mut cpu)?; // remove any \x00 that marks the end of the str - let cpu_str_part = - parse_maybe_cstr(&cpu[..]).ok_or_else(|| anyhow!("Invalid RootInfo CStr cpu name"))?; + let cpu_str_part = parse_maybe_cstr(&cpu[..]) + .ok_or_else(|| anyhow!("Invalid RootInfo CStr cpu name"))?; cpu.truncate(cpu_str_part.len()); // TODO tight those ranges up @@ -223,7 +223,10 @@ impl IDBParam { match version { // TODO old version may contain extra data at the end with unknown purpose ..=699 => {} - 700.. => ensure!(input.inner().is_empty(), "Data left after the IDBParam",), + 700.. 
=> ensure!( + input.inner().is_empty(), + "Data left after the IDBParam", + ), } Ok(param) } @@ -449,7 +452,8 @@ impl IDBParam { } let nametype = input.read_u8()?; - let nametype = NameType::new(nametype).ok_or_else(|| anyhow!("Invalid NameType value"))?; + let nametype = NameType::new(nametype) + .ok_or_else(|| anyhow!("Invalid NameType value"))?; let short_demnames = input.unpack_dd()?; let long_demnames = input.unpack_dd()?; let demnames = DemName::new(input.read_u8()?)?; diff --git a/src/id0/segment.rs b/src/id0/segment.rs index 056c402..edcd823 100644 --- a/src/id0/segment.rs +++ b/src/id0/segment.rs @@ -59,10 +59,9 @@ impl Segment { .map(|name_id| { // TODO I think this is dependent on the version, and not on availability if let Some(names) = names { - names - .get(&name_id) - .map(Vec::to_owned) - .ok_or_else(|| anyhow!("Not found name for segment {name_id}")) + names.get(&name_id).map(Vec::to_owned).ok_or_else(|| { + anyhow!("Not found name for segment {name_id}") + }) } else { // if there is no names, AKA `$ segstrings`, search for the key directly id0.name_by_index(name_id.get().into()).map(<[u8]>::to_vec) diff --git a/src/id1.rs b/src/id1.rs index 2ec9fe5..2693077 100644 --- a/src/id1.rs +++ b/src/id1.rs @@ -33,7 +33,10 @@ impl ID1Section { } } - fn read_inner(input: &mut impl IdaGenericUnpack, header: &IDBHeader) -> Result { + fn read_inner( + input: &mut impl IdaGenericUnpack, + header: &IDBHeader, + ) -> Result { // TODO pages are always 0x2000? 
const PAGE_SIZE: usize = 0x2000; let mut buf = vec![0; PAGE_SIZE]; @@ -41,8 +44,13 @@ impl ID1Section { let mut header_page = &buf[..]; let version = VaVersion::read(&mut header_page)?; let (npages, seglist_raw) = match version { - VaVersion::Va0 | VaVersion::Va1 | VaVersion::Va2 | VaVersion::Va3 | VaVersion::Va4 => { - let nsegments: u16 = bincode::deserialize_from(&mut header_page)?; + VaVersion::Va0 + | VaVersion::Va1 + | VaVersion::Va2 + | VaVersion::Va3 + | VaVersion::Va4 => { + let nsegments: u16 = + bincode::deserialize_from(&mut header_page)?; let npages: u16 = bincode::deserialize_from(&mut header_page)?; ensure!( npages > 0, @@ -52,13 +60,19 @@ impl ID1Section { // TODO the reference code uses the magic version, should it use // the version itself instead? - let seglist: Vec = if header.magic_version.is_64() { + let seglist: Vec = if header + .magic_version + .is_64() + { (0..nsegments) .map(|_| { - let start: u64 = bincode::deserialize_from(&mut header_page)?; - let end: u64 = bincode::deserialize_from(&mut header_page)?; + let start: u64 = + bincode::deserialize_from(&mut header_page)?; + let end: u64 = + bincode::deserialize_from(&mut header_page)?; ensure!(start <= end); - let offset: u64 = bincode::deserialize_from(&mut header_page)?; + let offset: u64 = + bincode::deserialize_from(&mut header_page)?; Ok(SegInfoVaNRaw { address: start..end, offset, @@ -68,10 +82,13 @@ impl ID1Section { } else { (0..nsegments) .map(|_| { - let start: u32 = bincode::deserialize_from(&mut header_page)?; - let end: u32 = bincode::deserialize_from(&mut header_page)?; + let start: u32 = + bincode::deserialize_from(&mut header_page)?; + let end: u32 = + bincode::deserialize_from(&mut header_page)?; ensure!(start <= end); - let offset: u32 = bincode::deserialize_from(&mut header_page)?; + let offset: u32 = + bincode::deserialize_from(&mut header_page)?; Ok(SegInfoVaNRaw { address: start.into()..end.into(), offset: offset.into(), @@ -82,10 +99,13 @@ impl ID1Section { 
(u32::from(npages), SegInfoRaw::VaN(seglist)) } VaVersion::VaX => { - let unknown_always3: u32 = bincode::deserialize_from(&mut header_page)?; + let unknown_always3: u32 = + bincode::deserialize_from(&mut header_page)?; ensure!(unknown_always3 == 3); - let nsegments: u32 = bincode::deserialize_from(&mut header_page)?; - let unknown_always2048: u32 = bincode::deserialize_from(&mut header_page)?; + let nsegments: u32 = + bincode::deserialize_from(&mut header_page)?; + let unknown_always2048: u32 = + bincode::deserialize_from(&mut header_page)?; ensure!(unknown_always2048 == 2048); let npages: u32 = bincode::deserialize_from(&mut header_page)?; @@ -95,8 +115,12 @@ impl ID1Section { .map(|_| { let (start, end) = match header.magic_version { crate::IDBMagic::IDA0 | crate::IDBMagic::IDA1 => { - let startea: u32 = bincode::deserialize_from(&mut header_page)?; - let endea: u32 = bincode::deserialize_from(&mut header_page)?; + let startea: u32 = bincode::deserialize_from( + &mut header_page, + )?; + let endea: u32 = bincode::deserialize_from( + &mut header_page, + )?; (startea.into(), endea.into()) } crate::IDBMagic::IDA2 => ( @@ -116,7 +140,9 @@ impl ID1Section { // sort segments by address let mut overlay_check = match &seglist_raw { - SegInfoRaw::VaN(segs) => segs.iter().map(|s| s.address.clone()).collect(), + SegInfoRaw::VaN(segs) => { + segs.iter().map(|s| s.address.clone()).collect() + } SegInfoRaw::VaX(segs) => segs.clone(), }; overlay_check.sort_unstable_by_key(|s| s.start); @@ -129,8 +155,10 @@ impl ID1Section { ensure!(!overlap); // make sure the data fits the available pages - let required_size: u64 = overlay_check.iter().map(|s| (s.end - s.start) * 4).sum(); - let required_pages = required_size.div_ceil(u64::try_from(PAGE_SIZE).unwrap()); + let required_size: u64 = + overlay_check.iter().map(|s| (s.end - s.start) * 4).sum(); + let required_pages = + required_size.div_ceil(u64::try_from(PAGE_SIZE).unwrap()); // TODO if the extra data at the end of the section is 
identified, review replacing <= with == // -1 because the first page is always the header ensure!(required_pages <= u64::from(npages - 1)); @@ -145,12 +173,17 @@ impl ID1Section { .map(|seg| { // skip any gaps match seg.offset.cmp(¤t_offset) { - std::cmp::Ordering::Less => return Err(anyhow!("invalid offset")), + std::cmp::Ordering::Less => { + return Err(anyhow!("invalid offset")) + } std::cmp::Ordering::Greater => { // TODO can be any deleted sector contains randon data? // skip intermidiate bytes, also ensuring they are all zeros ensure_all_bytes_are_zero( - std::io::Read::take(&mut *input, seg.offset - current_offset), + std::io::Read::take( + &mut *input, + seg.offset - current_offset, + ), &mut buf, )?; current_offset = seg.offset; @@ -158,7 +191,8 @@ impl ID1Section { std::cmp::Ordering::Equal => {} } let len = seg.address.end - seg.address.start; - let (data, _flags) = split_flags_data(&mut *input, len)?; + let (data, _flags) = + split_flags_data(&mut *input, len)?; current_offset += len * 4; Ok(SegInfo { offset: seg.address.start, @@ -172,8 +206,10 @@ impl ID1Section { // the data for the segments are stored sequentialy in disk segs.into_iter() .map(|address| { - let (data, _flags) = - split_flags_data(&mut *input, address.end - address.start)?; + let (data, _flags) = split_flags_data( + &mut *input, + address.end - address.start, + )?; Ok(SegInfo { offset: address.start, data, @@ -206,7 +242,10 @@ struct SegInfoVaNRaw { offset: u64, } -fn ensure_all_bytes_are_zero(mut input: impl IdaGenericUnpack, buf: &mut [u8]) -> Result<()> { +fn ensure_all_bytes_are_zero( + mut input: impl IdaGenericUnpack, + buf: &mut [u8], +) -> Result<()> { loop { match input.read(buf) { // found EoF @@ -214,14 +253,18 @@ fn ensure_all_bytes_are_zero(mut input: impl IdaGenericUnpack, buf: &mut [u8]) - // read something Ok(n) => ensure!(&buf[..n].iter().all(|b| *b == 0)), // ignore interrupts - Err(ref e) if matches!(e.kind(), std::io::ErrorKind::Interrupted) => {} + Err(ref e) + if 
matches!(e.kind(), std::io::ErrorKind::Interrupted) => {} Err(e) => return Err(e.into()), }; } Ok(()) } -fn ignore_bytes(mut input: impl IdaGenericUnpack, buf: &mut [u8]) -> Result<()> { +fn ignore_bytes( + mut input: impl IdaGenericUnpack, + buf: &mut [u8], +) -> Result<()> { loop { match input.read(buf) { // found EoF @@ -229,19 +272,26 @@ fn ignore_bytes(mut input: impl IdaGenericUnpack, buf: &mut [u8]) -> Result<()> // read something Ok(_n) => {} // ignore interrupts - Err(ref e) if matches!(e.kind(), std::io::ErrorKind::Interrupted) => {} + Err(ref e) + if matches!(e.kind(), std::io::ErrorKind::Interrupted) => {} Err(e) => return Err(e.into()), }; } Ok(()) } -fn split_flags_data(mut input: impl IdaGenericUnpack, len: u64) -> Result<(Vec, Vec)> { +fn split_flags_data( + mut input: impl IdaGenericUnpack, + len: u64, +) -> Result<(Vec, Vec)> { let len = usize::try_from(len).unwrap(); let mut flags = vec![0u32; len]; // SAFETY: don't worry &mut[u32] is compatible with &mut[u8] with len * 4 input.read_exact(unsafe { - &mut *core::slice::from_raw_parts_mut(flags.as_mut_ptr() as *mut u8, len * 4) + &mut *core::slice::from_raw_parts_mut( + flags.as_mut_ptr() as *mut u8, + len * 4, + ) })?; // extract the bytes into other vector and leave the flags there let data = flags diff --git a/src/ida_reader.rs b/src/ida_reader.rs index 731b22d..71d553e 100644 --- a/src/ida_reader.rs +++ b/src/ida_reader.rs @@ -3,6 +3,8 @@ use anyhow::{anyhow, ensure, Result}; use std::io::{BufRead, ErrorKind, Read, Seek}; use std::ops::Range; +use crate::til::{TypeAttribute, TypeAttributeExt}; + pub trait IdbReader: Seek + IdaGenericBufUnpack {} impl IdbReader for R {} @@ -41,9 +43,12 @@ pub trait IdaUnpack: IdaGenericUnpack { if self.is_64() { let start = self.unpack_dq()?; let len = self.unpack_dq()?; + #[cfg(feature = "restrictive")] let end = start .checked_add(len) .ok_or_else(|| anyhow!("Function range overflows"))?; + #[cfg(not(feature = "restrictive"))] + let end = 
start.saturating_add(len); Ok(start..end) } else { let start = self.unpack_dd_ext_max()?; @@ -52,7 +57,10 @@ pub trait IdaUnpack: IdaGenericUnpack { let end = match start.checked_add(len.into()) { Some(0xFFFF_FFFF) => u64::MAX, Some(value) => value, + #[cfg(feature = "restrictive")] None => return Err(anyhow!("Function range overflows")), + #[cfg(not(feature = "restrictive"))] + None => u64::MAX, }; Ok(start..end) } @@ -85,7 +93,10 @@ impl Read for IdaUnpacker { self.input.read(buf) } - fn read_vectored(&mut self, bufs: &mut [std::io::IoSliceMut<'_>]) -> std::io::Result { + fn read_vectored( + &mut self, + bufs: &mut [std::io::IoSliceMut<'_>], + ) -> std::io::Result { self.input.read_vectored(bufs) } @@ -111,7 +122,11 @@ impl BufRead for IdaUnpacker { self.input.consume(amt); } - fn read_until(&mut self, byte: u8, buf: &mut Vec) -> std::io::Result { + fn read_until( + &mut self, + byte: u8, + buf: &mut Vec, + ) -> std::io::Result { self.input.read_until(byte, buf) } @@ -140,7 +155,9 @@ pub trait IdaGenericBufUnpack: IdaGenericUnpack + BufRead { // skip the ordinal number match (format, (flags >> 31) != 0) { // formats below 0x12 doesn't have 64 bits ord - (0..=0x11, _) | (_, false) => data.extend(self.read_u32()?.to_le_bytes()), + (0..=0x11, _) | (_, false) => { + data.extend(self.read_u32()?.to_le_bytes()) + } (_, true) => data.extend(self.read_u64()?.to_le_bytes()), } @@ -170,11 +187,14 @@ pub trait IdaGenericBufUnpack: IdaGenericUnpack + BufRead { let mut nelem = 0; // TODO check no more then 9 bytes are read loop { - let Some(typ) = self.fill_buf()?.first().copied() else { + let Some(typ) = self.peek_u8()? 
else { + #[cfg(feature = "restrictive")] return Err(anyhow!(std::io::Error::new( std::io::ErrorKind::UnexpectedEof, "Unexpected EoF on DA" ))); + #[cfg(not(feature = "restrictive"))] + return Ok((nelem, base)); }; if typ & 0x80 == 0 { break; @@ -190,11 +210,14 @@ pub trait IdaGenericBufUnpack: IdaGenericUnpack + BufRead { } nelem = (z >> 4) & 7; loop { - let Some(y) = self.fill_buf()?.first().copied() else { + let Some(y) = self.peek_u8()? else { + #[cfg(feature = "restrictive")] return Err(anyhow!(std::io::Error::new( std::io::ErrorKind::UnexpectedEof, "Unexpected EoF on DA" ))); + #[cfg(not(feature = "restrictive"))] + return Ok((nelem, base)); }; if (y & 0x80) == 0 { break; @@ -216,7 +239,13 @@ pub trait IdaGenericBufUnpack: IdaGenericUnpack + BufRead { let mut buf = vec![]; self.read_until(b'\x00', &mut buf)?; // last char need to be \x00 or we found a EoF - ensure!(buf.pop() == Some(b'\x00'), "Unexpected EoF on CStr"); + if let Some(b'\x00') = buf.last() { + let _ = buf.pop(); // remove the \x00 from the end + } else { + // found EOF, aka could not find the \x00 for the string end + #[cfg(feature = "restrictive")] + return Err(anyhow!("Unexpected EoF on CStr")); + } Ok(buf) } @@ -231,6 +260,17 @@ pub trait IdaGenericBufUnpack: IdaGenericUnpack + BufRead { Ok(self.fill_buf()?.first().copied()) } + // InnerRef b47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x46b690 unpack_dd + // NOTE the orignal implementation never fails, if input hit EoF it a partial result or 0 + /// Reads 1 to 5 bytes. + fn unpack_dd_or_eof(&mut self) -> Result> { + let Some(b1) = self.peek_u8()? 
else { + return Ok(None); + }; + self.consume(1); + self.unpack_dd_from_byte(b1).map(Option::Some) + } + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x48ce40 fn read_ext_att(&mut self) -> Result { // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x48cec0 @@ -247,6 +287,7 @@ pub trait IdaGenericBufUnpack: IdaGenericUnpack + BufRead { if (start_value >> bit) & 1 != 0 { let value = self.read_u8()?; // TODO is this an error or expect possible value? + #[cfg(feature = "restrictive")] ensure!(value != 0); acc |= (value as u64) << byte; } @@ -257,6 +298,38 @@ pub trait IdaGenericBufUnpack: IdaGenericUnpack + BufRead { } Ok(acc) } + + fn read_tah(&mut self) -> Result> { + // TODO TAH in each type have a especial meaning, verify those + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x477080 + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x452830 + let Some(tah) = self.peek_u8()? else { + return Err(anyhow!(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Unexpected EoF on DA" + ))); + }; + if tah == 0xFE { + Ok(Some(self.read_type_attribute()?)) + } else { + Ok(None) + } + } + + fn read_sdacl(&mut self) -> Result> { + let Some(sdacl) = self.peek_u8()? 
else { + return Err(anyhow!(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "Unexpected EoF on SDACL" + ))); + }; + + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x477eff + //NOTE: original op ((sdacl as u8 & 0xcf) ^ 0xC0) as i32 <= 0x01 + matches!(sdacl, 0xC0..=0xC1 | 0xD0..=0xD1 | 0xE0..=0xE1 | 0xF0..=0xF1) + .then(|| self.read_type_attribute()) + .transpose() + } } impl IdaGenericBufUnpack for R {} @@ -267,6 +340,13 @@ pub trait IdaGenericUnpack: Read { Ok(data[0]) } + #[cfg(not(feature = "restrictive"))] + fn read_u8_or_nothing(&mut self) -> Result> { + let mut data = [0; 1]; + let read = self.read_exact_or_nothing(&mut data)?; + Ok((read == data.len()).then_some(data[0])) + } + fn read_u16(&mut self) -> Result { let mut data = [0; 2]; self.read_exact(&mut data)?; @@ -305,7 +385,13 @@ pub trait IdaGenericUnpack: Read { // NOTE: the original implementation never fails, if input hit EoF it a partial result or 0 /// Reads 1 to 3 bytes. fn unpack_dw(&mut self) -> Result { - let b1: u8 = bincode::deserialize_from(&mut *self)?; + #[cfg(feature = "restrictive")] + let b1 = self.read_u8()?; + #[cfg(not(feature = "restrictive"))] + let Some(b1) = self.read_u8_or_nothing()? 
+ else { + return Ok(0); + }; match b1 { // 7 bit value // [0xxx xxxx] @@ -313,7 +399,10 @@ pub trait IdaGenericUnpack: Read { // 14 bits value // [10xx xxxx] xxxx xxxx 0x80..=0xBF => { - let lo: u8 = bincode::deserialize_from(&mut *self)?; + #[cfg(feature = "restrictive")] + let lo = self.read_u8()?; + #[cfg(not(feature = "restrictive"))] + let lo = self.read_u8_or_nothing()?.unwrap_or(0); Ok(u16::from_be_bytes([b1 & 0x3F, lo])) } // 16 bits value @@ -321,7 +410,16 @@ pub trait IdaGenericUnpack: Read { 0xC0..=0xFF => { // NOTE first byte 6 bits seems to be ignored //ensure!(header != 0xC0 && header != 0xFF); - Ok(u16::from_be_bytes(bincode::deserialize_from(&mut *self)?)) + #[cfg(feature = "restrictive")] + let (lo, hi) = (self.read_u8()?, self.read_u8()?); + + #[cfg(not(feature = "restrictive"))] + let (lo, hi) = ( + self.read_u8_or_nothing()?.unwrap_or(0), + self.read_u8_or_nothing()?.unwrap_or(0), + ); + + Ok(u16::from_be_bytes([lo, hi])) } } } @@ -330,7 +428,11 @@ pub trait IdaGenericUnpack: Read { // NOTE the orignal implementation never fails, if input hit EoF it a partial result or 0 /// Reads 1 to 5 bytes. 
fn unpack_dd(&mut self) -> Result { - let b1: u8 = bincode::deserialize_from(&mut *self)?; + let b1 = self.read_u8()?; + self.unpack_dd_from_byte(b1) + } + + fn unpack_dd_from_byte(&mut self, b1: u8) -> Result { match b1 { // 7 bit value // [0xxx xxxx] @@ -338,13 +440,20 @@ pub trait IdaGenericUnpack: Read { // 14 bits value // [10xx xxxx] xxxx xxxx 0x80..=0xBF => { - let lo: u8 = bincode::deserialize_from(&mut *self)?; + #[cfg(feature = "restrictive")] + let lo = self.read_u8()?; + #[cfg(not(feature = "restrictive"))] + let lo = self.read_u8_or_nothing()?.unwrap_or(0); Ok(u32::from_be_bytes([0, 0, b1 & 0x3F, lo])) } // 29 bit value: // [110x xxxx] xxxx xxxx xxxx xxxx xxxx xxxx 0xC0..=0xDF => { - let bytes: [u8; 3] = bincode::deserialize_from(&mut *self)?; + let mut bytes: [u8; 3] = [0; 3]; + #[cfg(feature = "restrictive")] + self.read_exact(&mut bytes)?; + #[cfg(not(feature = "restrictive"))] + let _size = self.read_exact_or_nothing(&mut bytes)?; Ok(u32::from_be_bytes([ b1 & 0x1F, bytes[0], @@ -357,7 +466,12 @@ pub trait IdaGenericUnpack: Read { 0xE0..=0xFF => { // NOTE first byte 5 bits seems to be ignored //ensure!(header != 0xE0 && header != 0xFF); - Ok(u32::from_be_bytes(bincode::deserialize_from(&mut *self)?)) + let mut bytes: [u8; 4] = [0; 4]; + #[cfg(feature = "restrictive")] + self.read_exact(&mut bytes)?; + #[cfg(not(feature = "restrictive"))] + let _size = self.read_exact_or_nothing(&mut bytes)?; + Ok(u32::from_be_bytes(bytes)) } } } @@ -384,14 +498,20 @@ pub trait IdaGenericUnpack: Read { fn unpack_ds(&mut self) -> Result> { let len = self.unpack_dd()?; let mut result = vec![0; len.try_into()?]; + #[cfg(feature = "restrictive")] self.read_exact(&mut result)?; + #[cfg(not(feature = "restrictive"))] + let _size = self.read_exact_or_nothing(&mut result)?; Ok(result) } fn unpack_dt_bytes(&mut self) -> Result> { let buf_len = self.read_dt()?; let mut buf = vec![0; buf_len.into()]; + #[cfg(feature = "restrictive")] self.read_exact(&mut buf)?; + 
#[cfg(not(feature = "restrictive"))] + let _size = self.read_exact_or_nothing(&mut buf)?; Ok(buf) } @@ -403,13 +523,19 @@ pub trait IdaGenericUnpack: Read { // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x48cdb0 let mut acc: u32 = 0; for _ in 0..5 { - let b: u32 = self.read_u8()?.into(); + #[cfg(feature = "restrictive")] + let b = self.read_u8()?; + #[cfg(not(feature = "restrictive"))] + let Some(b) = self.read_u8_or_nothing()? + else { + return Ok(acc); + }; if b & 0x80 == 0 { - acc = (b & 0x3F) | (acc << 6); + acc = (b & 0x3F) as u32 | (acc << 6); return Ok(acc); } - acc = (acc << 7) | (b & 0x7F); + acc = (acc << 7) | (b & 0x7F) as u32; } Err(anyhow!("Can't find the end of DE")) } @@ -420,12 +546,19 @@ pub trait IdaGenericUnpack: Read { fn read_dt(&mut self) -> Result { // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x48cd60 let value = match self.read_u8()? { + #[cfg(feature = "restrictive")] 0 => return Err(anyhow!("DT can't have 0 value")), + #[cfg(not(feature = "restrictive"))] + 0 => return Ok(0), //SEG = 2 value @ 0x80.. => { - let inter: u16 = self.read_u8()?.into(); + #[cfg(feature = "restrictive")] + let inter = self.read_u8()?; + #[cfg(not(feature = "restrictive"))] + let inter = self.read_u8_or_nothing()?.unwrap_or(0); + #[cfg(feature = "restrictive")] ensure!(inter != 0, "DT can't have a following 0 value"); - value as u16 & 0x7F | inter << 7 + value as u16 & 0x7F | (inter as u16) << 7 } //SEG = 1 value @ ..=0x7F => value.into(), @@ -461,6 +594,76 @@ pub trait IdaGenericUnpack: Read { } } + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x452830 + fn read_type_attribute(&mut self) -> Result { + use crate::til::flag::tattr_ext::*; + #[cfg(feature = "restrictive")] + let byte0 = self.read_u8()?; + #[cfg(not(feature = "restrictive"))] + let Some(byte0) = self.read_u8_or_nothing()? 
+ else { + return Ok(TypeAttribute { + tattr: 0, + extended: None, + }); + }; + let mut tattr = 0; + if byte0 != 0xfe { + tattr = ((byte0 as u16 & 1) | ((byte0 as u16 >> 3) & 6)) + 1; + } + if byte0 == 0xFE || tattr == 8 { + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x452878 + let mut shift = 0; + // TODO limit the loop to only 0..n + loop { + #[cfg(feature = "restrictive")] + let next_byte = self.read_u8()?; + #[cfg(not(feature = "restrictive"))] + let Some(next_byte) = self.read_u8_or_nothing()? + else { + break; + }; + ensure!( + next_byte != 0, + "Failed to parse TypeAttribute, byte is zero" + ); + tattr |= ((next_byte & 0x7F) as u16) << shift; + if next_byte & 0x80 == 0 { + break; + } + shift += 7; + ensure!( + shift < u16::BITS, + "Failed to find the end of type attribute" + ); + } + } + + if tattr & TAH_HASATTRS == 0 { + return Ok(TypeAttribute { + tattr, + extended: None, + }); + } + // consume this flag + tattr &= !TAH_HASATTRS; + + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x45289e + let loop_cnt = self.read_dt()?; + let extended = (0..loop_cnt) + .map(|_| { + let _value1 = self.unpack_dt_bytes()?; + let _value2 = self.unpack_dt_bytes()?; + // TODO maybe more... 
+ Ok(TypeAttributeExt { _value1, _value2 }) + }) + .collect::>()?; + Ok(TypeAttribute { + tattr, + extended: Some(extended), + }) + } + fn read_bytes_len_u16(&mut self) -> Result> { let len = self.read_u16()?; let mut bytes = vec![0u8; len.into()]; diff --git a/src/lib.rs b/src/lib.rs index b5873b2..43c85a9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,10 +1,13 @@ +#[forbid(unsafe_code)] pub mod id0; pub mod id1; pub(crate) mod ida_reader; pub mod nam; pub mod til; +use std::borrow::Cow; use std::fmt::Debug; +use std::fmt::Write; use std::io::SeekFrom; use std::num::NonZeroU64; @@ -80,15 +83,30 @@ impl IDBParser { } pub fn read_id0_section(&mut self, id0: ID0Offset) -> Result { - read_section(&mut self.input, &self.header, id0.0.get(), ID0Section::read) + read_section( + &mut self.input, + &self.header, + id0.0.get(), + ID0Section::read, + ) } pub fn read_id1_section(&mut self, id1: ID1Offset) -> Result { - read_section(&mut self.input, &self.header, id1.0.get(), ID1Section::read) + read_section( + &mut self.input, + &self.header, + id1.0.get(), + ID1Section::read, + ) } pub fn read_nam_section(&mut self, nam: NamOffset) -> Result { - read_section(&mut self.input, &self.header, nam.0.get(), NamSection::read) + read_section( + &mut self.input, + &self.header, + nam.0.get(), + NamSection::read, + ) } pub fn read_til_section(&mut self, til: TILOffset) -> Result { @@ -96,7 +114,7 @@ impl IDBParser { &mut self.input, &self.header, til.0.get(), - |input, _, compress| TILSection::read(input, compress), + |input, _header, compressed| TILSection::read(input, compressed), ) } @@ -106,12 +124,14 @@ impl IDBParser { output: &mut impl std::io::Write, ) -> Result<()> { self.input.seek(SeekFrom::Start(offset.idb_offset()))?; - let section_header = IDBSectionHeader::read(&self.header, &mut self.input)?; + let section_header = + IDBSectionHeader::read(&self.header, &mut self.input)?; // makes sure the reader doesn't go out-of-bounds - let mut input = std::io::Read::take(&mut 
self.input, section_header.len); + let mut input = + std::io::Read::take(&mut self.input, section_header.len); match section_header.compress { IDBSectionCompression::Zlib => { - let mut input = flate2::read::ZlibDecoder::new(input); + let mut input = flate2::bufread::ZlibDecoder::new(input); let _ = std::io::copy(&mut input, output)?; } IDBSectionCompression::None => { @@ -127,9 +147,11 @@ impl IDBParser { output: &mut impl std::io::Write, ) -> Result<()> { self.input.seek(SeekFrom::Start(til.0.get()))?; - let section_header = IDBSectionHeader::read(&self.header, &mut self.input)?; + let section_header = + IDBSectionHeader::read(&self.header, &mut self.input)?; // makes sure the reader doesn't go out-of-bounds - let mut input = std::io::Read::take(&mut self.input, section_header.len); + let mut input = + std::io::Read::take(&mut self.input, section_header.len); TILSection::decompress(&mut input, output, section_header.compress) } } @@ -142,7 +164,11 @@ fn read_section<'a, I, T, F>( ) -> Result where I: IdbReader, - F: FnMut(&mut std::io::Take<&'a mut I>, &IDBHeader, IDBSectionCompression) -> Result, + F: FnMut( + &mut std::io::Take<&'a mut I>, + &IDBHeader, + IDBSectionCompression, + ) -> Result, { input.seek(SeekFrom::Start(offset))?; let section_header = IDBSectionHeader::read(header, &mut *input)?; @@ -295,15 +321,18 @@ impl IDBHeader { ) -> Result { #[derive(Debug, Deserialize)] struct V1Raw { - id2_offset: u32, + _id2_offset: u32, checksums: [u32; 3], - unk30_zeroed: u32, + _unk30_zeroed: u32, unk33_checksum: u32, } let v1_raw: V1Raw = bincode::deserialize_from(input)?; - ensure!(v1_raw.unk30_zeroed == 0, "unk30 not zeroed"); - ensure!(v1_raw.id2_offset == 0, "id2 in V1 is not zeroed"); + #[cfg(feature = "restrictive")] + { + ensure!(v1_raw._unk30_zeroed == 0, "unk30 not zeroed"); + ensure!(v1_raw._id2_offset == 0, "id2 in V1 is not zeroed"); + } // TODO ensure all offsets point to after the header Ok(Self { @@ -328,20 +357,23 @@ impl IDBHeader { ) -> Result { 
#[derive(Debug, Deserialize)] struct V4Raw { - id2_offset: u32, + _id2_offset: u32, checksums: [u32; 3], - unk30_zeroed: u32, + _unk30_zeroed: u32, unk33_checksum: u32, - unk38_zeroed: [u8; 8], - unk40_v5c: u32, + _unk38_zeroed: [u8; 8], + _unk40_v5c: u32, } let v4_raw: V4Raw = bincode::deserialize_from(input)?; - ensure!(v4_raw.unk30_zeroed == 0, "unk30 not zeroed"); - ensure!(v4_raw.id2_offset == 0, "id2 in V4 is not zeroed"); - ensure!(v4_raw.unk38_zeroed == [0; 8], "unk38 is not zeroed"); - ensure!(v4_raw.unk40_v5c == 0x5c, "unk40 is not 0x5C"); + #[cfg(feature = "restrictive")] + { + ensure!(v4_raw._unk30_zeroed == 0, "unk30 not zeroed"); + ensure!(v4_raw._id2_offset == 0, "id2 in V4 is not zeroed"); + ensure!(v4_raw._unk38_zeroed == [0; 8], "unk38 is not zeroed"); + ensure!(v4_raw._unk40_v5c == 0x5c, "unk40 is not 0x5C"); + } // TODO ensure all offsets point to after the header Ok(Self { @@ -367,27 +399,33 @@ impl IDBHeader { #[derive(Debug, Deserialize)] struct V5Raw { nam_offset: u64, - seg_offset_zeroed: u64, + _seg_offset_zeroed: u64, til_offset: u64, initial_checksums: [u32; 3], - unk4_zeroed: u32, + _unk4_zeroed: u32, unk_checksum: u32, - id2_offset_zeroed: u64, + _id2_offset_zeroed: u64, final_checksum: u32, - unk0_v7c: u32, + _unk0_v7c: u32, } let v5_raw: V5Raw = bincode::deserialize_from(input)?; - let id0_offset = - u64::from_le(u64::from(header_raw.offsets[1]) << 32 | u64::from(header_raw.offsets[0])); - let id1_offset = - u64::from_le(u64::from(header_raw.offsets[3]) << 32 | u64::from(header_raw.offsets[2])); + let id0_offset = u64::from_le( + u64::from(header_raw.offsets[1]) << 32 + | u64::from(header_raw.offsets[0]), + ); + let id1_offset = u64::from_le( + u64::from(header_raw.offsets[3]) << 32 + | u64::from(header_raw.offsets[2]), + ); // TODO Final checksum is always zero on v5? 
- - ensure!(v5_raw.unk4_zeroed == 0, "unk4 not zeroed"); - ensure!(v5_raw.id2_offset_zeroed == 0, "id2 in V5 is not zeroed"); - ensure!(v5_raw.seg_offset_zeroed == 0, "seg in V5 is not zeroed"); - ensure!(v5_raw.unk0_v7c == 0x7C, "unk0 not 0x7C"); + #[cfg(feature = "restrictive")] + { + ensure!(v5_raw._unk4_zeroed == 0, "unk4 not zeroed"); + ensure!(v5_raw._id2_offset_zeroed == 0, "id2 in V5 is not zeroed"); + ensure!(v5_raw._seg_offset_zeroed == 0, "seg in V5 is not zeroed"); + ensure!(v5_raw._unk0_v7c == 0x7C, "unk0 not 0x7C"); + } // TODO ensure all offsets point to after the header Ok(Self { @@ -414,24 +452,31 @@ impl IDBHeader { #[derive(Debug, Deserialize)] struct V6Raw { nam_offset: u64, - seg_offset_zeroed: u64, + _seg_offset_zeroed: u64, til_offset: u64, initial_checksums: [u32; 3], - unk4_zeroed: [u8; 4], + _unk4_zeroed: [u8; 4], unk5_checksum: u32, id2_offset: u64, final_checksum: u32, - unk0_v7c: u32, + _unk0_v7c: u32, } let v6_raw: V6Raw = bincode::deserialize_from(input)?; - let id0_offset = - u64::from_le(u64::from(header_raw.offsets[1]) << 32 | u64::from(header_raw.offsets[0])); - let id1_offset = - u64::from_le(u64::from(header_raw.offsets[3]) << 32 | u64::from(header_raw.offsets[2])); - - ensure!(v6_raw.unk4_zeroed == [0; 4], "unk4 not zeroed"); - ensure!(v6_raw.seg_offset_zeroed == 0, "seg in V6 is not zeroed"); - ensure!(v6_raw.unk0_v7c == 0x7C, "unk0 not 0x7C"); + let id0_offset = u64::from_le( + u64::from(header_raw.offsets[1]) << 32 + | u64::from(header_raw.offsets[0]), + ); + let id1_offset = u64::from_le( + u64::from(header_raw.offsets[3]) << 32 + | u64::from(header_raw.offsets[2]), + ); + + #[cfg(feature = "restrictive")] + { + ensure!(v6_raw._unk4_zeroed == [0; 4], "unk4 not zeroed"); + ensure!(v6_raw._seg_offset_zeroed == 0, "seg in V6 is not zeroed"); + ensure!(v6_raw._unk0_v7c == 0x7C, "unk0 not 0x7C"); + } // TODO ensure all offsets point to after the header Ok(Self { @@ -453,7 +498,10 @@ impl IDBHeader { } impl IDBSectionHeader { - 
pub fn read(header: &IDBHeader, input: impl IdaGenericUnpack) -> Result { + pub fn read( + header: &IDBHeader, + input: impl IdaGenericUnpack, + ) -> Result { match header.version { IDBVersion::V1 | IDBVersion::V4 => { #[derive(Debug, Deserialize)] @@ -515,7 +563,40 @@ impl VaVersion { } } -fn write_string_len_u8(mut output: O, value: &[u8]) -> Result<()> { +#[derive(Clone)] +pub struct IDBString(Vec); + +impl IDBString { + pub fn new(data: Vec) -> Self { + Self(data) + } + + pub fn as_utf8_lossy(&self) -> Cow { + String::from_utf8_lossy(&self.0) + } + + pub fn as_bytes(&self) -> &[u8] { + &self.0 + } + + pub fn into_inner(self) -> Vec { + self.0 + } +} + +impl std::fmt::Debug for IDBString { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_char('"')?; + f.write_str(&self.as_utf8_lossy())?; + f.write_char('"')?; + Ok(()) + } +} + +fn write_string_len_u8( + mut output: O, + value: &[u8], +) -> Result<()> { output.write_all(&[u8::try_from(value.len()).unwrap()])?; Ok(output.write_all(value)?) 
} @@ -536,8 +617,8 @@ mod test { 0xaf, 0x81, 0x42, 0x01, 0x53, // TODO 0x01, // void ret 0x03, //n args - 0x3d, 0x08, 0x48, 0x4d, 0x4f, 0x44, 0x55, 0x4c, 0x45, 0x3d, 0x06, 0x44, 0x57, 0x4f, - 0x52, 0x44, 0x00, + 0x3d, 0x08, 0x48, 0x4d, 0x4f, 0x44, 0x55, 0x4c, 0x45, 0x3d, 0x06, + 0x44, 0x57, 0x4f, 0x52, 0x44, 0x00, ]; let _til = til::Type::new_from_id0(&function, vec![]).unwrap(); } @@ -596,13 +677,13 @@ mod test { 0x0a, // arg1 type pointer 0xfe, 0x10, // TypeAttribute val 0x02, // dt len 1 - 0x0d, 0x5f, 0x5f, 0x6f, 0x72, 0x67, 0x5f, 0x61, 0x72, 0x72, 0x64, 0x69, - 0x6d, // TODO some _string: "__org_arrdim" + 0x0d, 0x5f, 0x5f, 0x6f, 0x72, 0x67, 0x5f, 0x61, 0x72, 0x72, 0x64, + 0x69, 0x6d, // TODO some _string: "__org_arrdim" 0x03, 0xac, 0x01, // TODO _other_thing 0x0d, // arg1 pointer type struct 0x01, // struct ref - 0x0e, 0x5f, 0x5f, 0x6a, 0x6d, 0x70, 0x5f, 0x62, 0x75, 0x66, 0x5f, 0x74, 0x61, - 0x67, // "__jmp_buf_tag" + 0x0e, 0x5f, 0x5f, 0x6a, 0x6d, 0x70, 0x5f, 0x62, 0x75, 0x66, 0x5f, + 0x74, 0x61, 0x67, // "__jmp_buf_tag" 0x00, // end of type ]; let _til = til::Type::new_from_id0(&function, vec![]).unwrap(); @@ -636,14 +717,15 @@ mod test { // arg4 ... 0x0a, // pointer 0x7d, // const typedef - 0x08, 0x41, 0x45, 0x53, 0x5f, 0x4b, 0x45, 0x59, // ordinal "AES_KEY" + 0x08, 0x41, 0x45, 0x53, 0x5f, 0x4b, 0x45, + 0x59, // ordinal "AES_KEY" // arg5 0xff, 0x48, // some flag in function arg 0x0a, // pointer 0xfe, 0x10, // TypeAttribute val 0x02, // TypeAttribute loop once - 0x0d, 0x5f, 0x5f, 0x6f, 0x72, 0x67, 0x5f, 0x61, 0x72, 0x72, 0x64, 0x69, - 0x6d, // string "__org_arrdim" + 0x0d, 0x5f, 0x5f, 0x6f, 0x72, 0x67, 0x5f, 0x61, 0x72, 0x72, 0x64, + 0x69, 0x6d, // string "__org_arrdim" 0x03, 0xac, 0x10, // ???? 
some other TypeAttribute field 0x22, // type unsigned __int8 // arg6 @@ -651,8 +733,8 @@ mod test { 0x0a, // pointer 0xfe, 0x10, // TypeAttribute val 0x02, // TypeAttribute loop once - 0x0d, 0x5f, 0x5f, 0x6f, 0x72, 0x67, 0x5f, 0x61, 0x72, 0x72, 0x64, 0x69, - 0x6d, // string "__org_arrdim" + 0x0d, 0x5f, 0x5f, 0x6f, 0x72, 0x67, 0x5f, 0x61, 0x72, 0x72, 0x64, + 0x69, 0x6d, // string "__org_arrdim" 0x03, 0xac, 0x10, // ???? some other TypeAttribute field 0x22, // type unsigned __int8 // arg7 ... @@ -736,7 +818,11 @@ mod test { #[test] fn parse_idbs() { - let files = find_all("resources/idbs".as_ref(), &["idb".as_ref(), "i64".as_ref()]).unwrap(); + let files = find_all( + "resources/idbs".as_ref(), + &["idb".as_ref(), "i64".as_ref()], + ) + .unwrap(); for filename in files { parse_idb(filename) } @@ -769,7 +855,8 @@ mod test { }; let _: Vec<_> = id0.segments().unwrap().map(Result::unwrap).collect(); - let _: Vec<_> = id0.loader_name().unwrap().map(Result::unwrap).collect(); + let _: Vec<_> = + id0.loader_name().unwrap().map(Result::unwrap).collect(); let _: Vec<_> = id0.root_info().unwrap().map(Result::unwrap).collect(); let _: Vec<_> = id0 .file_regions(version) @@ -810,7 +897,8 @@ mod test { #[test] fn parse_tils() { - let files = find_all("resources/tils".as_ref(), &["til".as_ref()]).unwrap(); + let files = + find_all("resources/tils".as_ref(), &["til".as_ref()]).unwrap(); let _results = files .into_iter() .map(|file| { @@ -833,7 +921,11 @@ mod test { } fn find_all(path: &Path, exts: &[&OsStr]) -> Result> { - fn inner_find_all(path: &Path, exts: &[&OsStr], buf: &mut Vec) -> Result<()> { + fn inner_find_all( + path: &Path, + exts: &[&OsStr], + buf: &mut Vec, + ) -> Result<()> { for entry in std::fs::read_dir(path)?.map(Result::unwrap) { let entry_type = entry.metadata()?.file_type(); if entry_type.is_dir() { diff --git a/src/nam.rs b/src/nam.rs index 83d226d..6f11574 100644 --- a/src/nam.rs +++ b/src/nam.rs @@ -38,13 +38,18 @@ impl NamSection { let version = 
VaVersion::read(&mut header_page)?; let (npages, nnames, pagesize) = match version { - VaVersion::Va0 | VaVersion::Va1 | VaVersion::Va2 | VaVersion::Va3 | VaVersion::Va4 => { + VaVersion::Va0 + | VaVersion::Va1 + | VaVersion::Va2 + | VaVersion::Va3 + | VaVersion::Va4 => { let always1: u16 = bincode::deserialize_from(&mut header_page)?; ensure!(always1 == 1); let npages: u64 = if header.magic_version.is_64() { bincode::deserialize_from(&mut header_page)? } else { - bincode::deserialize_from::<_, u32>(&mut header_page)?.into() + bincode::deserialize_from::<_, u32>(&mut header_page)? + .into() }; let always0: u16 = bincode::deserialize_from(&mut header_page)?; ensure!(always0 == 0); @@ -52,24 +57,29 @@ impl NamSection { // TODO nnames / 2? Why? bincode::deserialize_from::<_, u64>(&mut header_page)? / 2 } else { - bincode::deserialize_from::<_, u32>(&mut header_page)?.into() + bincode::deserialize_from::<_, u32>(&mut header_page)? + .into() }; - let pagesize: u32 = bincode::deserialize_from(&mut header_page)?; + let pagesize: u32 = + bincode::deserialize_from(&mut header_page)?; ensure!(pagesize >= 64); (npages, nnames, pagesize) } VaVersion::VaX => { let always3: u32 = bincode::deserialize_from(&mut header_page)?; ensure!(always3 == 3); - let one_or_zero: u32 = bincode::deserialize_from(&mut header_page)?; + let one_or_zero: u32 = + bincode::deserialize_from(&mut header_page)?; ensure!([0, 1].contains(&one_or_zero)); // TODO always2048 have some relation to pagesize? - let always2048: u32 = bincode::deserialize_from(&mut header_page)?; + let always2048: u32 = + bincode::deserialize_from(&mut header_page)?; ensure!(always2048 == 2048); let npages: u64 = if header.magic_version.is_64() { bincode::deserialize_from(&mut header_page)? } else { - bincode::deserialize_from::<_, u32>(&mut header_page)?.into() + bincode::deserialize_from::<_, u32>(&mut header_page)? 
+ .into() }; let always0: u32 = bincode::deserialize_from(&mut header_page)?; ensure!(always0 == 0); @@ -77,7 +87,8 @@ impl NamSection { // TODO nnames / 2? Why? bincode::deserialize_from::<_, u64>(&mut header_page)? / 2 } else { - bincode::deserialize_from::<_, u32>(&mut header_page)?.into() + bincode::deserialize_from::<_, u32>(&mut header_page)? + .into() }; (npages, nnames, DEFAULT_PAGE_SIZE.try_into().unwrap()) } @@ -115,7 +126,8 @@ impl NamSection { let name = if header.magic_version.is_64() { bincode::deserialize_from::<_, u64>(&mut input) } else { - bincode::deserialize_from::<_, u32>(&mut input).map(u64::from) + bincode::deserialize_from::<_, u32>(&mut input) + .map(u64::from) }; let Ok(name) = name else { break; diff --git a/src/til.rs b/src/til.rs index 6d10644..f0fd53b 100644 --- a/src/til.rs +++ b/src/til.rs @@ -9,7 +9,13 @@ pub mod section; pub mod r#struct; pub mod union; -use std::num::{NonZeroU16, NonZeroU8}; +mod size_calculator; + +use section::TILSectionHeader; +pub use size_calculator::*; + +use std::collections::HashMap; +use std::num::NonZeroU8; use anyhow::{anyhow, ensure, Context, Result}; @@ -21,21 +27,63 @@ use crate::til::function::{Function, FunctionRaw}; use crate::til::pointer::{Pointer, PointerRaw}; use crate::til::r#enum::{Enum, EnumRaw}; use crate::til::r#struct::{Struct, StructRaw}; -use crate::til::section::TILSectionHeader; use crate::til::union::{Union, UnionRaw}; +use crate::IDBString; #[derive(Debug, Clone)] pub struct TILTypeInfo { - _flags: u32, - pub name: Vec, + pub name: IDBString, pub ordinal: u64, pub tinfo: Type, +} + +impl TILTypeInfo { + pub(crate) fn new( + til: &TILSectionHeader, + type_by_name: &HashMap, usize>, + type_by_ord: &HashMap, + name: IDBString, + ordinal: u64, + tinfo_raw: TypeRaw, + fields: Vec>, + ) -> Result { + let mut fields_iter = fields + .into_iter() + .map(|field| (!field.is_empty()).then_some(IDBString::new(field))); + let tinfo = Type::new( + til, + type_by_name, + type_by_ord, + tinfo_raw, 
+ &mut fields_iter, + )?; + #[cfg(feature = "restrictive")] + ensure!( + fields_iter.next().is_none(), + "Extra fields found for til type \"{}\"", + name.as_utf8_lossy() + ); + Ok(Self { + name, + ordinal, + tinfo, + }) + } +} + +#[derive(Debug, Clone)] +pub(crate) struct TILTypeInfoRaw { + _flags: u32, + pub name: IDBString, + pub ordinal: u64, + pub tinfo: TypeRaw, _cmt: Vec, _fieldcmts: Vec, + fields: Vec>, _sclass: u8, } -impl TILTypeInfo { +impl TILTypeInfoRaw { pub(crate) fn read( input: &mut impl IdaGenericBufUnpack, til: &TILSectionHeader, @@ -51,7 +99,8 @@ impl TILTypeInfo { input.read_raw_til_type(til.format)? }; let mut cursor = &data[..]; - let result = TILTypeInfo::read_inner(&mut cursor, til)?; + let result = Self::read_inner(&mut cursor, til)?; + #[cfg(feature = "restrictive")] ensure!( cursor.is_empty(), "Unable to parse til type fully, left {} bytes", @@ -63,34 +112,32 @@ impl TILTypeInfo { fn read_inner(cursor: &mut &[u8], til: &TILSectionHeader) -> Result { let flags: u32 = cursor.read_u32()?; // TODO verify if flags equal to 0x7fff_fffe? 
- let name = cursor.read_c_string_raw()?; + let name = IDBString::new(cursor.read_c_string_raw()?); let is_u64 = (flags >> 31) != 0; let ordinal = match (til.format, is_u64) { // formats below 0x12 doesn't have 64 bits ord (0..=0x11, _) | (_, false) => cursor.read_u32()?.into(), (_, true) => cursor.read_u64()?, }; - let tinfo_raw = - TypeRaw::read(&mut *cursor, til).context("parsing `TILTypeInfo::tiinfo`")?; + let tinfo = TypeRaw::read(&mut *cursor, til).with_context(|| { + format!( + "parsing `TILTypeInfo::tiinfo` for type \"{}\"", + name.as_utf8_lossy() + ) + })?; let _info = cursor.read_c_string_raw()?; let cmt = cursor.read_c_string_raw()?; let fields = cursor.read_c_string_vec()?; let fieldcmts = cursor.read_c_string_raw()?; let sclass: u8 = cursor.read_u8()?; - let mut fields_iter = fields.into_iter(); - let tinfo = Type::new(til, tinfo_raw, &mut fields_iter)?; - ensure!( - fields_iter.as_slice().is_empty(), - "Extra fields found for til" - ); - Ok(Self { _flags: flags, name, ordinal, tinfo, _cmt: cmt, + fields, _fieldcmts: fieldcmts, _sclass: sclass, }) @@ -110,43 +157,62 @@ pub enum TypeVariant { Pointer(Pointer), Function(Function), Array(Array), - Typedef(Typedef), + Typeref(Typeref), Struct(Struct), Union(Union), Enum(Enum), - // TODO narrow what kinds of Type can be inside the Ref - StructRef(Box), - UnionRef(Box), - EnumRef(Box), Bitfield(Bitfield), } impl Type { pub(crate) fn new( til: &TILSectionHeader, + type_by_name: &HashMap, usize>, + type_by_ord: &HashMap, tinfo_raw: TypeRaw, - fields: &mut impl Iterator>, + fields: &mut impl Iterator>, ) -> Result { let type_variant = match tinfo_raw.variant { TypeVariantRaw::Basic(x) => TypeVariant::Basic(x), TypeVariantRaw::Bitfield(x) => TypeVariant::Bitfield(x), - TypeVariantRaw::Typedef(x) => TypeVariant::Typedef(x), - TypeVariantRaw::Pointer(x) => Pointer::new(til, x, fields).map(TypeVariant::Pointer)?, + TypeVariantRaw::Typedef(x) => { + Typeref::new(type_by_name, type_by_ord, x) + 
.map(TypeVariant::Typeref)? + } + TypeVariantRaw::Pointer(x) => { + Pointer::new(til, type_by_name, type_by_ord, x, fields) + .map(TypeVariant::Pointer)? + } TypeVariantRaw::Function(x) => { - Function::new(til, x, fields).map(TypeVariant::Function)? + Function::new(til, type_by_name, type_by_ord, x, fields) + .map(TypeVariant::Function)? + } + TypeVariantRaw::Array(x) => { + Array::new(til, type_by_name, type_by_ord, x, fields) + .map(TypeVariant::Array)? + } + TypeVariantRaw::Struct(x) => { + Struct::new(til, type_by_name, type_by_ord, x, fields) + .map(TypeVariant::Struct)? + } + TypeVariantRaw::Union(x) => { + Union::new(til, type_by_name, type_by_ord, x, fields) + .map(TypeVariant::Union)? } - TypeVariantRaw::Array(x) => Array::new(til, x, fields).map(TypeVariant::Array)?, - TypeVariantRaw::Struct(x) => Struct::new(til, x, fields).map(TypeVariant::Struct)?, - TypeVariantRaw::Union(x) => Union::new(til, x, fields).map(TypeVariant::Union)?, - TypeVariantRaw::Enum(x) => Enum::new(til, x, fields).map(TypeVariant::Enum)?, - TypeVariantRaw::StructRef(type_raw) => { - TypeVariant::StructRef(Box::new(Type::new(til, *type_raw, fields)?)) + TypeVariantRaw::Enum(x) => { + Enum::new(til, x, fields).map(TypeVariant::Enum)? } - TypeVariantRaw::UnionRef(type_raw) => { - TypeVariant::UnionRef(Box::new(Type::new(til, *type_raw, fields)?)) + TypeVariantRaw::StructRef(x) => { + Typeref::new_struct(type_by_name, type_by_ord, x) + .map(TypeVariant::Typeref)? } - TypeVariantRaw::EnumRef(type_raw) => { - TypeVariant::EnumRef(Box::new(Type::new(til, *type_raw, fields)?)) + TypeVariantRaw::UnionRef(x) => { + Typeref::new_union(type_by_name, type_by_ord, x) + .map(TypeVariant::Typeref)? + } + TypeVariantRaw::EnumRef(x) => { + Typeref::new_enum(type_by_name, type_by_ord, x) + .map(TypeVariant::Typeref)? 
} }; Ok(Self { @@ -156,23 +222,30 @@ impl Type { }) } // TODO find the best way to handle type parsing from id0 - pub(crate) fn new_from_id0(data: &[u8], fields: Vec>) -> Result { + pub(crate) fn new_from_id0( + data: &[u8], + fields: Vec>, + ) -> Result { // TODO it's unclear what header information id0 types use to parse tils // maybe it just use the til sector header, or more likelly it's from // IDBParam in the `Root Node` let header = section::TILSectionHeader { format: 12, flags: section::TILSectionFlags(0), - title: Vec::new(), - description: Vec::new(), - compiler_id: 0, - cm: 0, + description: IDBString::new(Vec::new()), + dependencies: Vec::new(), size_enum: None, size_int: 4.try_into().unwrap(), size_bool: 1.try_into().unwrap(), def_align: None, size_long_double: None, extended_sizeof_info: None, + cc: None, + cn: None, + type_ordinal_alias: None, + is_universal: true, + compiler_id: crate::id0::Compiler::Unknown, + cm: None, }; let mut reader = data; let type_raw = TypeRaw::read(&mut reader, &header)?; @@ -181,17 +254,31 @@ impl Type { &[b'\x00'] => {} // in continuations, the \x00 may be missing &[] => {} - rest => { + _rest => { + #[cfg(feature = "restrictive")] return Err(anyhow!( "Extra {} bytes after reading TIL from ID0", - rest.len() + _rest.len() )); } } - let mut fields_iter = fields.into_iter(); - let result = Self::new(&header, type_raw, &mut fields_iter)?; + let mut fields_iter = fields.into_iter().map(|field| { + if field.is_empty() { + None + } else { + Some(IDBString::new(field)) + } + }); + let result = Self::new( + &header, + &HashMap::new(), + &HashMap::new(), + type_raw, + &mut fields_iter, + )?; + #[cfg(feature = "restrictive")] ensure!( - fields_iter.as_slice().is_empty(), + fields_iter.next().is_none(), "Extra fields found for id0 til" ); Ok(result) @@ -211,18 +298,21 @@ pub(crate) enum TypeVariantRaw { Pointer(PointerRaw), Function(FunctionRaw), Array(ArrayRaw), - Typedef(Typedef), + Typedef(TypedefRaw), Struct(StructRaw), 
Union(UnionRaw), Enum(EnumRaw), - StructRef(Box), - UnionRef(Box), - EnumRef(Box), + StructRef(TypedefRaw), + UnionRef(TypedefRaw), + EnumRef(TypedefRaw), Bitfield(Bitfield), } impl TypeRaw { - pub fn read(input: &mut impl IdaGenericBufUnpack, til: &TILSectionHeader) -> Result { + pub fn read( + input: &mut impl IdaGenericBufUnpack, + til: &TILSectionHeader, + ) -> Result { let metadata: u8 = input.read_u8()?; let type_base = metadata & flag::tf_mask::TYPE_BASE_MASK; let type_flags = metadata & flag::tf_mask::TYPE_FLAGS_MASK; @@ -237,31 +327,41 @@ impl TypeRaw { // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x472e13 print_til_type let variant = match (type_base, type_flags) { (..=flag::tf_last_basic::BT_LAST_BASIC, _) => { - Basic::new(til, type_base, type_flags).map(TypeVariantRaw::Basic)? + Basic::new(til, type_base, type_flags) + .context("Type::Basic") + .map(TypeVariantRaw::Basic)? } // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x4804d7 - (flag::tf_ptr::BT_PTR, _) => PointerRaw::read(input, til, type_flags) - .context("Type::Pointer") - .map(TypeVariantRaw::Pointer)?, + (flag::tf_ptr::BT_PTR, _) => { + PointerRaw::read(input, til, type_flags) + .context("Type::Pointer") + .map(TypeVariantRaw::Pointer)? + } // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x48075a - (flag::tf_array::BT_ARRAY, _) => ArrayRaw::read(input, til, type_flags) - .context("Type::Array") - .map(TypeVariantRaw::Array)?, + (flag::tf_array::BT_ARRAY, _) => { + ArrayRaw::read(input, til, type_flags) + .context("Type::Array") + .map(TypeVariantRaw::Array)? + } // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x48055d - (flag::tf_func::BT_FUNC, _) => FunctionRaw::read(input, til, type_flags) - .context("Type::Function") - .map(TypeVariantRaw::Function)?, + (flag::tf_func::BT_FUNC, _) => { + FunctionRaw::read(input, til, type_flags) + .context("Type::Function") + .map(TypeVariantRaw::Function)? 
+ } (flag::tf_complex::BT_BITFIELD, _) => TypeVariantRaw::Bitfield( Bitfield::read(input, type_flags).context("Type::Bitfield")?, ), // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x480369 - (flag::tf_complex::BT_COMPLEX, flag::tf_complex::BTMT_TYPEDEF) => Typedef::read(input) - .context("Type::Typedef") - .map(TypeVariantRaw::Typedef)?, + (flag::tf_complex::BT_COMPLEX, flag::tf_complex::BTMT_TYPEDEF) => { + TypedefRaw::read(input) + .context("Type::Typedef") + .map(TypeVariantRaw::Typedef)? + } // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x480378 @@ -285,7 +385,9 @@ impl TypeRaw { (flag::tf_complex::BT_COMPLEX, _) => unreachable!(), // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x47395d print_til_type - (flag::BT_RESERVED, _) => return Err(anyhow!("Wrong/Unknown type: {metadata:02x}")), + (flag::BT_RESERVED, _) => { + return Err(anyhow!("Wrong/Unknown type: {metadata:02x}")) + } (flag::BT_RESERVED.., _) => unreachable!(), }; @@ -296,7 +398,10 @@ impl TypeRaw { }) } - pub fn read_ref(input: &mut impl IdaGenericUnpack, header: &TILSectionHeader) -> Result { + pub fn read_ref( + input: &mut impl IdaGenericUnpack, + header: &TILSectionHeader, + ) -> Result { let mut bytes = input.unpack_dt_bytes()?; if !bytes.starts_with(b"=") { @@ -306,9 +411,8 @@ impl TypeRaw { let mut bytes = &bytes[..]; let result = TypeRaw::read(&mut bytes, header)?; - if !bytes.is_empty() { - return Err(anyhow!("Unable to fully parser Type ref")); - } + #[cfg(feature = "restrictive")] + ensure!(bytes.is_empty(), "Unable to fully parser Type ref"); Ok(result) } } @@ -364,7 +468,9 @@ impl Basic { // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x480874 BT_UNK => { let bytes = match btmt { - BTMT_SIZE0 => return Err(anyhow!("forbidden use of BT_UNK")), + BTMT_SIZE0 => { + return Err(anyhow!("forbidden use of BT_UNK")) + } BTMT_SIZE12 => 2, // BT_UNK_WORD BTMT_SIZE48 => 8, // BT_UNK_QWORD BTMT_SIZE128 => 0, // BT_UNKNOWN @@ -398,7 +504,10 @@ impl Basic { return match bt_int { BT_INT8 => 
Ok(Self::Char), BT_INT => Ok(Self::SegReg), // BT_SEGREG - _ => Err(anyhow!("Reserved use of tf_int::BTMT_CHAR {:x}", btmt)), + _ => Err(anyhow!( + "Reserved use of tf_int::BTMT_CHAR {:x}", + btmt + )), }; } _ => unreachable!(), @@ -453,13 +562,12 @@ impl Basic { } #[derive(Clone, Debug)] -pub enum Typedef { - // TODO make this a `Id0TilOrd` +pub enum TypedefRaw { Ordinal(u32), - Name(Vec), + Name(Option), } -impl Typedef { +impl TypedefRaw { fn read(input: &mut impl IdaGenericUnpack) -> Result { let buf = input.unpack_dt_bytes()?; match &buf[..] { @@ -467,15 +575,114 @@ impl Typedef { let mut tmp = data; let de = tmp.read_de()?; if !tmp.is_empty() { - return Err(anyhow!("Typedef Ordinal with more data then expected")); + return Err(anyhow!( + "Typedef Ordinal with more data then expected" + )); } - Ok(Typedef::Ordinal(de)) + Ok(Self::Ordinal(de)) } - _ => Ok(Typedef::Name(buf)), + _ => Ok(Self::Name(if buf.is_empty() { + None + } else { + Some(IDBString::new(buf)) + })), } } } +#[derive(Clone, Debug)] +pub struct Typeref { + pub ref_type: Option, + pub typeref_value: TyperefValue, +} + +#[derive(Clone, Debug)] +pub enum TyperefValue { + Ref(usize), + UnsolvedName(Option), + UnsolvedOrd(u32), +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum TyperefType { + Struct, + Union, + Enum, +} + +impl Typeref { + pub(crate) fn new( + type_by_name: &HashMap, usize>, + type_by_ord: &HashMap, + tyref: TypedefRaw, + ) -> Result { + let pos = match tyref { + // TODO check is ord is set on the header + TypedefRaw::Ordinal(ord) => { + let Some(pos) = type_by_ord.get(&(ord.into())) else { + return Ok(Self { + ref_type: None, + typeref_value: TyperefValue::UnsolvedOrd(ord), + }); + }; + pos + } + TypedefRaw::Name(None) => { + return Ok(Self { + ref_type: None, + typeref_value: TyperefValue::UnsolvedName(None), + }) + } + TypedefRaw::Name(Some(name)) => { + let Some(pos) = type_by_name.get(name.as_bytes()) else { + return Ok(Self { + ref_type: None, + typeref_value: 
TyperefValue::UnsolvedName(Some(name)), + }); + }; + pos + } + }; + Ok(Self { + ref_type: None, + typeref_value: TyperefValue::Ref(*pos), + }) + } + + fn new_struct( + type_by_name: &HashMap, usize>, + type_by_ord: &HashMap, + x: TypedefRaw, + ) -> Result { + let mut result = Self::new(type_by_name, type_by_ord, x)?; + result.ref_type = Some(TyperefType::Struct); + // TODO check the inner type is in fact a struct + Ok(result) + } + + fn new_union( + type_by_name: &HashMap, usize>, + type_by_ord: &HashMap, + x: TypedefRaw, + ) -> Result { + let mut result = Self::new(type_by_name, type_by_ord, x)?; + result.ref_type = Some(TyperefType::Union); + // TODO check the inner type is in fact a union + Ok(result) + } + + fn new_enum( + type_by_name: &HashMap, usize>, + type_by_ord: &HashMap, + x: TypedefRaw, + ) -> Result { + let mut result = Self::new(type_by_name, type_by_ord, x)?; + result.ref_type = Some(TyperefType::Enum); + // TODO check the inner type is in fact a enum + Ok(result) + } +} + #[derive(Debug, Copy, Clone)] pub enum TILModifier { Const, @@ -506,7 +713,11 @@ impl TILMacro { let have_param = flag & 0x100 != 0; let param_num = have_param.then_some((flag & 0xFF) as u8); if !have_param { - ensure!(flag & 0xFF == 0, "Unknown/Invalid value for TILMacro flag"); + #[cfg(feature = "restrictive")] + ensure!( + flag & 0xFF == 0, + "Unknown/Invalid value for TILMacro flag" + ); } // TODO find the InnerRef for this let value = input.read_c_string_raw()?; @@ -527,7 +738,9 @@ impl TILMacro { } match (max_param, param_idx) { (None, _) => max_param = Some(param_idx), - (Some(max), param_idx) if param_idx > max => max_param = Some(param_idx), + (Some(max), param_idx) if param_idx > max => { + max_param = Some(param_idx) + } (Some(_), _) => {} } Some(TILMacroValue::Param(param_idx)) @@ -539,12 +752,14 @@ impl TILMacro { (_, None) => {} // having params, where should not (None, Some(_max)) => { + #[cfg(feature = "restrictive")] return Err(anyhow!( "Macro value have params 
but it is not declared in the flag", )) } // only using params that exist (Some(params), Some(max)) if max <= params => { + #[cfg(feature = "restrictive")] ensure!( max <= params, "Macro value have more params then declared in the flag" @@ -571,88 +786,16 @@ pub struct TypeFlag(pub u8); #[derive(Clone, Copy, Debug)] pub struct CallingConventionFlag(pub u8); -#[derive(Clone, Copy, Debug)] -pub struct TypeAttribute(pub u16); -impl TypeAttribute { - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x452830 - fn read(input: &mut impl IdaGenericUnpack) -> Result { - let byte0: u8 = input.read_u8()?; - let mut val = 0; - if byte0 != 0xfe { - val = ((byte0 as u16 & 1) | ((byte0 as u16 >> 3) & 6)) + 1; - } - if byte0 == 0xFE || val == 8 { - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x452878 - let mut shift = 0; - // TODO limit the loop to only 0..n - loop { - let next_byte: u8 = input.read_u8()?; - ensure!( - next_byte != 0, - "Failed to parse TypeAttribute, byte is zero" - ); - val |= ((next_byte & 0x7F) as u16) << shift; - if next_byte & 0x80 == 0 { - break; - } - shift += 7; - } - } - - if val & 0x10 == 0 { - return Ok(TypeAttribute(val)); - } - - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x45289e - let loop_cnt = input.read_dt()?; - for _ in 0..loop_cnt { - let _string = input.unpack_dt_bytes()?; - let _other_thing = input.unpack_dt_bytes()?; - // TODO maybe more... 
- } - Ok(TypeAttribute(val)) - } -} - -#[derive(Clone, Copy, Debug)] -pub struct TAH(pub TypeAttribute); -impl TAH { - fn read(input: &mut impl IdaGenericBufUnpack) -> Result { - // TODO TAH in each type have a especial meaning, verify those - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x477080 - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x452830 - let Some(tah) = input.fill_buf()?.first().copied() else { - return Err(anyhow!(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Unexpected EoF on DA" - ))); - }; - if tah == 0xFE { - Ok(Self(TypeAttribute::read(input)?)) - } else { - Ok(Self(TypeAttribute(0))) - } - } +#[derive(Clone, Debug)] +pub struct TypeAttribute { + pub tattr: u16, + pub extended: Option>, } -#[derive(Clone, Copy, Debug)] -pub struct SDACL(pub TypeAttribute); -impl SDACL { - fn read(input: &mut impl IdaGenericBufUnpack) -> Result { - let Some(sdacl) = input.fill_buf()?.first().copied() else { - return Err(anyhow!(std::io::Error::new( - std::io::ErrorKind::UnexpectedEof, - "Unexpected EoF on SDACL" - ))); - }; - - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x477eff - match sdacl { - //NOTE: original op ((sdacl & 0xcf) ^ 0xC0) <= 0x01 - 0xd0..=0xff | 0xc0 | 0xc1 => Ok(Self(TypeAttribute::read(input)?)), - _ => Ok(Self(TypeAttribute(0))), - } - } +#[derive(Clone, Debug)] +pub struct TypeAttributeExt { + pub _value1: Vec, + pub _value2: Vec, } fn serialize_dt(value: u16) -> Result> { @@ -669,48 +812,3 @@ fn serialize_dt(value: u16) -> Result> { result.push(hi as u8); Ok(result) } - -#[derive(Clone, Copy, Debug)] -pub struct StructModifierRaw { - /// Unaligned struct - is_unaligned: bool, - /// Gcc msstruct attribute - is_msstruct: bool, - /// C++ object, not simple pod type - is_cpp_obj: bool, - /// Virtual function table - is_vftable: bool, - /// Alignment in bytes - alignment: Option, - /// other unknown value - others: Option, -} - -impl StructModifierRaw { - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x46c4fc 
print_til_types_att - pub fn from_value(value: u16) -> StructModifierRaw { - use flag::tattr_udt::*; - - // TODO 0x8 seems to be a the packed flag in structs - const TAUDT_ALIGN_MASK: u16 = 0x7; - // TODO find the flag for this and the InnerRef - let is_msstruct = value & TAUDT_MSSTRUCT != 0; - let is_cpp_obj = value & TAUDT_CPPOBJ != 0; - let is_unaligned = value & TAUDT_UNALIGNED != 0; - let is_vftable = value & TAUDT_VFTABLE != 0; - let alignment_raw = value & TAUDT_ALIGN_MASK; - let alignment = - (alignment_raw != 0).then(|| NonZeroU8::new(1 << (alignment_raw - 1)).unwrap()); - let all_masks = - TAUDT_MSSTRUCT | TAUDT_CPPOBJ | TAUDT_UNALIGNED | TAUDT_VFTABLE | TAUDT_ALIGN_MASK; - let others = NonZeroU16::new(value & !all_masks); - Self { - is_unaligned, - is_msstruct, - is_cpp_obj, - is_vftable, - alignment, - others, - } - } -} diff --git a/src/til/array.rs b/src/til/array.rs index 499a5b6..670cffa 100644 --- a/src/til/array.rs +++ b/src/til/array.rs @@ -1,35 +1,48 @@ +use std::collections::HashMap; +use std::num::{NonZeroU16, NonZeroU8}; + use crate::ida_reader::IdaGenericBufUnpack; -use crate::til::section::TILSectionHeader; -use crate::til::{Type, TypeRaw, TAH}; +use crate::til::{Type, TypeAttribute, TypeRaw}; +use crate::IDBString; + +use super::section::TILSectionHeader; #[derive(Clone, Debug)] pub struct Array { + pub alignment: Option, pub base: u8, - // TODO make this Option? 
- pub nelem: u16, - pub tah: TAH, + pub nelem: Option, pub elem_type: Box, } impl Array { pub(crate) fn new( til: &TILSectionHeader, + type_by_name: &HashMap, usize>, + type_by_ord: &HashMap, value: ArrayRaw, - fields: &mut impl Iterator>, + fields: &mut impl Iterator>, ) -> anyhow::Result { Ok(Self { + alignment: value.alignment, base: value.base, nelem: value.nelem, - tah: value.tah, - elem_type: Type::new(til, *value.elem_type, fields).map(Box::new)?, + elem_type: Type::new( + til, + type_by_name, + type_by_ord, + *value.elem_type, + fields, + ) + .map(Box::new)?, }) } } #[derive(Clone, Debug)] pub(crate) struct ArrayRaw { + pub alignment: Option, pub base: u8, - pub nelem: u16, - pub tah: TAH, + pub nelem: Option, pub elem_type: Box, } @@ -39,10 +52,10 @@ impl ArrayRaw { header: &TILSectionHeader, metadata: u8, ) -> anyhow::Result { + use crate::til::flag::tattr::*; use crate::til::flag::tf_array::*; let (base, nelem) = match metadata { BTMT_NONBASED => { - // TODO if num_elem==0 then the array size is unknown let nelem = input.read_dt()?; (0, nelem) } @@ -53,12 +66,35 @@ impl ArrayRaw { } }; // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x48078e - let tah = TAH::read(&mut *input)?; + let mut alignment = None; + if let Some(TypeAttribute { + tattr, + extended: _extended, + }) = input.read_tah()? 
+ { + let alignment_raw = (tattr & MAX_DECL_ALIGN) as u8; + let _is_unknown_8 = alignment_raw & 0x8 != 0; + #[cfg(feature = "restrictive")] + anyhow::ensure!(!_is_unknown_8, "Unknown flat 8 set on Array"); + alignment = ((alignment_raw & 0x7) != 0).then(|| { + NonZeroU8::new(1 << ((alignment_raw & 0x7) - 1)).unwrap() + }); + #[cfg(feature = "restrictive")] + anyhow::ensure!( + tattr & !MAX_DECL_ALIGN == 0, + "unknown TypeAttribute {tattr:x}" + ); + #[cfg(feature = "restrictive")] + anyhow::ensure!( + _extended.is_none(), + "unknown TypeAttribute ext {_extended:x?}" + ); + } let elem_type = TypeRaw::read(&mut *input, header)?; Ok(ArrayRaw { base, - nelem, - tah, + alignment, + nelem: NonZeroU16::new(nelem), elem_type: Box::new(elem_type), }) } diff --git a/src/til/bitfield.rs b/src/til/bitfield.rs index 6c457d3..4507d08 100644 --- a/src/til/bitfield.rs +++ b/src/til/bitfield.rs @@ -1,15 +1,36 @@ +use std::num::NonZeroU8; + +use anyhow::Result; + use crate::ida_reader::IdaGenericBufUnpack; -use crate::til::TAH; -#[derive(Debug, Clone)] +use super::TypeAttribute; + +#[derive(Debug, Clone, Copy)] pub struct Bitfield { pub unsigned: bool, + // TODO what a 0 width bitfield means? The start of a new byte-field? 
+ // ntddk_win10.til + // struct _D3DKMDT_DISPLAYMODE_FLAGS { + // unsigned __int32 ValidatedAgainstMonitorCaps : 1; + // unsigned __int32 RoundedFakeMode : 1; + // unsigned __int32 : 0; + // __int32 ModePruningReason : 4; + // unsigned __int32 Stereo : 1; + // unsigned __int32 AdvancedScanCapable : 1; + // unsigned __int32 PreferredTiming : 1; + // unsigned __int32 PhysicalModeSupported : 1; + // unsigned __int32 Reserved : 24; + // }; pub width: u16, - pub nbytes: i32, + pub nbytes: NonZeroU8, } impl Bitfield { - pub(crate) fn read(input: &mut impl IdaGenericBufUnpack, metadata: u8) -> anyhow::Result { + pub(crate) fn read( + input: &mut impl IdaGenericBufUnpack, + metadata: u8, + ) -> Result { // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x472f3c print_til_type let nbytes = match metadata { super::flag::tf_complex::BTMT_BFLDI8 => 1, @@ -21,11 +42,28 @@ impl Bitfield { let dt = input.read_dt()?; let width = dt >> 1; let unsigned = (dt & 1) > 0; - let _tag = TAH::read(&mut *input)?; + match input.read_tah()? 
{ + None => {} + Some(TypeAttribute { + tattr: _tattr, + extended: _extended, + }) => { + #[cfg(feature = "restrictive")] + anyhow::ensure!( + _tattr == 0, + "Unknown TypeAttribute {_tattr:x}" + ); + #[cfg(feature = "restrictive")] + anyhow::ensure!( + _extended.is_none(), + "Unknown TypeAttribute ext {_extended:x?}" + ); + } + } Ok(Self { unsigned, width, - nbytes, + nbytes: nbytes.try_into().unwrap(), }) } } diff --git a/src/til/enum.rs b/src/til/enum.rs index df07f4f..92bb38b 100644 --- a/src/til/enum.rs +++ b/src/til/enum.rs @@ -1,14 +1,18 @@ use std::num::NonZeroU8; use crate::ida_reader::IdaGenericBufUnpack; -use crate::til::section::TILSectionHeader; -use crate::til::{flag, StructModifierRaw, TypeRaw, TypeVariantRaw, SDACL, TAH}; +use crate::til::{flag, TypeAttribute, TypeRaw, TypeVariantRaw}; +use crate::IDBString; use anyhow::{anyhow, ensure}; +use super::section::TILSectionHeader; + #[derive(Clone, Debug)] pub struct Enum { + pub is_signed: bool, + pub is_unsigned: bool, pub output_format: EnumFormat, - pub members: Vec<(Option>, u64)>, + pub members: Vec<(Option, u64)>, pub groups: Option>, pub storage_size: Option, // TODO parse type attributes @@ -18,14 +22,16 @@ impl Enum { pub(crate) fn new( _til: &TILSectionHeader, value: EnumRaw, - fields: &mut impl Iterator>, + fields: &mut impl Iterator>, ) -> anyhow::Result { let members = value .members .into_iter() - .map(|member| (fields.next(), member)) + .map(|member| (fields.next().flatten(), member)) .collect(); Ok(Self { + is_signed: value.is_signed, + is_unsigned: value.is_unsigned, output_format: value.output_format, members, groups: value.groups, @@ -36,6 +42,8 @@ impl Enum { #[derive(Clone, Debug)] pub(crate) struct EnumRaw { + is_signed: bool, + is_unsigned: bool, output_format: EnumFormat, groups: Option>, members: Vec, @@ -55,21 +63,61 @@ impl EnumRaw { // is ref // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x4803b4 let ref_type = TypeRaw::read_ref(&mut *input, header)?; - let _taenum_bits = 
SDACL::read(&mut *input)?.0; - return Ok(TypeVariantRaw::EnumRef(Box::new(ref_type))); + // TODO ensure all bits from sdacl are parsed + let _taenum_bits = input.read_sdacl()?; + let TypeVariantRaw::Typedef(ref_type) = ref_type.variant else { + return Err(anyhow!("EnumRef Non Typedef")); + }; + return Ok(TypeVariantRaw::EnumRef(ref_type)); }; - let taenum_bits = TAH::read(&mut *input)?.0; - let _modifiers = StructModifierRaw::from_value(taenum_bits.0); - // TODO parse ext attr + let mut is_64 = false; + let mut is_signed = false; + let mut is_unsigned = false; + if let Some(TypeAttribute { + tattr, + extended: _extended, + }) = input.read_tah()? + { + // TODO enum have an align field (MAX_DECL_ALIGN) in tattr? + is_64 = tattr & TAENUM_64BIT != 0; + is_signed = tattr & TAENUM_SIGNED != 0; + is_unsigned = tattr & TAENUM_UNSIGNED != 0; + #[cfg(feature = "restrictive")] + ensure!( + tattr & !(TAENUM_64BIT | TAENUM_SIGNED | TAENUM_UNSIGNED) == 0, + "Invalid Enum taenum_bits {tattr:x}" + ); + #[cfg(feature = "restrictive")] + ensure!( + !(is_signed && is_unsigned), + "Enum can't be signed and unsigned at the same time" + ); + #[cfg(feature = "restrictive")] + ensure!( + _extended.is_none(), + "Unable to parse extended attributes for Enum" + ); + } + + // all BTE bits are consumed let bte = input.read_u8()?; + let storage_size_raw = bte & BTE_SIZE_MASK; + #[cfg(feature = "restrictive")] + ensure!( + bte & BTE_RESERVED == 0, + "Enum BTE including the Always off sub-field" + ); + let have_subarrays = bte & BTE_BITFIELD != 0; + let output_format_raw = bte & BTE_OUT_MASK; // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x452312 deserialize_enum ensure!( bte & BTE_ALWAYS != 0, - "Enum BTE missing the Always sub-field" + "Enum BTE missing the Always on sub-field" ); - let storage_size: Option = match bte & BTE_SIZE_MASK { - 0 => header.size_enum, + + let storage_size: Option = match storage_size_raw { + 0 => None, emsize @ 1..=4 => Some((1 << (emsize - 
1)).try_into().unwrap()), // Allowed at InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x4523c8 deserialize_enum 5..=7 => return Err(anyhow!("BTE emsize with reserved values")), @@ -78,14 +126,15 @@ impl EnumRaw { // TODO enum size defaults to 4? let storage_size_final = storage_size.map(NonZeroU8::get).unwrap_or(4); let mask: u64 = if storage_size_final >= 16 { - // is saturating valid? - //u64::MAX + #[cfg(feature = "restrictive")] return Err(anyhow!("Bytes size is too big")); + #[cfg(not(feature = "restrictive"))] + u64::MAX } else { u64::MAX >> (u64::BITS - (storage_size_final as u32 * 8)) }; - let output_format = match bte & BTE_OUT_MASK { + let output_format = match output_format_raw { BTE_HEX => EnumFormat::Hex, BTE_CHAR => EnumFormat::Char, BTE_SDEC => EnumFormat::SignedDecimal, @@ -93,11 +142,10 @@ impl EnumRaw { _ => unreachable!(), }; - let is_64 = (taenum_bits.0 & TAENUM_64BIT) != 0; let mut low_acc: u32 = 0; let mut high_acc: u32 = 0; let mut group_acc = 0; - let mut groups = (bte & BTE_BITFIELD != 0).then_some(vec![]); + let mut groups = have_subarrays.then_some(vec![]); let members = (0..member_num) .map(|_member_idx| { if let Some(groups) = &mut groups { @@ -120,6 +168,8 @@ impl EnumRaw { .collect::>()?; Ok(TypeVariantRaw::Enum(EnumRaw { + is_signed, + is_unsigned, output_format, members, groups, diff --git a/src/til/flag.rs b/src/til/flag.rs index 21ed69f..05405e0 100644 --- a/src/til/flag.rs +++ b/src/til/flag.rs @@ -391,7 +391,9 @@ pub mod tf_conv_unk { /// Convenience definitions: shortcuts pub mod tf_shortcuts { - use super::{tf_bool, tf_complex, tf_conv_unk, tf_float, tf_int, tf_unk, TypeT}; + use super::{ + tf_bool, tf_complex, tf_conv_unk, tf_float, tf_int, tf_unk, TypeT, + }; /// byte pub const BTF_BYTE: TypeT = tf_conv_unk::BT_UNK_BYTE; /// unknown @@ -448,13 +450,16 @@ pub mod tf_shortcuts { pub const BTF_TBYTE: TypeT = tf_float::BT_FLOAT | tf_float::BTMT_SPECFLT; /// struct - pub const BTF_STRUCT: TypeT = tf_complex::BT_COMPLEX | 
tf_complex::BTMT_STRUCT; + pub const BTF_STRUCT: TypeT = + tf_complex::BT_COMPLEX | tf_complex::BTMT_STRUCT; /// union - pub const BTF_UNION: TypeT = tf_complex::BT_COMPLEX | tf_complex::BTMT_UNION; + pub const BTF_UNION: TypeT = + tf_complex::BT_COMPLEX | tf_complex::BTMT_UNION; /// enum pub const BTF_ENUM: TypeT = tf_complex::BT_COMPLEX | tf_complex::BTMT_ENUM; /// typedef - pub const BTF_TYPEDEF: TypeT = tf_complex::BT_COMPLEX | tf_complex::BTMT_TYPEDEF; + pub const BTF_TYPEDEF: TypeT = + tf_complex::BT_COMPLEX | tf_complex::BTMT_TYPEDEF; } /// Type attributes diff --git a/src/til/function.rs b/src/til/function.rs index 7063d24..088b12a 100644 --- a/src/til/function.rs +++ b/src/til/function.rs @@ -1,17 +1,19 @@ +use std::collections::HashMap; use std::num::NonZeroU8; use crate::ida_reader::{IdaGenericBufUnpack, IdaGenericUnpack}; -use crate::til::section::TILSectionHeader; -use crate::til::{Basic, Type, TypeRaw, TAH}; -use anyhow::{anyhow, ensure, Result}; +use crate::til::{Basic, Type, TypeRaw}; +use crate::IDBString; +use anyhow::{anyhow, ensure, Context, Result}; +use super::section::TILSectionHeader; use super::TypeVariantRaw; #[derive(Debug, Clone)] pub struct Function { pub calling_convention: Option, pub ret: Box, - pub args: Vec<(Option>, Type, Option)>, + pub args: Vec<(Option, Type, Option)>, pub retloc: Option, pub method: Option, @@ -28,14 +30,28 @@ pub struct Function { impl Function { pub(crate) fn new( til: &TILSectionHeader, + type_by_name: &HashMap, usize>, + type_by_ord: &HashMap, value: FunctionRaw, - fields: &mut impl Iterator>, + fields: &mut impl Iterator>, ) -> Result { - let ret = Type::new(til, *value.ret, &mut *fields)?; + let ret = Type::new( + til, + type_by_name, + type_by_ord, + *value.ret, + &mut *fields, + )?; let mut args = Vec::with_capacity(value.args.len()); for (arg_type, arg_loc) in value.args { - let field_name = fields.next(); - let new_member = Type::new(til, arg_type, &mut *fields)?; + let field_name = 
fields.next().flatten(); + let new_member = Type::new( + til, + type_by_name, + type_by_ord, + arg_type, + &mut *fields, + )?; args.push((field_name, new_member, arg_loc)); } Ok(Self { @@ -127,7 +143,7 @@ impl FunctionRaw { }; // TODO InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x473bf1 print_til_type - let (cc, mut flags, _spoiled) = read_cc(&mut *input)?; + let (cc, flags, _spoiled) = read_cc(&mut *input)?; let cc = CallingConvention::from_cm_raw(cc)?; // TODO investigate why this don't hold true @@ -141,37 +157,48 @@ impl FunctionRaw { // consume the flags and verify if a unknown value is present // TODO find those in flags - let _have_spoiled = flags & 0x0001 != 0; - flags &= !1; - let is_noret = flags & 0x0002 != 0; - flags &= !0x0002; - let is_pure = flags & 0x0004 != 0; - flags &= !0x0004; - let is_high = flags & 0x0008 != 0; - flags &= !0x0008; - let is_static = flags & 0x0010 != 0; - flags &= !0x0010; - let is_virtual = flags & 0x0020 != 0; - flags &= !0x0020; - // TODO find this flag meaning - //let is_TODO = flags & 0x0200 != 0; - flags &= !0x0200; - let is_const = flags & 0x00400 != 0; - flags &= !0x0400; - let is_constructor = flags & 0x0800 != 0; - flags &= !0x0800; - let is_destructor = flags & 0x1000 != 0; - flags &= !0x0100; - ensure!(flags == 0, "unknown function attrs({flags:04X})"); - - let _tah = TAH::read(&mut *input)?; - - let ret = TypeRaw::read(&mut *input, header)?; + let have_spoiled = flags & 0x0001 != 0; + if !have_spoiled { + ensure!(_spoiled.is_empty()); + } + let flags_lower = ((flags & 0xFF) >> 1) as u8; + + let is_noret = flags_lower & BFA_NORET != 0; + let is_pure = flags_lower & BFA_PURE != 0; + let is_high = flags_lower & BFA_HIGH != 0; + let is_static = flags_lower & BFA_STATIC != 0; + let is_virtual = flags_lower & BFA_VIRTUAL != 0; + #[cfg(feature = "restrictive")] + ensure!( + flags_lower + & !(BFA_NORET | BFA_PURE | BFA_HIGH | BFA_STATIC | BFA_VIRTUAL) + == 0 + ); + + // TODO find those flags + const BFA_CONST: u8 = 0x4; 
+ const BFA_CONSTRUCTOR: u8 = 0x8; + const BFA_DESTRUCTOR: u8 = 0x10; + let flags_upper = ((flags & 0xFF00) >> 8) as u8; + let is_const = flags_upper & BFA_CONST != 0; + let is_constructor = flags_upper & BFA_CONSTRUCTOR != 0; + let is_destructor = flags_upper & BFA_DESTRUCTOR != 0; + #[cfg(feature = "restrictive")] + ensure!( + flags_upper & !(BFA_CONST | BFA_CONSTRUCTOR | BFA_DESTRUCTOR) == 0 + ); + + let ret = + TypeRaw::read(&mut *input, header).context("Return Argument")?; // TODO double check documentation for [flag::tf_func::BT_FUN] - let is_special_pe = cc.map(CallingConvention::is_special_pe).unwrap_or(false); - let have_retloc = - is_special_pe && !matches!(&ret.variant, TypeVariantRaw::Basic(Basic::Void)); - let retloc = have_retloc.then(|| ArgLoc::read(&mut *input)).transpose()?; + let is_special_pe = + cc.map(CallingConvention::is_special_pe).unwrap_or(false); + let have_retloc = is_special_pe + && !matches!(&ret.variant, TypeVariantRaw::Basic(Basic::Void)); + let retloc = have_retloc + .then(|| ArgLoc::read(&mut *input)) + .transpose() + .context("Retloc")?; let mut result = Self { calling_convention: cc, @@ -195,17 +222,19 @@ impl FunctionRaw { let n = input.read_dt()?; result.args = (0..n) - .map(|_| -> Result<_> { + .map(|i| -> Result<_> { let tmp = input.peek_u8()?; if tmp == Some(0xFF) { input.consume(1); // TODO what is this? let _flags = input.read_de()?; } - let tinfo = TypeRaw::read(&mut *input, header)?; + let tinfo = TypeRaw::read(&mut *input, header) + .with_context(|| format!("Argument Type {i}"))?; let argloc = is_special_pe .then(|| ArgLoc::read(&mut *input)) - .transpose()?; + .transpose() + .with_context(|| format!("Argument Argloc {i}"))?; Ok((tinfo, argloc)) }) @@ -272,7 +301,12 @@ impl ArgLoc { let sval = input.read_de()?; Ok(Self::Static(sval)) } - ALOC_CUSTOM.. => Err(anyhow!("Custom implementation for ArgLoc")), + #[cfg(feature = "restrictive")] + ALOC_CUSTOM.. 
=> { + Err(anyhow!("Custom implementation for ArgLoc")) + } + #[cfg(not(feature = "restrictive"))] + ALOC_CUSTOM.. => Ok(Self::None), } } } @@ -315,9 +349,15 @@ impl CallingConvention { Ok(Some(match cm & CM_CC_MASK { // !ERR(spoil)! - CM_CC_SPOILED => return Err(anyhow!("Unexpected Spoiled Function Calling Convention")), + CM_CC_SPOILED => { + return Err(anyhow!( + "Unexpected Spoiled Function Calling Convention" + )) + } // this is an invalid value - CM_CC_INVALID => return Err(anyhow!("Invalid Function Calling Convention")), + CM_CC_INVALID => { + return Err(anyhow!("Invalid Function Calling Convention")) + } CM_CC_UNKNOWN => return Ok(None), CM_CC_VOIDARG => Self::Voidarg, CM_CC_CDECL => Self::Cdecl, @@ -370,19 +410,19 @@ impl CCPtrSize { pub const fn near_bytes(self) -> NonZeroU8 { match self { - CCPtrSize::N8F16 => unsafe { NonZeroU8::new_unchecked(1) }, - CCPtrSize::N16F32 => unsafe { NonZeroU8::new_unchecked(2) }, - CCPtrSize::N32F48 => unsafe { NonZeroU8::new_unchecked(4) }, - CCPtrSize::N64 => unsafe { NonZeroU8::new_unchecked(8) }, + CCPtrSize::N8F16 => NonZeroU8::new(1).unwrap(), + CCPtrSize::N16F32 => NonZeroU8::new(2).unwrap(), + CCPtrSize::N32F48 => NonZeroU8::new(4).unwrap(), + CCPtrSize::N64 => NonZeroU8::new(8).unwrap(), } } pub const fn far_bytes(self) -> NonZeroU8 { match self { - CCPtrSize::N8F16 => unsafe { NonZeroU8::new_unchecked(2) }, - CCPtrSize::N16F32 => unsafe { NonZeroU8::new_unchecked(4) }, - CCPtrSize::N32F48 => unsafe { NonZeroU8::new_unchecked(6) }, - CCPtrSize::N64 => unsafe { NonZeroU8::new_unchecked(8) }, + CCPtrSize::N8F16 => NonZeroU8::new(2).unwrap(), + CCPtrSize::N16F32 => NonZeroU8::new(4).unwrap(), + CCPtrSize::N32F48 => NonZeroU8::new(6).unwrap(), + CCPtrSize::N64 => NonZeroU8::new(8).unwrap(), } } } @@ -449,7 +489,9 @@ pub enum CallMethod { // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x476e60 /// [BT_FUNC](https://hex-rays.com/products/ida/support/sdkdoc/group__tf__func.html#ga7b7fee21f21237beb6d91e854410e0fa) -fn 
read_cc(input: &mut impl IdaGenericBufUnpack) -> Result<(u8, u16, Vec<(u16, u8)>)> { +fn read_cc( + input: &mut impl IdaGenericBufUnpack, +) -> Result<(u8, u16, Vec<(u16, u8)>)> { let mut cc = input.read_u8()?; // TODO find the flag for that if cc & 0xF0 != 0xA0 { @@ -513,9 +555,12 @@ fn read_cc_spoiled( } else { let size = (b >> 4) + 1; // TODO what if (b & 0xF) == 0? + #[cfg(feature = "restrictive")] let reg = (b & 0xF) .checked_sub(1) .ok_or_else(|| anyhow!("invalid spoiled reg value"))?; + #[cfg(not(feature = "restrictive"))] + let reg = (b & 0xF).saturating_sub(1); spoiled.push((reg.into(), size)) } } diff --git a/src/til/pointer.rs b/src/til/pointer.rs index 79fc92b..46ecbd0 100644 --- a/src/til/pointer.rs +++ b/src/til/pointer.rs @@ -1,8 +1,12 @@ +use std::collections::HashMap; + use anyhow::Result; use crate::ida_reader::IdaGenericBufUnpack; -use crate::til::section::TILSectionHeader; -use crate::til::{Type, TypeRaw, TAH}; +use crate::til::{Type, TypeAttribute, TypeRaw}; +use crate::IDBString; + +use super::section::TILSectionHeader; #[derive(Debug, Clone)] pub struct Pointer { @@ -15,23 +19,38 @@ pub struct Pointer { impl Pointer { pub(crate) fn new( til: &TILSectionHeader, + type_by_name: &HashMap, usize>, + type_by_ord: &HashMap, raw: PointerRaw, - fields: &mut impl Iterator>, + fields: &mut impl Iterator>, ) -> Result { let shifted = raw .shifted .map(|(t, v)| -> Result<_> { Ok(( // TODO if this type allow non typedef, this may consume fields - Type::new(til, *t, &mut vec![].into_iter()).map(Box::new)?, + Type::new( + til, + type_by_name, + type_by_ord, + *t, + &mut vec![].into_iter(), + ) + .map(Box::new)?, v, )) }) .transpose()?; - let typ = Type::new(til, *raw.typ, fields).map(Box::new)?; + let typ = Type::new(til, type_by_name, type_by_ord, *raw.typ, fields) + .map(Box::new)?; Ok(Self { // TODO forward fields to closure? 
- closure: PointerType::new(til, raw.closure)?, + closure: PointerType::new( + til, + type_by_name, + type_by_ord, + raw.closure, + )?, modifier: raw.modifier, shifted, typ, @@ -49,12 +68,17 @@ pub enum PointerType { } impl PointerType { - fn new(til: &TILSectionHeader, raw: PointerTypeRaw) -> Result { + fn new( + til: &TILSectionHeader, + type_by_name: &HashMap, usize>, + type_by_ord: &HashMap, + raw: PointerTypeRaw, + ) -> Result { match raw { PointerTypeRaw::Closure(c) => { // TODO subtype get the fields? let mut sub_fields = vec![].into_iter(); - Type::new(til, *c, &mut sub_fields) + Type::new(til, type_by_name, type_by_ord, *c, &mut sub_fields) .map(Box::new) .map(Self::Closure) } @@ -79,6 +103,8 @@ pub(crate) struct PointerRaw { pub modifier: Option, pub shifted: Option<(Box, u32)>, pub typ: Box, + // TODO find meaning: normally 5 in one type at `vc10_64` and `ntddk64` + pub _ta_lower: u8, } impl PointerRaw { @@ -87,6 +113,7 @@ impl PointerRaw { header: &TILSectionHeader, metadata: u8, ) -> Result { + use crate::til::flag::tattr::*; use crate::til::flag::tattr_ptr::*; use crate::til::flag::tf_ptr::*; // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x478d67 @@ -101,10 +128,35 @@ impl PointerRaw { }; // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x4804fa // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x459b7e - let tah = TAH::read(&mut *input)?; + let (_ta_lower, is_shifted, ptr_type_raw) = match input.read_tah()? 
{ + None => (0, false, 0), + Some(TypeAttribute { + tattr, + extended: _extended, + }) => { + // all bits of tattr are consumed + let ta_lower = (tattr & MAX_DECL_ALIGN) as u8; + let is_shifted = tattr & TAPTR_SHIFTED != 0; + let ptr_type = tattr & TAPTR_RESTRICT; + #[cfg(feature = "restrictive")] + anyhow::ensure!( + tattr & !(TAPTR_SHIFTED | TAPTR_RESTRICT | MAX_DECL_ALIGN) + == 0, + "Invalid Pointer taenum_bits {tattr:x}" + ); + if let Some(_extended) = _extended { + // TODO parse extended values, known: + // "__org_arrdim" :"\xac\xXX" + // "__org_typedef":..., + // "__argz_create":"\xac\xac" + } + (ta_lower, is_shifted, ptr_type) + } + }; + let typ = TypeRaw::read(&mut *input, header)?; // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x459bc6 - let shifted = (tah.0 .0 & TAPTR_SHIFTED != 0) + let shifted = is_shifted .then(|| -> Result<_> { // TODO allow typedef only? let typ = TypeRaw::read(&mut *input, header)?; @@ -114,25 +166,20 @@ impl PointerRaw { .transpose()?; // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x459bc6 print_til_type_att - let modifier = match tah.0 .0 & (TAPTR_RESTRICT | TAPTR_PTR64 | TAPTR_PTR32) { + let modifier = match ptr_type_raw { 0x00 => None, TAPTR_PTR32 => Some(PointerModifier::Ptr32), TAPTR_PTR64 => Some(PointerModifier::Ptr64), TAPTR_RESTRICT => Some(PointerModifier::Restricted), _ => unreachable!(), }; - // TODO other values are known to exist - //let all_flags = TAPTR_RESTRICT | TAPTR_PTR64 | TAPTR_PTR32 | TAPTR_SHIFTED; - //anyhow::ensure!( - // tah.0 .0 & !all_flags == 0, - // "Unknown value for pointer modifier" - //); Ok(Self { closure, modifier, shifted, typ: Box::new(typ), + _ta_lower, }) } } @@ -148,7 +195,10 @@ pub(crate) enum PointerTypeRaw { } impl PointerTypeRaw { - fn read(input: &mut impl IdaGenericBufUnpack, header: &TILSectionHeader) -> Result { + fn read( + input: &mut impl IdaGenericBufUnpack, + header: &TILSectionHeader, + ) -> Result { let closure_type = input.read_u8()?; if closure_type == 0xFF { // 
InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x473b5a diff --git a/src/til/section.rs b/src/til/section.rs index 0cb5f48..ba6cd08 100644 --- a/src/til/section.rs +++ b/src/til/section.rs @@ -1,32 +1,44 @@ use crate::id0::{Compiler, Id0TilOrd}; use crate::ida_reader::{IdaGenericBufUnpack, IdaGenericUnpack}; -use crate::til::{flag, Basic, TILMacro, TILTypeInfo, TypeVariant}; -use crate::IDBSectionCompression; +use crate::til::{flag, TILMacro, TILTypeInfo, TILTypeInfoRaw}; +use crate::{IDBSectionCompression, IDBString}; use anyhow::{anyhow, ensure, Result}; use serde::{Deserialize, Serialize}; -use std::collections::HashSet; + use std::fmt::Debug; use std::io::{BufReader, Read, Write}; use std::num::NonZeroU8; use super::function::{CCModel, CCPtrSize, CallingConvention}; -use super::r#enum::Enum; -use super::r#struct::Struct; -use super::union::Union; -use super::{Type, Typedef}; // TODO migrate this to flags pub const TIL_SECTION_MAGIC: &[u8; 6] = b"IDATIL"; #[derive(Debug, Clone)] pub struct TILSection { + pub header: TILSectionHeader, + pub symbols: Vec, + pub types: Vec, + pub macros: Option>, +} + +#[derive(Debug, Clone)] +pub(crate) struct TILSectionRaw { + pub header: TILSectionHeader, + pub symbols: Vec, + pub types: Vec, + pub macros: Option>, +} + +#[derive(Debug, Clone)] +pub struct TILSectionHeader { pub format: u32, /// short file name (without path and extension) - pub title: Vec, + pub description: IDBString, pub flags: TILSectionFlags, // TODO unclear what exacly dependency is for /// module required - pub dependency: Option>, + pub dependencies: Vec, /// the compiler used to generated types pub compiler_id: Compiler, /// default calling convention @@ -38,16 +50,13 @@ pub struct TILSection { //pub cc: CallingConvention, //pub cm: CCPtrSize, pub def_align: Option, - pub symbols: Vec, // TODO create a struct for ordinal aliases pub type_ordinal_alias: Option>, - pub types: Vec, pub size_int: NonZeroU8, pub size_bool: NonZeroU8, pub size_enum: Option, 
pub extended_sizeof_info: Option, pub size_long_double: Option, - pub macros: Option>, pub is_universal: bool, } @@ -59,11 +68,11 @@ pub struct TILSectionExtendedSizeofInfo { } #[derive(Debug, Clone)] -pub struct TILSectionHeader { +pub struct TILSectionHeaderRaw { pub format: u32, pub flags: TILSectionFlags, - pub title: Vec, pub description: Vec, + pub dependencies: Vec, pub compiler_id: u8, pub cm: u8, pub size_enum: Option, @@ -91,11 +100,7 @@ pub struct TILSectionHeader2 { pub def_align: u8, } -impl TILSection { - pub fn parse(mut input: impl IdaGenericBufUnpack) -> Result { - Self::read_inner(&mut input) - } - +impl TILSectionRaw { pub(crate) fn read( input: &mut impl IdaGenericBufUnpack, compress: IDBSectionCompression, @@ -103,63 +108,75 @@ impl TILSection { match compress { IDBSectionCompression::None => Self::read_inner(input), IDBSectionCompression::Zlib => { - let mut input = BufReader::new(flate2::read::ZlibDecoder::new(input)); + let mut input = + BufReader::new(flate2::bufread::ZlibDecoder::new(input)); Self::read_inner(&mut input) } } } fn read_inner(input: &mut impl IdaGenericBufUnpack) -> Result { - let header = Self::read_header(&mut *input)?; - let symbols = Self::read_bucket(&mut *input, &header, None, None)?; + let header_raw = Self::read_header(&mut *input)?; + + // TODO verify that is always false? 
+ let _mod = header_raw.flags.is_mod(); + let _uni = header_raw.flags.is_universal(); + let _ord = header_raw.flags.has_ordinal(); + let _ali = header_raw.flags.has_type_aliases(); + let _stm = header_raw.flags.has_extra_stream(); + + let cc = CallingConvention::from_cm_raw(header_raw.cm)?; + let cn = CCPtrSize::from_cm_raw(header_raw.cm, header_raw.size_int); + let cm = CCModel::from_cm_raw(header_raw.cm); + + let dependencies = if !header_raw.dependencies.is_empty() { + header_raw + .dependencies + .split(|x| *x == b',') + .map(<[_]>::to_vec) + .map(IDBString::new) + .collect() + } else { + vec![] + }; + let mut header = TILSectionHeader { + format: header_raw.format, + description: IDBString::new(header_raw.description), + flags: header_raw.flags, + dependencies, + compiler_id: Compiler::from_value(header_raw.compiler_id), + cc, + cn, + cm, + def_align: header_raw.def_align, + size_long_double: header_raw.size_long_double, + is_universal: header_raw.flags.is_universal(), + size_bool: header_raw.size_bool, + size_int: header_raw.size_int, + size_enum: header_raw.size_enum, + extended_sizeof_info: header_raw.extended_sizeof_info, + type_ordinal_alias: None, + }; + + let symbols = Self::read_bucket(&mut *input, &header, None)?; // TODO create an ordinal -> type mapping, to make sure the ordinals are not duplicated // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x42e292 let (next_ordinal, type_ordinal_alias) = Self::read_next_ordinal_and_alias(&mut *input, &header)?; - let types = Self::read_bucket( - &mut *input, - &header, - next_ordinal, - type_ordinal_alias.as_deref(), - )?; + header.type_ordinal_alias = type_ordinal_alias; + let types = Self::read_bucket(&mut *input, &header, next_ordinal)?; let macros = header .flags .has_macro_table() .then(|| Self::read_macros(&mut *input, &header)) .transpose()?; - // TODO verify that is always false? 
- let _mod = header.flags.is_mod(); - let _uni = header.flags.is_universal(); - let _ord = header.flags.has_ordinal(); - let _ali = header.flags.has_type_aliases(); - let _stm = header.flags.has_extra_stream(); - - let cc = CallingConvention::from_cm_raw(header.cm)?; - let cn = CCPtrSize::from_cm_raw(header.cm, header.size_int); - let cm = CCModel::from_cm_raw(header.cm); - - Ok(TILSection { - format: header.format, - title: header.title, - flags: header.flags, - dependency: header.description.is_empty().then_some(header.description), - compiler_id: Compiler::from_value(header.compiler_id), - cc, - cn, - cm, - def_align: header.def_align, - size_long_double: header.size_long_double, - is_universal: header.flags.is_universal(), - size_bool: header.size_bool, - size_int: header.size_int, - size_enum: header.size_enum, - extended_sizeof_info: header.extended_sizeof_info, + Ok(Self { symbols, - type_ordinal_alias, types, macros, + header, }) } @@ -215,13 +232,17 @@ impl TILSection { Ok((Some(next_ord), Some(ordinals))) } - fn read_header(input: &mut impl IdaGenericUnpack) -> Result { + fn read_header( + input: &mut impl IdaGenericUnpack, + ) -> Result { // TODO this break a few files let signature: [u8; 6] = bincode::deserialize_from(&mut *input)?; ensure!(signature == *TIL_SECTION_MAGIC, "Invalid TIL Signature"); // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x431eb5 let (format, flags) = match input.read_u32()? { - format @ 0x13.. => return Err(anyhow!("Invalid TIL format {format}")), + format @ 0x13.. 
=> { + return Err(anyhow!("Invalid TIL format {format}")) + } // read the flag after the format format @ 0x10..=0x12 => { let flags = TILSectionFlags::new(input.read_u32()?)?; @@ -236,24 +257,25 @@ impl TILSection { flags, }; - let title = input.read_bytes_len_u8()?; - let mut description = input.read_bytes_len_u8()?; + let description = input.read_bytes_len_u8()?; + let mut dependencies = input.read_bytes_len_u8()?; // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x431f64 // remove the "_arm" from the description const MACOS_ARM_EXCEPTION: &[u8] = b"macosx_arm"; - if let Some(pos) = description + if let Some(pos) = dependencies .windows(MACOS_ARM_EXCEPTION.len()) .position(|window| window == MACOS_ARM_EXCEPTION) { - description = description[..pos + 6] + dependencies = dependencies[..pos + 6] .iter() - .chain(&description[pos + MACOS_ARM_EXCEPTION.len()..]) + .chain(&dependencies[pos + MACOS_ARM_EXCEPTION.len()..]) .copied() .collect::>(); } - let header2: TILSectionHeader2 = bincode::deserialize_from(&mut *input)?; + let header2: TILSectionHeader2 = + bincode::deserialize_from(&mut *input)?; // TODO header2.cm default to 0x13 // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x42ef86 @@ -267,8 +289,10 @@ impl TILSection { let ls = input.read_u8()?; let lls = input.read_u8()?; Ok(TILSectionExtendedSizeofInfo { - size_short: NonZeroU8::new(ss).ok_or_else(|| anyhow!("Invalid short size"))?, - size_long: NonZeroU8::new(ls).ok_or_else(|| anyhow!("Invalid long size"))?, + size_short: NonZeroU8::new(ss) + .ok_or_else(|| anyhow!("Invalid short size"))?, + size_long: NonZeroU8::new(ls) + .ok_or_else(|| anyhow!("Invalid long size"))?, size_long_long: NonZeroU8::new(lls) .ok_or_else(|| anyhow!("Invalid long long size"))?, }) @@ -279,19 +303,20 @@ impl TILSection { let size_long_double = header1 .flags .has_size_long_double() - .then(|| bincode::deserialize_from::<_, u8>(&mut *input)) + .then(|| input.read_u8()) .transpose()? 
.map(|size| size.try_into()) .transpose()?; - let def_align = - (header2.def_align != 0).then(|| NonZeroU8::new(1 << (header2.def_align - 1)).unwrap()); + let def_align = (header2.def_align != 0) + .then(|| NonZeroU8::new(1 << (header2.def_align - 1)).unwrap()); - Ok(TILSectionHeader { + Ok(TILSectionHeaderRaw { format: header1.format, flags: header1.flags, - title, description, + dependencies, compiler_id: header2.compiler_id, + // TODO panic if None? size_enum: header2.size_enum.try_into().ok(), size_int: header2.size_int.try_into()?, size_bool: header2.size_bool.try_into()?, @@ -302,22 +327,184 @@ impl TILSection { }) } - pub fn decompress( + fn read_bucket_header( + input: &mut impl IdaGenericUnpack, + ) -> Result<(u32, u32)> { + let ndefs = bincode::deserialize_from(&mut *input)?; + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x42e3e0 + //ensure!(ndefs < 0x55555555); + let len = bincode::deserialize_from(&mut *input)?; + Ok((ndefs, len)) + } + + fn read_bucket_zip_header( input: &mut impl IdaGenericUnpack, + ) -> Result<(u32, u32, u32)> { + let (ndefs, len) = Self::read_bucket_header(&mut *input)?; + let compressed_len = bincode::deserialize_from(&mut *input)?; + Ok((ndefs, len, compressed_len)) + } + + fn read_bucket( + input: &mut impl IdaGenericBufUnpack, + header: &TILSectionHeader, + next_ordinal: Option, + ) -> Result> { + if header.flags.is_zip() { + Self::read_bucket_zip(&mut *input, header, next_ordinal) + } else { + Self::read_bucket_normal(&mut *input, header, next_ordinal) + } + } + + fn read_bucket_normal( + input: &mut impl IdaGenericBufUnpack, + header: &TILSectionHeader, + next_ordinal: Option, + ) -> Result> { + let (ndefs, len) = Self::read_bucket_header(&mut *input)?; + Self::read_bucket_inner(&mut *input, header, ndefs, len, next_ordinal) + } + + fn read_bucket_zip( + input: &mut impl IdaGenericBufUnpack, + header: &TILSectionHeader, + next_ordinal: Option, + ) -> Result> { + let (ndefs, len, compressed_len) = + 
Self::read_bucket_zip_header(&mut *input)?; + // make sure the decompressor don't read out-of-bounds + let mut compressed_input = input.take(compressed_len.into()); + let mut inflate = BufReader::new(flate2::bufread::ZlibDecoder::new( + &mut compressed_input, + )); + // make sure only the defined size is decompressed + let type_info = Self::read_bucket_inner( + &mut inflate, + header, + ndefs, + len, + next_ordinal, + )?; + #[cfg(feature = "restrictive")] + ensure!( + compressed_input.limit() == 0, + "TypeBucket compressed data is smaller then expected" + ); + Ok(type_info) + } + + fn read_bucket_inner( + input: &mut impl IdaGenericBufUnpack, + header: &TILSectionHeader, + ndefs: u32, + len: u32, + next_ord: Option, + ) -> Result> { + if let Some(next_ord) = next_ord { + let alias: u32 = header + .type_ordinal_alias + .as_ref() + .map(|x| x.len()) + .unwrap_or(0) + .try_into() + .unwrap(); + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x42e3e0 + ensure!(ndefs + alias + 1 <= next_ord); + } + let mut input = input.take(len.into()); + let type_info_raw: Vec<_> = (0..ndefs) + .map(|i| TILTypeInfoRaw::read(&mut input, header, i == ndefs - 1)) + .collect::>()?; + #[cfg(feature = "restrictive")] + ensure!( + input.limit() == 0, + "TypeBucket total data is smaller then expected" + ); + Ok(type_info_raw) + } + + fn read_macros( + input: &mut impl IdaGenericBufUnpack, + header: &TILSectionHeader, + ) -> Result> { + if header.flags.is_zip() { + Self::read_macros_zip(&mut *input) + } else { + Self::read_macros_normal(&mut *input) + } + } + + fn read_macros_normal( + input: &mut impl IdaGenericBufUnpack, + ) -> Result> { + let (ndefs, len) = Self::read_bucket_header(&mut *input)?; + let mut input = input.take(len.into()); + let type_info = (0..ndefs) + .map(|_| TILMacro::read(&mut input)) + .collect::>()?; + #[cfg(feature = "restrictive")] + ensure!( + input.limit() == 0, + "TypeBucket macro total data is smaller then expected" + ); + Ok(type_info) + } + + fn 
read_macros_zip( + input: &mut impl IdaGenericBufUnpack, + ) -> Result> { + let (ndefs, len, compressed_len) = + Self::read_bucket_zip_header(&mut *input)?; + // make sure the decompressor don't read out-of-bounds + let mut compressed_input = input.take(compressed_len.into()); + let inflate = BufReader::new(flate2::bufread::ZlibDecoder::new( + &mut compressed_input, + )); + // make sure only the defined size is decompressed + let mut decompressed_input = inflate.take(len.into()); + let type_info = (0..ndefs.try_into().unwrap()) + .map(|_| TILMacro::read(&mut decompressed_input)) + .collect::, _>>()?; + // make sure the input was fully consumed + #[cfg(feature = "restrictive")] + ensure!( + decompressed_input.limit() == 0, + "TypeBucket macros data is smaller then expected" + ); + #[cfg(feature = "restrictive")] + ensure!( + compressed_input.limit() == 0, + "TypeBucket macros compressed data is smaller then expected" + ); + Ok(type_info) + } + // TODO replace usize with a IDTypeIdx type +} + +impl TILSection { + pub fn decompress( + input: &mut impl IdaGenericBufUnpack, output: &mut impl Write, compress: IDBSectionCompression, ) -> Result<()> { match compress { IDBSectionCompression::Zlib => { - let mut input = flate2::read::ZlibDecoder::new(input); + let mut input = + BufReader::new(flate2::bufread::ZlibDecoder::new(input)); Self::decompress_inner(&mut input, output) } - IDBSectionCompression::None => Self::decompress_inner(input, output), + IDBSectionCompression::None => { + Self::decompress_inner(input, output) + } } } - fn decompress_inner(input: &mut impl IdaGenericUnpack, output: &mut impl Write) -> Result<()> { - let mut header = Self::read_header(&mut *input)?; + fn decompress_inner( + input: &mut impl IdaGenericBufUnpack, + output: &mut impl Write, + ) -> Result<()> { + let mut header = TILSectionRaw::read_header(&mut *input)?; let og_flags = header.flags; // disable the zip flag header.flags.set_zip(false); @@ -346,8 +533,8 @@ impl TILSection { def_align, 
}; bincode::serialize_into(&mut *output, &header1)?; - crate::write_string_len_u8(&mut *output, &header.title)?; crate::write_string_len_u8(&mut *output, &header.description)?; + crate::write_string_len_u8(&mut *output, &header.dependencies)?; bincode::serialize_into(&mut *output, &header2)?; if header.flags.have_extended_sizeof_info() { let sizes = header.extended_sizeof_info.unwrap(); @@ -362,7 +549,10 @@ impl TILSection { } if header.flags.has_size_long_double() { - bincode::serialize_into(&mut *output, &header.size_long_double.unwrap().get())?; + bincode::serialize_into( + &mut *output, + &header.size_long_double.unwrap().get(), + )?; } // if not zipped, just copy the rest of the data, there is no possible zip @@ -378,7 +568,7 @@ impl TILSection { .flags .has_ordinal() .then(|| -> Result { - let result: u32 = bincode::deserialize_from(&mut *input)?; + let result = input.read_u32()?; bincode::serialize_into(&mut *output, &result)?; Ok(result) }) @@ -395,13 +585,38 @@ impl TILSection { Ok(()) } - // TODO replace usize with a IDTypeIdx type + #[allow(dead_code)] + fn decompress_bucket( + input: &mut impl IdaGenericBufUnpack, + output: &mut impl std::io::Write, + ) -> Result<()> { + let (ndefs, len, compressed_len) = + TILSectionRaw::read_bucket_zip_header(&mut *input)?; + bincode::serialize_into(&mut *output, &TILBucketRaw { len, ndefs })?; + // write the decompressed data + let mut compressed_input = input.take(compressed_len.into()); + let inflate = flate2::bufread::ZlibDecoder::new(&mut compressed_input); + let mut decompressed_input = inflate.take(len.into()); + std::io::copy(&mut decompressed_input, output)?; + #[cfg(feature = "restrictive")] + ensure!( + decompressed_input.limit() == 0, + "TypeBucket data is smaller then expected" + ); + #[cfg(feature = "restrictive")] + ensure!( + compressed_input.limit() == 0, + "TypeBucket compressed data is smaller then expected" + ); + Ok(()) + } + pub fn get_type_by_idx(&self, idx: usize) -> &TILTypeInfo { 
&self.types[idx] } pub fn get_name_idx(&self, name: &[u8]) -> Option { - self.types.iter().position(|ty| ty.name.as_slice() == name) + self.types.iter().position(|ty| ty.name.as_bytes() == name) } pub fn get_name(&self, name: &[u8]) -> Option<&TILTypeInfo> { @@ -410,7 +625,7 @@ impl TILSection { pub fn get_ord_idx(&self, id0_ord: Id0TilOrd) -> Option { // first search the ordinal alias - if let Some(ordinals) = &self.type_ordinal_alias { + if let Some(ordinals) = &self.header.type_ordinal_alias { // it's unclear what is the first value if let Some((_src, dst)) = ordinals .iter() @@ -428,21 +643,24 @@ impl TILSection { } pub fn sizeof_short(&self) -> NonZeroU8 { - self.extended_sizeof_info + self.header + .extended_sizeof_info .as_ref() .map(|x| x.size_short) .unwrap_or(2.try_into().unwrap()) } pub fn sizeof_long(&self) -> NonZeroU8 { - self.extended_sizeof_info + self.header + .extended_sizeof_info .as_ref() .map(|x| x.size_long) .unwrap_or(4.try_into().unwrap()) } pub fn sizeof_long_long(&self) -> NonZeroU8 { - self.extended_sizeof_info + self.header + .extended_sizeof_info .as_ref() .map(|x| x.size_long_long) .unwrap_or(8.try_into().unwrap()) @@ -450,106 +668,68 @@ impl TILSection { // TODO check this impl in InnerRef pub fn addr_size(&self) -> NonZeroU8 { - self.cn + self.header + .cn .map(CCPtrSize::near_bytes) .unwrap_or(NonZeroU8::new(4).unwrap()) } +} - // TODO stub implementation - pub fn type_size_bytes(&self, type_idx: Option, ty: &Type) -> Result { - let mut map = type_idx.into_iter().collect(); - self.inner_type_size_bytes(ty, &mut map) - } - - // map is used to avoid loops - fn inner_type_size_bytes(&self, ty: &Type, map: &mut HashSet) -> Result { - Ok(match &ty.type_variant { - TypeVariant::Basic(Basic::Char) => 1, - // TODO what is the SegReg size? 
- TypeVariant::Basic(Basic::SegReg) => 1, - TypeVariant::Basic(Basic::Void) => 0, - TypeVariant::Basic(Basic::Unknown { bytes }) => (*bytes).into(), - TypeVariant::Basic(Basic::Bool) => self.size_bool.get().into(), - TypeVariant::Basic(Basic::Short { .. }) => self.sizeof_short().get().into(), - TypeVariant::Basic(Basic::Int { .. }) => self.size_int.get().into(), - TypeVariant::Basic(Basic::Long { .. }) => self.sizeof_long().get().into(), - TypeVariant::Basic(Basic::LongLong { .. }) => self.sizeof_long_long().get().into(), - TypeVariant::Basic(Basic::IntSized { bytes, .. }) => bytes.get().into(), - TypeVariant::Basic(Basic::BoolSized { bytes }) => bytes.get().into(), - // TODO what's the long double default size if it's not defined? - TypeVariant::Basic(Basic::LongDouble) => { - self.size_long_double.map(|x| x.get()).unwrap_or(8).into() - } - TypeVariant::Basic(Basic::Float { bytes }) => bytes.get().into(), - // TODO is pointer always near? Do pointer size default to 4? - TypeVariant::Pointer(_) => self.addr_size().get().into(), - TypeVariant::Function(_) => 0, // function type dont have a size, only a pointer to it - TypeVariant::Array(array) => { - let element_len = self.inner_type_size_bytes(&array.elem_type, map)?; - element_len * array.nelem as u64 - } - TypeVariant::Typedef(Typedef::Name(name)) => { - let inner_type_idx = self.get_name_idx(name).ok_or_else(|| { - anyhow!( - "Unable to find typedef by name: {}", - String::from_utf8_lossy(name) - ) - })?; - if !map.insert(inner_type_idx) { - return Err(anyhow!( - "Loop detected, type inside itself using named typedef " - )); - } - let inner_type = self.get_type_by_idx(inner_type_idx); - let result = self.inner_type_size_bytes(&inner_type.tinfo, map)?; - map.remove(&inner_type_idx); - result - } - TypeVariant::Typedef(Typedef::Ordinal(ord)) => { - let inner_type_idx = self - .get_ord_idx(crate::id0::Id0TilOrd { ord: (*ord).into() }) - .ok_or_else(|| anyhow!("Unable to find typedef by ord: {ord}",))?; - if 
!map.insert(inner_type_idx) { - return Err(anyhow!( - "Loop detected, type inside itself using ordinal typedef" - )); - } - let inner_type = self.get_type_by_idx(inner_type_idx); - let result = self.inner_type_size_bytes(&inner_type.tinfo, map)?; - map.remove(&inner_type_idx); - result - } - TypeVariant::StructRef(ref_type) - | TypeVariant::UnionRef(ref_type) - | TypeVariant::EnumRef(ref_type) => self.inner_type_size_bytes(ref_type, map)?, - TypeVariant::Struct(Struct { members, .. }) => { - let mut sum = 0u64; - // TODO default alignment, seems like default alignemnt is the field size - let align: u64 = 1; - for member in members { - let field_size = self.inner_type_size_bytes(&member.member_type, map)?; - let align_diff = sum % align; - if align_diff != 0 { - sum += align - align_diff; - } - sum += field_size; - } - sum - } - TypeVariant::Union(Union { members, .. }) => { - let mut max = 0; - for (_, member) in members { - let size = self.inner_type_size_bytes(member, map)?; - max = max.max(size); - } - max - } - TypeVariant::Enum(Enum { storage_size, .. }) => storage_size - .or(self.size_enum) - .map(|x| x.get()) - .unwrap_or(4) - .into(), - TypeVariant::Bitfield(bitfield) => bitfield.width.into(), +impl TILSection { + pub fn read( + input: &mut impl IdaGenericBufUnpack, + compress: IDBSectionCompression, + ) -> Result { + let type_info_raw = TILSectionRaw::read(input, compress)?; + // TODO check for dups? 
+ let type_by_name = type_info_raw + .types + .iter() + .enumerate() + .map(|(i, til)| (til.name.clone().into_inner(), i)) + .collect(); + let type_by_ord = type_info_raw + .types + .iter() + .enumerate() + .map(|(i, til)| (til.ordinal, i)) + .collect(); + let symbols = type_info_raw + .symbols + .into_iter() + .map(|ty| { + TILTypeInfo::new( + &type_info_raw.header, + &type_by_name, + &type_by_ord, + ty.name, + ty.ordinal, + ty.tinfo, + ty.fields, + ) + }) + .collect::>()?; + let types = type_info_raw + .types + .into_iter() + .map(|ty| { + TILTypeInfo::new( + &type_info_raw.header, + &type_by_name, + &type_by_ord, + ty.name, + ty.ordinal, + ty.tinfo, + ty.fields, + ) + }) + .collect::>()?; + + Ok(Self { + header: type_info_raw.header, + symbols, + types, + macros: type_info_raw.macros, }) } } @@ -559,6 +739,7 @@ impl TILSection { pub struct TILSectionFlags(pub(crate) u16); impl TILSectionFlags { fn new(value: u32) -> Result { + #[cfg(feature = "restrictive")] ensure!( value < (flag::til::TIL_SLD as u32) << 1, "Unknown flag values for TILSectionFlags" @@ -618,165 +799,3 @@ pub(crate) struct TILBucketRaw { ndefs: u32, len: u32, } - -impl TILSection { - fn read_bucket_header(input: &mut impl IdaGenericUnpack) -> Result<(u32, u32)> { - let ndefs = bincode::deserialize_from(&mut *input)?; - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x42e3e0 - //ensure!(ndefs < 0x55555555); - let len = bincode::deserialize_from(&mut *input)?; - Ok((ndefs, len)) - } - - fn read_bucket_zip_header(input: &mut impl IdaGenericUnpack) -> Result<(u32, u32, u32)> { - let (ndefs, len) = Self::read_bucket_header(&mut *input)?; - let compressed_len = bincode::deserialize_from(&mut *input)?; - Ok((ndefs, len, compressed_len)) - } - - fn read_bucket( - input: &mut impl IdaGenericBufUnpack, - header: &TILSectionHeader, - next_ordinal: Option, - ordinal_alias: Option<&[(u32, u32)]>, - ) -> Result> { - if header.flags.is_zip() { - Self::read_bucket_zip(&mut *input, header, next_ordinal, 
ordinal_alias) - } else { - Self::read_bucket_normal(&mut *input, header, next_ordinal, ordinal_alias) - } - } - - fn read_bucket_normal( - input: &mut impl IdaGenericBufUnpack, - header: &TILSectionHeader, - next_ordinal: Option, - ordinal_alias: Option<&[(u32, u32)]>, - ) -> Result> { - let (ndefs, len) = Self::read_bucket_header(&mut *input)?; - Self::read_bucket_inner(&mut *input, header, ndefs, len, next_ordinal, ordinal_alias) - } - - fn read_bucket_zip( - input: &mut impl IdaGenericUnpack, - header: &TILSectionHeader, - next_ordinal: Option, - ordinal_alias: Option<&[(u32, u32)]>, - ) -> Result> { - let (ndefs, len, compressed_len) = Self::read_bucket_zip_header(&mut *input)?; - // make sure the decompressor don't read out-of-bounds - let mut compressed_input = input.take(compressed_len.into()); - let mut inflate = BufReader::new(flate2::read::ZlibDecoder::new(&mut compressed_input)); - // make sure only the defined size is decompressed - let type_info = Self::read_bucket_inner( - &mut inflate, - header, - ndefs, - len, - next_ordinal, - ordinal_alias, - )?; - ensure!( - compressed_input.limit() == 0, - "TypeBucket compressed data is smaller then expected" - ); - Ok(type_info) - } - - fn read_bucket_inner( - input: &mut impl IdaGenericBufUnpack, - header: &TILSectionHeader, - ndefs: u32, - len: u32, - next_ord: Option, - ordinal_alias: Option<&[(u32, u32)]>, - ) -> Result> { - if let Some(next_ord) = next_ord { - let alias: u32 = ordinal_alias - .map(|x| x.len()) - .unwrap_or(0) - .try_into() - .unwrap(); - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x42e3e0 - ensure!(ndefs + alias + 1 <= next_ord); - } - let mut input = input.take(len.into()); - let type_info = (0..ndefs) - .map(|i| TILTypeInfo::read(&mut input, header, i == ndefs - 1)) - .collect::>()?; - ensure!( - input.limit() == 0, - "TypeBucket total data is smaller then expected" - ); - Ok(type_info) - } - - fn read_macros( - input: &mut impl IdaGenericBufUnpack, - header: &TILSectionHeader, - 
) -> Result> { - if header.flags.is_zip() { - Self::read_macros_zip(&mut *input) - } else { - Self::read_macros_normal(&mut *input) - } - } - - fn read_macros_normal(input: &mut impl IdaGenericBufUnpack) -> Result> { - let (ndefs, len) = Self::read_bucket_header(&mut *input)?; - let mut input = input.take(len.into()); - let type_info = (0..ndefs) - .map(|_| TILMacro::read(&mut input)) - .collect::>()?; - ensure!( - input.limit() == 0, - "TypeBucket macro total data is smaller then expected" - ); - Ok(type_info) - } - - fn read_macros_zip(input: &mut impl IdaGenericUnpack) -> Result> { - let (ndefs, len, compressed_len) = Self::read_bucket_zip_header(&mut *input)?; - // make sure the decompressor don't read out-of-bounds - let mut compressed_input = input.take(compressed_len.into()); - let inflate = BufReader::new(flate2::read::ZlibDecoder::new(&mut compressed_input)); - // make sure only the defined size is decompressed - let mut decompressed_input = inflate.take(len.into()); - let type_info = (0..ndefs.try_into().unwrap()) - .map(|_| TILMacro::read(&mut decompressed_input)) - .collect::, _>>()?; - // make sure the input was fully consumed - ensure!( - decompressed_input.limit() == 0, - "TypeBucket macros data is smaller then expected" - ); - ensure!( - compressed_input.limit() == 0, - "TypeBucket macros compressed data is smaller then expected" - ); - Ok(type_info) - } - - #[allow(dead_code)] - fn decompress_bucket( - input: &mut impl IdaGenericUnpack, - output: &mut impl std::io::Write, - ) -> Result<()> { - let (ndefs, len, compressed_len) = Self::read_bucket_zip_header(&mut *input)?; - bincode::serialize_into(&mut *output, &TILBucketRaw { len, ndefs })?; - // write the decompressed data - let mut compressed_input = input.take(compressed_len.into()); - let inflate = flate2::read::ZlibDecoder::new(&mut compressed_input); - let mut decompressed_input = inflate.take(len.into()); - std::io::copy(&mut decompressed_input, output)?; - ensure!( - 
decompressed_input.limit() == 0, - "TypeBucket data is smaller then expected" - ); - ensure!( - compressed_input.limit() == 0, - "TypeBucket compressed data is smaller then expected" - ); - Ok(()) - } -} diff --git a/src/til/size_calculator.rs b/src/til/size_calculator.rs new file mode 100644 index 0000000..e9bd53f --- /dev/null +++ b/src/til/size_calculator.rs @@ -0,0 +1,245 @@ +use std::collections::{HashMap, HashSet}; +use std::num::NonZeroU8; + +use crate::til::bitfield::Bitfield; + +use super::r#enum::Enum; +use super::r#struct::StructMember; +use super::section::TILSection; +use super::union::Union; +use super::{Basic, Type, TypeVariant, Typeref, TyperefValue}; + +pub struct TILTypeSizeSolver<'a> { + section: &'a TILSection, + solved: HashMap, + // HACK used to avoid infinte lopping during recursive solving + solving: HashSet, +} + +impl<'a> TILTypeSizeSolver<'a> { + pub fn new(section: &'a TILSection) -> Self { + Self { + section, + solved: Default::default(), + solving: Default::default(), + } + } + + // TODO make a type for type_idx and symbol_idx, accept both here + /// NOTE that type_idx need to be specified if not a symbol + pub fn type_size_bytes( + &mut self, + type_idx: Option, + ty: &Type, + ) -> Option { + assert!(self.solving.is_empty()); + if let Some(idx) = type_idx { + // if cached return it + if let Some(solved) = self.cached(idx) { + return Some(solved); + } + self.solving.insert(idx); + } + let result = self.inner_type_size_bytes(ty); + if let Some(idx) = type_idx { + assert!(self.solving.remove(&idx)); + } + assert!(self.solving.is_empty()); + if let (Some(idx), Some(result)) = (type_idx, result) { + assert!(self.solved.insert(idx, result).is_none()); + } + result + } + + fn cached(&self, idx: usize) -> Option { + self.solved.get(&idx).copied() + } + + fn inner_type_size_bytes(&mut self, ty: &Type) -> Option { + Some(match &ty.type_variant { + TypeVariant::Basic(Basic::Char) => 1, + // TODO what is the SegReg size? 
+ TypeVariant::Basic(Basic::SegReg) => 1, + TypeVariant::Basic(Basic::Void) => 0, + TypeVariant::Basic(Basic::Unknown { bytes }) => (*bytes).into(), + TypeVariant::Basic(Basic::Bool) => { + self.section.header.size_bool.get().into() + } + TypeVariant::Basic(Basic::Short { .. }) => { + self.section.sizeof_short().get().into() + } + TypeVariant::Basic(Basic::Int { .. }) => { + self.section.header.size_int.get().into() + } + TypeVariant::Basic(Basic::Long { .. }) => { + self.section.sizeof_long().get().into() + } + TypeVariant::Basic(Basic::LongLong { .. }) => { + self.section.sizeof_long_long().get().into() + } + TypeVariant::Basic(Basic::IntSized { bytes, .. }) => { + bytes.get().into() + } + TypeVariant::Basic(Basic::BoolSized { bytes }) => { + bytes.get().into() + } + // TODO what's the long double default size if it's not defined? + TypeVariant::Basic(Basic::LongDouble) => self + .section + .header + .size_long_double + .map(|x| x.get()) + .unwrap_or(8) + .into(), + TypeVariant::Basic(Basic::Float { bytes }) => bytes.get().into(), + // TODO is pointer always near? Do pointer size default to 4? 
+ TypeVariant::Pointer(_) => self.section.addr_size().get().into(), + TypeVariant::Function(_) => 0, // function type dont have a size, only a pointer to it + TypeVariant::Array(array) => { + let element_len = + self.inner_type_size_bytes(&array.elem_type)?; + let nelem = array.nelem.map(|x| x.get()).unwrap_or(0) as u64; + element_len * nelem + } + TypeVariant::Typeref(ref_type) => self.solve_typedef(ref_type)?, + TypeVariant::Struct(til_struct) => { + let mut sum = 0u64; + // TODO default alignment, seems like default alignemnt is the field size + let align: u64 = 1; + let mut members = &til_struct.members[..]; + loop { + let Some(first_member) = members.first() else { + // no more members + break; + }; + let field_size = + match &first_member.member_type.type_variant { + // if bit-field, condensate one or more to create a byte-field + TypeVariant::Bitfield(bitfield) => { + members = &members[1..]; + // NOTE it skips 0..n members + condensate_bitfields_from_struct( + *bitfield, + &mut members, + ) + .get() + .into() + } + // get the inner type size + _ => { + let first = &members[0]; + members = &members[1..]; + // next member + self.inner_type_size_bytes(&first.member_type)? + } + }; + if !til_struct.is_unaligned { + let align = match ( + first_member.alignment.map(|x| x.get().into()), + self.alignemnt( + &first_member.member_type, + field_size, + ), + ) { + (Some(a), Some(b)) => a.max(b), + (Some(a), None) | (None, Some(a)) => a, + (None, None) => align, + }; + let align = align.max(1); + let align_diff = sum % align; + if align_diff != 0 { + sum += align - align_diff; + } + } + sum += field_size; + } + sum + } + TypeVariant::Union(Union { members, .. }) => { + let mut max = 0; + for (_, member) in members { + let size = self.inner_type_size_bytes(member)?; + max = max.max(size); + } + max + } + TypeVariant::Enum(Enum { storage_size, .. 
}) => storage_size + .or(self.section.header.size_enum) + .map(|x| x.get()) + .unwrap_or(4) + .into(), + TypeVariant::Bitfield(bitfield) => bitfield.width.into(), + }) + } + + fn solve_typedef(&mut self, typedef: &Typeref) -> Option { + let TyperefValue::Ref(idx) = &typedef.typeref_value else { + return None; + }; + // if cached return it + if let Some(solved) = self.cached(*idx) { + return Some(solved); + } + if !self.solving.insert(*idx) { + return None; + } + let inner_type = self.section.get_type_by_idx(*idx); + let result = self.inner_type_size_bytes(&inner_type.tinfo); + self.solving.remove(&idx); + if let Some(result) = result { + assert!(self.solved.insert(*idx, result).is_none()); + } + result + } + + fn alignemnt(&mut self, til: &Type, til_size: u64) -> Option { + match &til.type_variant { + // TODO basic types have a inherited alignment? + TypeVariant::Basic(_) + | TypeVariant::Enum(_) + | TypeVariant::Pointer(_) => Some(til_size), + TypeVariant::Array(array) => { + let size = self.inner_type_size_bytes(&array.elem_type); + self.alignemnt(&array.elem_type, size.unwrap_or(1)) + } + TypeVariant::Typeref(ty) => { + let TyperefValue::Ref(idx) = &ty.typeref_value else { + return None; + }; + let ty = &self.section.types[*idx].tinfo; + let size = self.inner_type_size_bytes(ty).unwrap_or(1); + self.alignemnt(ty, size) + } + _ => None, + } + } +} + +fn condensate_bitfields_from_struct( + first_field: Bitfield, + rest: &mut &[StructMember], +) -> NonZeroU8 { + let field_bytes = first_field.nbytes; + let field_bits: u16 = u16::from(first_field.nbytes.get()) * 8; + let mut condensated_bits = first_field.width; + + loop { + let Some(TypeVariant::Bitfield(member)) = + rest.first().map(|x| &x.member_type.type_variant) + else { + // no more bit-fields to condensate + break; + }; + // condensate the bit-field into the byte-field + condensated_bits += member.width; + // check if this bit start the next field + if field_bytes != member.nbytes || condensated_bits > 
field_bits { + // NOTE this don't consume the current member + break; + } + + // advance to the next member + *rest = &rest[1..]; + } + field_bytes +} diff --git a/src/til/struct.rs b/src/til/struct.rs index 618508f..3cac211 100644 --- a/src/til/struct.rs +++ b/src/til/struct.rs @@ -1,12 +1,14 @@ -use std::num::{NonZeroU16, NonZeroU8}; +use std::collections::HashMap; +use std::num::NonZeroU8; use crate::ida_reader::IdaGenericBufUnpack; -use crate::til::section::TILSectionHeader; -use crate::til::{Type, TypeRaw, SDACL}; -use anyhow::{anyhow, Result}; -use num_enum::{FromPrimitive, IntoPrimitive}; +use crate::til::{Type, TypeRaw}; +use crate::IDBString; +use anyhow::{anyhow, ensure, Context, Result}; +use num_enum::{FromPrimitive, IntoPrimitive, TryFromPrimitive}; -use super::{StructModifierRaw, TypeVariantRaw}; +use super::section::TILSectionHeader; +use super::{TypeAttribute, TypeVariantRaw}; #[derive(Clone, Debug)] pub struct Struct { @@ -17,35 +19,45 @@ pub struct Struct { /// Gcc msstruct attribute pub is_msstruct: bool, /// C++ object, not simple pod type - pub is_cpp_obj: bool, + pub is_cppobj: bool, /// Virtual function table - pub is_vftable: bool, + pub is_vft: bool, + /// Unknown meaning, use at your own risk + pub is_uknown_8: bool, /// Alignment in bytes pub alignment: Option, - // TODO delete others, parse all values or return an error - /// other unparsed values from the type attribute - pub others: Option, } impl Struct { pub(crate) fn new( til: &TILSectionHeader, + type_by_name: &HashMap, usize>, + type_by_ord: &HashMap, value: StructRaw, - fields: &mut impl Iterator>, + fields: &mut impl Iterator>, ) -> Result { let members = value .members .into_iter() - .map(|member| StructMember::new(til, fields.next(), member, &mut *fields)) + .map(|member| { + StructMember::new( + til, + fields.next().flatten(), + type_by_name, + type_by_ord, + member, + &mut *fields, + ) + }) .collect::>()?; Ok(Struct { effective_alignment: value.effective_alignment, members, 
- is_unaligned: value.modifier.is_unaligned, - is_msstruct: value.modifier.is_msstruct, - is_cpp_obj: value.modifier.is_cpp_obj, - is_vftable: value.modifier.is_vftable, - alignment: value.modifier.alignment, - others: value.modifier.others, + is_unaligned: value.is_unaligned, + is_msstruct: value.is_msstruct, + is_cppobj: value.is_cppobj, + is_vft: value.is_vft, + is_uknown_8: value.is_unknown_8, + alignment: value.alignment, }) } } @@ -53,8 +65,20 @@ impl Struct { #[derive(Clone, Debug)] pub(crate) struct StructRaw { effective_alignment: Option, - modifier: StructModifierRaw, members: Vec, + + /// Unaligned struct + is_unaligned: bool, + /// Gcc msstruct attribute + is_msstruct: bool, + /// C++ object, not simple pod type + is_cppobj: bool, + /// Virtual function table + is_vft: bool, + // TODO unknown meaning + is_unknown_8: bool, + /// Alignment in bytes + alignment: Option, } impl StructRaw { @@ -67,91 +91,234 @@ impl StructRaw { // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x4803b4 // simple reference let ref_type = TypeRaw::read_ref(&mut *input, header)?; - let _taudt_bits = SDACL::read(&mut *input)?; - return Ok(TypeVariantRaw::StructRef(Box::new(ref_type))); + let _taudt_bits = input.read_sdacl()?; + let TypeVariantRaw::Typedef(ref_type) = ref_type.variant else { + return Err(anyhow!("StructRef Non Typedef")); + }; + return Ok(TypeVariantRaw::StructRef(ref_type)); }; // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x4808f9 let mem_cnt = n >> 3; // TODO what is effective_alignment and how it's diferent from Modifier alignment? 
let alpow = n & 7; - let effective_alignment = (alpow != 0).then(|| NonZeroU8::new(1 << (alpow - 1)).unwrap()); + let effective_alignment = + (alpow != 0).then(|| NonZeroU8::new(1 << (alpow - 1)).unwrap()); // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x459c97 - let taudt_bits = SDACL::read(&mut *input)?; + let mut alignment = None; + let mut is_unknown_8 = false; + let mut is_msstruct = false; + let mut is_unaligned = false; + let mut is_cppobj = false; + let mut is_vft = false; + let mut is_method = false; + let mut is_bitset2 = false; + if let Some(TypeAttribute { + tattr, + extended: _extended, + }) = input.read_sdacl()? + { + use crate::til::flag::tattr::*; + use crate::til::flag::tattr_field::*; + use crate::til::flag::tattr_udt::*; + + let align_raw = (tattr & MAX_DECL_ALIGN) as u8; + + // TODO WHY? + is_unknown_8 = align_raw & 0x8 != 0; + alignment = (align_raw & 0x7 != 0) + .then(|| NonZeroU8::new(1 << ((align_raw & 0x7) - 1)).unwrap()); + + is_msstruct = tattr & TAUDT_MSSTRUCT != 0; + is_unaligned = tattr & TAUDT_UNALIGNED != 0; + is_cppobj = tattr & TAUDT_CPPOBJ != 0; + is_vft = tattr & TAUDT_VFTABLE != 0; + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x478203 + // TODO using a field flag on the struct seems out-of-place + is_method = tattr & TAFLD_METHOD != 0; + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x47822d + // TODO this value can't be right, it defines the alignment! 
+ is_bitset2 = align_raw & 0x4 != 0; + + const _ALL_FLAGS: u16 = MAX_DECL_ALIGN + | TAUDT_MSSTRUCT + | TAUDT_UNALIGNED + | TAUDT_CPPOBJ + | TAUDT_VFTABLE + | TAFLD_METHOD; + #[cfg(feature = "restrictive")] + ensure!( + tattr & !_ALL_FLAGS == 0, + "Invalid Struct taenum_bits {tattr:x}" + ); + #[cfg(feature = "restrictive")] + ensure!( + _extended.is_none(), + "Unable to parse extended attributes for struct" + ); + } + let members = (0..mem_cnt) - .map(|_| StructMemberRaw::read(&mut *input, header, taudt_bits.0 .0)) + .map(|i| { + StructMemberRaw::read( + &mut *input, + header, + is_method, + is_bitset2, + ) + .with_context(|| format!("Member {i}")) + }) .collect::>()?; - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x46c4fc print_til_types_att - let modifier = StructModifierRaw::from_value(taudt_bits.0 .0); Ok(TypeVariantRaw::Struct(Self { effective_alignment, - modifier, members, + is_unaligned, + is_msstruct, + is_cppobj, + is_vft, + is_unknown_8, + alignment, })) } } #[derive(Clone, Debug)] pub struct StructMember { - pub name: Option>, + pub name: Option, pub member_type: Type, - pub sdacl: SDACL, pub att: Option, + + pub alignment: Option, + pub is_baseclass: bool, + pub is_unaligned: bool, + pub is_vft: bool, + pub is_method: bool, + pub is_unknown_8: bool, } impl StructMember { fn new( til: &TILSectionHeader, - name: Option>, + name: Option, + type_by_name: &HashMap, usize>, + type_by_ord: &HashMap, m: StructMemberRaw, - fields: &mut impl Iterator>, + fields: &mut impl Iterator>, ) -> Result { Ok(Self { name, - member_type: Type::new(til, m.ty, fields)?, - sdacl: m.sdacl, + member_type: Type::new( + til, + type_by_name, + type_by_ord, + m.ty, + fields, + )?, att: m.att, + alignment: m.alignment, + is_baseclass: m.is_baseclass, + is_unaligned: m.is_unaligned, + is_vft: m.is_vft, + is_method: m.is_method, + is_unknown_8: m.is_unknown_8, }) } } #[derive(Clone, Debug)] pub(crate) struct StructMemberRaw { pub ty: TypeRaw, - pub sdacl: SDACL, pub att: Option, + 
pub alignment: Option, + pub is_baseclass: bool, + pub is_unaligned: bool, + pub is_vft: bool, + pub is_method: bool, + pub is_unknown_8: bool, } impl StructMemberRaw { fn read( input: &mut impl IdaGenericBufUnpack, header: &TILSectionHeader, - taudt_bits: u16, + is_bit_set: bool, + is_bit_set2: bool, ) -> Result { let ty = TypeRaw::read(&mut *input, header)?; - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x478203 - let is_bit_set = taudt_bits & 0x200 != 0; - // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x478256 let att = is_bit_set .then(|| Self::read_member_att_1(input, header)) .transpose()?; + let mut alignment = None; + let mut is_baseclass = false; + let mut is_unaligned = false; + let mut is_vft = false; + let mut is_method = false; + let mut is_unknown_8 = false; + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x47825d - let mut sdacl = SDACL(crate::til::TypeAttribute(0)); - if !is_bit_set || matches!(att, Some(_att1)) { + if !is_bit_set || att.is_some() { // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x47825d - sdacl = SDACL::read(&mut *input)?; + if let Some(TypeAttribute { + tattr, + extended: _extended, + }) = input.read_sdacl()? 
+ { + use crate::til::flag::tattr::*; + use crate::til::flag::tattr_field::*; + + let alignment_raw = (tattr & MAX_DECL_ALIGN) as u8; + is_unknown_8 = alignment_raw & 0x8 != 0; + alignment = ((alignment_raw & 0x7) != 0).then(|| { + NonZeroU8::new(1 << ((alignment_raw & 0x7) - 1)).unwrap() + }); + is_baseclass = tattr & TAFLD_BASECLASS != 0; + is_unaligned = tattr & TAFLD_UNALIGNED != 0; + let is_virtbase = tattr & TAFLD_VIRTBASE != 0; + ensure!( + !is_virtbase, + "UDT Member virtual base is not supported yet" + ); + is_vft = tattr & TAFLD_VFTABLE != 0; + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x478203 + is_method = tattr & TAFLD_METHOD != 0; + const _ALL_FLAGS: u16 = MAX_DECL_ALIGN + | TAFLD_BASECLASS + | TAFLD_UNALIGNED + | TAFLD_VFTABLE + | TAFLD_METHOD; + #[cfg(feature = "restrictive")] + ensure!( + tattr & !_ALL_FLAGS == 0, + "Invalid Struct taenum_bits {tattr:x}" + ); + #[cfg(feature = "restrictive")] + ensure!( + _extended.is_none(), + "Unable to parse extended attributes for struct member" + ); + } + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x47822d - if taudt_bits & 4 != 0 && sdacl.0 .0 & 0x200 == 0 { + if is_bit_set2 && !is_method { // TODO there is more to this impl? 
// InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x478411 // todo!(); } } - Ok(Self { ty, sdacl, att }) + Ok(Self { + ty, + att, + alignment, + is_baseclass, + is_unaligned, + is_vft, + is_method, + is_unknown_8, + }) } // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x486cd0 @@ -162,7 +329,9 @@ impl StructMemberRaw { let att = input.read_ext_att()?; // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x486d0d match att & 0xf { - 0xd..=0xf => Err(anyhow!("Invalid value for member attribute {att:#x}")), + 0xd..=0xf => { + Err(anyhow!("Invalid value for member attribute {att:#x}")) + } 0..=7 => Ok(StructMemberAtt::Var0to7(Self::basic_att(input, att)?)), 8 | 0xb => todo!(), // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x486d3f @@ -193,7 +362,10 @@ impl StructMemberRaw { } } - fn basic_att(input: &mut impl IdaGenericBufUnpack, att: u64) -> Result { + fn basic_att( + input: &mut impl IdaGenericBufUnpack, + att: u64, + ) -> Result { if (att >> 8) & 0x10 != 0 { // TODO this is diferent from the implementation, double check the read_de and this code // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x486df0 @@ -214,7 +386,6 @@ impl StructMemberRaw { #[derive(Clone, Copy, Debug)] pub enum StructMemberAtt { - // Var0to7(Var1(0)) seems to indicate a "None" kind of value Var0to7(StructMemberAttBasic), Var9 { val1: u32, @@ -228,6 +399,7 @@ pub enum StructMemberAtt { }, } +// InnerRef InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x720880 #[derive(Clone, Copy, Debug)] pub enum StructMemberAttBasic { Var1(u64), @@ -242,6 +414,7 @@ pub enum StructMemberAttBasic { impl StructMemberAtt { pub fn str_type(self) -> Option { match self { + // 0x8 0xa "__strlit" StructMemberAtt::VarAorC { val1, att0: StructMemberAttBasic::Var1(0xa), @@ -249,6 +422,86 @@ impl StructMemberAtt { _ => None, } } + pub fn offset_type(self) -> Option { + match self { + // 0x8 0x9 "__offset" + StructMemberAtt::Var9 { + val1, + att0: None, + att1: 0, + att2: u64::MAX, + } => Some(ExtAttOffset { + offset: (val1 & 
0xf) as u8, + flag: val1 & !0xf, + }), + _ => None, + } + } + + pub fn basic(self) -> Option { + match self { + StructMemberAtt::Var0to7(StructMemberAttBasic::Var1(raw)) => { + ExtAttBasic::from_raw(raw, None) + } + // 0x9 0x1000 "__tabform" + StructMemberAtt::Var0to7(StructMemberAttBasic::Var2 { + att, + val1, + val2, + val3: u32::MAX, + }) if att & 0x1000 != 0 => { + ExtAttBasic::from_raw(att & !0x1000, Some((val1, val2))) + } + _ => None, + } + } + + pub fn basic_offset_type(self) -> Option<(u32, bool)> { + // TODO find the InnerRef + match self { + StructMemberAtt::Var9 { + val1, + att0: Some(att0 @ (0 | 0x4e8 | 0x3f58)), + att1: 0, + att2: u64::MAX, + } => Some((val1, att0 != 0)), + _ => None, + } + } +} + +#[derive(Clone, Copy, Debug)] +pub struct ExtAttOffset { + pub offset: u8, + // InnerRef InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x720aa0 + flag: u32, +} + +impl ExtAttOffset { + pub fn is_rvaoff(&self) -> bool { + self.flag & 0x10 != 0 + } + pub fn is_pastend(&self) -> bool { + self.flag & 0x20 != 0 + } + pub fn is_nobase(&self) -> bool { + self.flag & 0x80 != 0 + } + pub fn is_subtract(&self) -> bool { + self.flag & 0x100 != 0 + } + pub fn is_signedop(&self) -> bool { + self.flag & 0x200 != 0 + } + pub fn is_nozeroes(&self) -> bool { + self.flag & 0x400 != 0 + } + pub fn is_noones(&self) -> bool { + self.flag & 0x800 != 0 + } + pub fn is_selfref(&self) -> bool { + self.flag & 0x1000 != 0 + } } #[derive(Clone, Copy, Debug, FromPrimitive, IntoPrimitive)] @@ -268,3 +521,87 @@ impl StringType { self.into() } } + +#[derive(Clone, Copy, Debug)] +pub struct ExtAttBasic { + pub fmt: ExtAttBasicFmt, + pub tabform: Option, + pub is_signed: bool, + pub is_inv_sign: bool, + pub is_inv_bits: bool, + pub is_lzero: bool, +} +impl ExtAttBasic { + fn from_raw(value: u64, val1: Option<(u32, u32)>) -> Option { + use ExtAttBasicFmt::*; + let fmt = match value & 0xf { + 0x1 => Bin, + 0x2 => Oct, + 0x3 => Hex, + 0x4 => Dec, + 0x5 => Float, + 0x6 => Char, + 0x7 => Segm, + 
0x9 => Off, + _ => return None, + }; + let is_inv_sign = value & 0x100 != 0; + let is_inv_bits = value & 0x200 != 0; + let is_signed = value & 0x400 != 0; + let is_lzero = value & 0x800 != 0; + + let tabform = val1.map(|(val1, val2)| { + let val1 = ExtAttBasicTabformVal1::try_from_primitive( + val1.try_into().ok()?, + ) + .ok()?; + Some(ExtAttBasicTabform { val1, val2 }) + }); + let tabform = match tabform { + // convert correctly + Some(Some(val)) => Some(val), + // coud not convert, return nothing + Some(None) => return None, + // there is no tabform + None => None, + }; + + // TODO panic on unknown values? + Some(Self { + fmt, + tabform, + is_signed, + is_inv_sign, + is_inv_bits, + is_lzero, + }) + } +} + +#[derive(Clone, Copy, Debug)] +pub enum ExtAttBasicFmt { + Bin, + Oct, + Hex, + Dec, + Float, + Char, + Segm, + Off, +} + +#[derive(Clone, Copy, Debug)] +pub struct ExtAttBasicTabform { + pub val1: ExtAttBasicTabformVal1, + pub val2: u32, +} + +#[derive(Clone, Copy, Debug, TryFromPrimitive, IntoPrimitive)] +#[repr(u8)] +pub enum ExtAttBasicTabformVal1 { + NODUPS = 0, + HEX = 1, + DEC = 2, + OCT = 3, + BIN = 4, +} diff --git a/src/til/union.rs b/src/til/union.rs index c5e6026..975a913 100644 --- a/src/til/union.rs +++ b/src/til/union.rs @@ -1,38 +1,53 @@ +use anyhow::{anyhow, Context, Result}; + +use std::collections::HashMap; use std::num::NonZeroU8; use crate::ida_reader::IdaGenericBufUnpack; -use crate::til::section::TILSectionHeader; -use crate::til::{Type, TypeRaw, SDACL}; +use crate::til::{Type, TypeRaw}; +use crate::IDBString; -use super::{StructModifierRaw, TypeVariantRaw}; +use super::section::TILSectionHeader; +use super::{TypeAttribute, TypeVariantRaw}; #[derive(Clone, Debug)] pub struct Union { pub effective_alignment: u16, pub alignment: Option, - pub members: Vec<(Option>, Type)>, - // TODO parse type attributes - //others: StructMemberRaw, + pub members: Vec<(Option, Type)>, + + pub is_unaligned: bool, + pub is_unknown_8: bool, } impl Union { 
pub(crate) fn new( til: &TILSectionHeader, + type_by_name: &HashMap, usize>, + type_by_ord: &HashMap, value: UnionRaw, - fields: &mut impl Iterator>, - ) -> anyhow::Result { + fields: &mut impl Iterator>, + ) -> Result { let members = value .members .into_iter() .map(|member| { - let field_name = fields.next(); - let new_member = Type::new(til, member, &mut *fields)?; + let field_name = fields.next().flatten(); + let new_member = Type::new( + til, + type_by_name, + type_by_ord, + member, + &mut *fields, + )?; Ok((field_name, new_member)) }) - .collect::>()?; + .collect::>()?; Ok(Union { effective_alignment: value.effective_alignment, alignment: value.alignment, members, + is_unaligned: value.is_unaligned, + is_unknown_8: value.is_unknown_8, }) } } @@ -44,36 +59,74 @@ pub(crate) struct UnionRaw { effective_alignment: u16, alignment: Option, members: Vec, + is_unaligned: bool, + is_unknown_8: bool, } impl UnionRaw { pub fn read( input: &mut impl IdaGenericBufUnpack, header: &TILSectionHeader, - ) -> anyhow::Result { + ) -> Result { let Some(n) = input.read_dt_de()? 
else { // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x4803b4 // is ref let ref_type = TypeRaw::read_ref(&mut *input, header)?; - let _taudt_bits = SDACL::read(&mut *input)?; - return Ok(TypeVariantRaw::UnionRef(Box::new(ref_type))); + let _taudt_bits = input.read_sdacl()?; + let TypeVariantRaw::Typedef(ref_type) = ref_type.variant else { + return Err(anyhow!("UnionRef Non Typedef")); + }; + return Ok(TypeVariantRaw::UnionRef(ref_type)); }; // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x4808f9 let alpow = n & 7; let mem_cnt = n >> 3; let effective_alignment = if alpow == 0 { 0 } else { 1 << (alpow - 1) }; - let taudt_bits = SDACL::read(&mut *input)?; - let modifiers = StructModifierRaw::from_value(taudt_bits.0 .0); - // TODO check InnerRef to how to handle modifiers - let alignment = modifiers.alignment; + + let mut alignment = None; + let mut is_unaligned = false; + let mut is_unknown_8 = false; + if let Some(TypeAttribute { + tattr, + extended: _extended, + }) = input.read_sdacl()? 
+ { + use crate::til::flag::tattr::*; + use crate::til::flag::tattr_udt::*; + + let alignment_raw = (tattr & MAX_DECL_ALIGN) as u8; + is_unknown_8 = alignment_raw & 0x8 != 0; + alignment = ((alignment_raw & 0x7) != 0).then(|| { + NonZeroU8::new(1 << ((alignment_raw & 0x7) - 1)).unwrap() + }); + is_unaligned = tattr & TAUDT_UNALIGNED != 0; + + const _ALL_FLAGS: u16 = MAX_DECL_ALIGN | TAUDT_UNALIGNED; + #[cfg(feature = "restrictive")] + anyhow::ensure!( + tattr & !_ALL_FLAGS == 0, + "Invalid Union taenum_bits {tattr:x}" + ); + #[cfg(feature = "restrictive")] + anyhow::ensure!( + _extended.is_none(), + "Unable to parse extended attributes for union" + ); + } + let members = (0..mem_cnt) - .map(|_| TypeRaw::read(&mut *input, header)) - .collect::>()?; + .map(|i| { + TypeRaw::read(&mut *input, header) + .with_context(|| format!("Member {i}")) + }) + .collect::>()?; Ok(TypeVariantRaw::Union(Self { effective_alignment, alignment, members, + is_unaligned, + is_unknown_8, })) } } diff --git a/src/tools/decompress_til.rs b/src/tools/decompress_til.rs index 6fe92b7..355c16b 100644 --- a/src/tools/decompress_til.rs +++ b/src/tools/decompress_til.rs @@ -14,16 +14,20 @@ pub fn decompress_til(args: &Args, til_args: &DecompressTilArgs) -> Result<()> { FileType::Idb => { let input = BufReader::new(File::open(&args.input)?); let mut parser = IDBParser::new(input)?; - let til_offset = parser - .til_section_offset() - .ok_or_else(|| anyhow!("IDB file don't contains a TIL sector"))?; + let til_offset = parser.til_section_offset().ok_or_else(|| { + anyhow!("IDB file don't contains a TIL sector") + })?; // TODO make decompress til public parser.decompress_til_section(til_offset, &mut output) } FileType::Til => { let mut input = BufReader::new(File::open(&args.input)?); // TODO make decompress til public - TILSection::decompress(&mut input, &mut output, idb_rs::IDBSectionCompression::None) + TILSection::decompress( + &mut input, + &mut output, + idb_rs::IDBSectionCompression::None, + ) } 
} } diff --git a/src/tools/dump_addr_info.rs b/src/tools/dump_addr_info.rs index c73b30a..5905709 100644 --- a/src/tools/dump_addr_info.rs +++ b/src/tools/dump_addr_info.rs @@ -8,8 +8,12 @@ pub fn dump_addr_info(args: &Args) -> Result<()> { // TODO create a function for that in ida_info let version = match id0.ida_info()? { - idb_rs::id0::IDBParam::V1(idb_rs::id0::IDBParam1 { version, .. }) => version, - idb_rs::id0::IDBParam::V2(idb_rs::id0::IDBParam2 { version, .. }) => version, + idb_rs::id0::IDBParam::V1(idb_rs::id0::IDBParam1 { + version, .. + }) => version, + idb_rs::id0::IDBParam::V2(idb_rs::id0::IDBParam2 { + version, .. + }) => version, }; for entry in id0.address_info(version)? { let (addr, info) = entry?; @@ -19,7 +23,10 @@ pub fn dump_addr_info(args: &Args) -> Result<()> { key: [key_type, rest @ ..], value, } if (*key_type as char).is_ascii_graphic() => { - println!("Other('{}':{rest:02x?}:{value:02x?})", *key_type as char); + println!( + "Other('{}':{rest:02x?}:{value:02x?})", + *key_type as char + ); } idb_rs::id0::AddressInfo::Other { key, value } => { println!("Other({key:02x?}:{value:02x?})",); diff --git a/src/tools/dump_dirtree.rs b/src/tools/dump_dirtree.rs index fbe1c23..67e86bc 100644 --- a/src/tools/dump_dirtree.rs +++ b/src/tools/dump_dirtree.rs @@ -1,6 +1,9 @@ use idb_rs::id0::{DirTreeEntry, DirTreeRoot}; -pub fn print_dirtree(mut handle_print: impl FnMut(&T), dirtree: &DirTreeRoot) { +pub fn print_dirtree( + mut handle_print: impl FnMut(&T), + dirtree: &DirTreeRoot, +) { inner_print_dirtree(&mut handle_print, &dirtree.entries, 0); } diff --git a/src/tools/dump_dirtree_funcs.rs b/src/tools/dump_dirtree_funcs.rs index 68611e4..1c53df0 100644 --- a/src/tools/dump_dirtree_funcs.rs +++ b/src/tools/dump_dirtree_funcs.rs @@ -19,7 +19,8 @@ pub fn print_function(id0: &ID0Section, address: Id0Address) -> Result<()> { let mut ty = None; for info in infos { match info? { - idb_rs::id0::AddressInfo::Comment(_) | idb_rs::id0::AddressInfo::Other { .. 
} => {} + idb_rs::id0::AddressInfo::Comment(_) + | idb_rs::id0::AddressInfo::Other { .. } => {} idb_rs::id0::AddressInfo::Label(label) => { if let Some(_old) = name.replace(label) { panic!("Multiple labels can't be return for address") @@ -27,7 +28,10 @@ pub fn print_function(id0: &ID0Section, address: Id0Address) -> Result<()> { } idb_rs::id0::AddressInfo::TilType(addr_ty) => { ensure!( - matches!(&addr_ty.type_variant, idb_rs::til::TypeVariant::Function(_)), + matches!( + &addr_ty.type_variant, + idb_rs::til::TypeVariant::Function(_) + ), "Type for function at {address:#?} is invalid" ); if let Some(_old) = ty.replace(addr_ty) { diff --git a/src/tools/dump_dirtree_types.rs b/src/tools/dump_dirtree_types.rs index 1671bf8..497140a 100644 --- a/src/tools/dump_dirtree_types.rs +++ b/src/tools/dump_dirtree_types.rs @@ -9,17 +9,19 @@ use idb_rs::{id0::Id0TilOrd, IDBParser}; pub fn dump_dirtree_types(args: &Args) -> Result<()> { // parse the id0 sector/file let (id0, til) = match args.input_type() { - FileType::Til => return Err(anyhow!("TIL don't contains any ID0 data")), + FileType::Til => { + return Err(anyhow!("TIL don't contains any ID0 data")) + } FileType::Idb => { let input = BufReader::new(File::open(&args.input)?); let mut parser = IDBParser::new(input)?; - let id0_offset = parser - .id0_section_offset() - .ok_or_else(|| anyhow!("IDB file don't contains a ID0 sector"))?; + let id0_offset = parser.id0_section_offset().ok_or_else(|| { + anyhow!("IDB file don't contains a ID0 sector") + })?; let id0 = parser.read_id0_section(id0_offset)?; - let til_offset = parser - .til_section_offset() - .ok_or_else(|| anyhow!("IDB file don't contains a TIL sector"))?; + let til_offset = parser.til_section_offset().ok_or_else(|| { + anyhow!("IDB file don't contains a TIL sector") + })?; let til = parser.read_til_section(til_offset)?; (id0, til) } diff --git a/src/tools/dump_functions.rs b/src/tools/dump_functions.rs index e052f1b..72988fc 100644 --- 
a/src/tools/dump_functions.rs +++ b/src/tools/dump_functions.rs @@ -75,7 +75,9 @@ pub fn dump_functions(args: &Args) -> Result<()> { print!(" {:#x}:", address.as_u64()); print_function(&id0, address)? } - idb_rs::id0::DirTreeEntry::Directory { name: _, entries } => buffer.extend(entries), + idb_rs::id0::DirTreeEntry::Directory { name: _, entries } => { + buffer.extend(entries) + } } } diff --git a/src/tools/dump_segments.rs b/src/tools/dump_segments.rs index 5b2e987..f85ac2d 100644 --- a/src/tools/dump_segments.rs +++ b/src/tools/dump_segments.rs @@ -13,8 +13,12 @@ pub fn dump_segments(args: &Args) -> Result<()> { // TODO create a function for that in ida_info let version = match id0.ida_info()? { - idb_rs::id0::IDBParam::V1(idb_rs::id0::IDBParam1 { version, .. }) => version, - idb_rs::id0::IDBParam::V2(idb_rs::id0::IDBParam2 { version, .. }) => version, + idb_rs::id0::IDBParam::V1(idb_rs::id0::IDBParam1 { + version, .. + }) => version, + idb_rs::id0::IDBParam::V2(idb_rs::id0::IDBParam2 { + version, .. + }) => version, }; println!(); println!("Segments AKA `$ fileregions`: "); diff --git a/src/tools/dump_til.rs b/src/tools/dump_til.rs index be92fd8..9e06748 100644 --- a/src/tools/dump_til.rs +++ b/src/tools/dump_til.rs @@ -2,7 +2,7 @@ use std::fs::File; use std::io::BufReader; use anyhow::{anyhow, Result}; -use idb_rs::til::section::TILSection; +use idb_rs::til::section::{TILSection, TILSectionExtendedSizeofInfo}; use idb_rs::til::TILMacro; use idb_rs::IDBParser; @@ -14,44 +14,50 @@ pub fn dump_til(args: &Args) -> Result<()> { FileType::Idb => { let input = BufReader::new(File::open(&args.input)?); let mut parser = IDBParser::new(input)?; - let til_offset = parser - .til_section_offset() - .ok_or_else(|| anyhow!("IDB file don't contains a TIL sector"))?; + let til_offset = parser.til_section_offset().ok_or_else(|| { + anyhow!("IDB file don't contains a TIL sector") + })?; parser.read_til_section(til_offset)? 
} FileType::Til => { - let input = BufReader::new(File::open(&args.input)?); - idb_rs::til::section::TILSection::parse(input)? + let mut input = BufReader::new(File::open(&args.input)?); + idb_rs::til::section::TILSection::read( + &mut input, + idb_rs::IDBSectionCompression::None, + )? } }; // this deconstruction is to changes on TILSection to force a review on this code let TILSection { - format, - title, - flags: _, - dependency, - compiler_id, - cc, - cm, - cn, - def_align, - type_ordinal_alias, - size_int, - size_enum, - size_bool, - extended_sizeof_info: _, - size_long_double, - is_universal, symbols, types, macros, + header: + idb_rs::til::section::TILSectionHeader { + flags: _, + format, + description, + dependencies, + compiler_id, + cc, + cn, + cm, + def_align, + type_ordinal_alias, + size_int, + size_bool, + size_enum, + extended_sizeof_info, + size_long_double, + is_universal, + }, } = &til; // write the header info println!("format: {format}"); - println!("title: {}", String::from_utf8_lossy(title)); - if let Some(dependency) = dependency { - println!("dependency: {}", String::from_utf8_lossy(dependency)); + println!("description: {}", description.as_utf8_lossy()); + for (i, dependency) in dependencies.iter().enumerate() { + println!("dependency-{i}: {}", dependency.as_utf8_lossy()); } println!("id: {compiler_id:?}"); println!("cc: {cc:?}"); @@ -61,6 +67,16 @@ pub fn dump_til(args: &Args) -> Result<()> { println!("size_int: {size_int}"); println!("size_bool: {size_bool}"); println!("size_enum: {size_enum:?}"); + if let Some(TILSectionExtendedSizeofInfo { + size_short, + size_long, + size_long_long, + }) = extended_sizeof_info + { + println!("size_short: {size_short}"); + println!("size_long: {size_long}"); + println!("size_long_long: {size_long_long}"); + } println!("is_universal: {is_universal}"); if let Some(type_ordinal_numbers) = type_ordinal_alias { println!("type_ordinal_numbers: {type_ordinal_numbers:?}"); @@ -94,8 +110,12 @@ pub fn 
dump_til(args: &Args) -> Result<()> { let value: String = value .iter() .map(|c| match c { - idb_rs::til::TILMacroValue::Char(c) => format!("{}", *c as char), - idb_rs::til::TILMacroValue::Param(param) => format!("{{P{}}}", *param), + idb_rs::til::TILMacroValue::Char(c) => { + format!("{}", *c as char) + } + idb_rs::til::TILMacroValue::Param(param) => { + format!("{{P{}}}", *param) + } }) .collect(); println!("------------------------------`{name}`------------------------------"); diff --git a/src/tools/tilib.rs b/src/tools/tilib.rs index b6f932b..20c85d9 100644 --- a/src/tools/tilib.rs +++ b/src/tools/tilib.rs @@ -1,15 +1,18 @@ use idb_rs::id0::{Compiler, Id0TilOrd}; use idb_rs::til::array::Array; +use idb_rs::til::bitfield::Bitfield; use idb_rs::til::function::{CallingConvention, Function}; use idb_rs::til::pointer::Pointer; use idb_rs::til::r#enum::Enum; use idb_rs::til::r#struct::{Struct, StructMemberAtt}; use idb_rs::til::section::TILSection; use idb_rs::til::union::Union; -use idb_rs::til::{Basic, TILTypeInfo, Type, TypeVariant, Typedef}; -use idb_rs::IDBParser; +use idb_rs::til::{ + Basic, TILTypeInfo, TILTypeSizeSolver, Type, TypeVariant, Typeref, + TyperefType, TyperefValue, +}; +use idb_rs::{IDBParser, IDBSectionCompression, IDBString}; -use std::borrow::Borrow; use std::fs::File; use std::io::{BufReader, Result, Write}; use std::num::NonZeroU8; @@ -18,17 +21,18 @@ use crate::{Args, FileType}; pub fn tilib_print(args: &Args) -> anyhow::Result<()> { // parse the id0 sector/file - let input = BufReader::new(File::open(&args.input)?); + let mut input = BufReader::new(File::open(&args.input)?); match args.input_type() { FileType::Til => { - let section = TILSection::parse(input)?; + let section = + TILSection::read(&mut input, IDBSectionCompression::None)?; print_til_section(std::io::stdout(), &section)?; } FileType::Idb => { let mut parser = IDBParser::new(input)?; - let til_offset = parser - .til_section_offset() - .ok_or_else(|| anyhow::anyhow!("IDB file don't
contains a TIL sector"))?; + let til_offset = parser.til_section_offset().ok_or_else(|| { + anyhow::anyhow!("IDB file don't contains a TIL sector") + })?; let section = parser.read_til_section(til_offset)?; print_til_section(std::io::stdout(), &section)?; } @@ -37,24 +41,31 @@ pub fn tilib_print(args: &Args) -> anyhow::Result<()> { } fn print_til_section(mut fmt: impl Write, section: &TILSection) -> Result<()> { - if let Some(dependency) = &section.dependency { - let dep = core::str::from_utf8(dependency).unwrap(); + if !section.header.dependencies.is_empty() { // TODO open those files? What todo with then? - // TODO some files still missing this warning - writeln!(fmt, "Warning: {dep}: No such file or directory")?; + write!(fmt, "Warning: ")?; + for dependency in &section.header.dependencies { + fmt.write_all(dependency.as_bytes())?; + writeln!(fmt, ": No such file or directory")?; + } } + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x40b6d0 writeln!(fmt)?; writeln!(fmt, "TYPE INFORMATION LIBRARY CONTENTS")?; print_header(&mut fmt, section)?; writeln!(fmt)?; + let mut size_solver = TILTypeSizeSolver::new(section); + + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x40b926 writeln!(fmt, "SYMBOLS")?; - print_symbols(&mut fmt, section)?; + print_symbols(&mut fmt, section, &mut size_solver)?; writeln!(fmt)?; + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x40b94d writeln!(fmt, "TYPES")?; - print_types(&mut fmt, section)?; + print_types(&mut fmt, section, &mut size_solver)?; writeln!(fmt)?; // TODO streams @@ -71,11 +82,9 @@ fn print_til_section(mut fmt: impl Write, section: &TILSection) -> Result<()> { fn print_header(fmt: &mut impl Write, section: &TILSection) -> Result<()> { // the description of the file // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x40b710 - writeln!( - fmt, - "Description: {}", - core::str::from_utf8(&section.title).unwrap() - )?; + write!(fmt, "Description: ")?; + fmt.write_all(&section.header.description.as_bytes())?; + writeln!(fmt)?; // flags
from the section header // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x40b721 @@ -98,12 +107,12 @@ fn print_header(fmt: &mut impl Write, section: &TILSection) -> Result<()> { writeln!( fmt, "Compiler : {}", - compiler_id_to_str(section.compiler_id) + compiler_id_to_str(section.header.compiler_id) )?; // alignement and convention stuff // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x40b7ed - if let Some(cn) = section.cn { + if let Some(cn) = section.header.cn { write!( fmt, "sizeof(near*) = {} sizeof(far*) = {}", @@ -112,8 +121,8 @@ fn print_header(fmt: &mut impl Write, section: &TILSection) -> Result<()> { )?; } // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x40ba3b - if let Some(cm) = section.cm { - if section.cn.is_some() { + if let Some(cm) = section.header.cm { + if section.header.cn.is_some() { write!(fmt, " ")?; } let code = if cm.is_code_near() { "near" } else { "far" }; @@ -121,11 +130,11 @@ fn print_header(fmt: &mut impl Write, section: &TILSection) -> Result<()> { write!(fmt, "{code} code, {data} data",)?; } // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x40b860 - if let Some(cc) = section.cc { - if section.cm.is_some() || section.cn.is_some() { - write!(fmt, ",")?; + if let Some(cc) = section.header.cc { + if section.header.cm.is_some() || section.header.cn.is_some() { + write!(fmt, ", ")?; } - writeln!(fmt, "{}", calling_convention_to_str(cc))?; + write!(fmt, "{}", calling_convention_to_str(cc))?; } writeln!(fmt)?; @@ -134,50 +143,58 @@ fn print_header(fmt: &mut impl Write, section: &TILSection) -> Result<()> { writeln!( fmt, "default_align = {} sizeof(bool) = {} sizeof(long) = {} sizeof(llong) = {}", - section.def_align.map(|x| x.get()).unwrap_or(0), - section.size_bool, + section.header.def_align.map(|x| x.get()).unwrap_or(0), + section.header.size_bool, section.sizeof_long(), section.sizeof_long_long(), )?; writeln!( fmt, "sizeof(enum) = {} sizeof(int) = {} sizeof(short) = {}", - section.size_enum.map(NonZeroU8::get).unwrap_or(0), - 
section.size_int, + section.header.size_enum.map(NonZeroU8::get).unwrap_or(0), + section.header.size_int, section.sizeof_short(), )?; writeln!( fmt, "sizeof(long double) = {}", - section.size_long_double.map(NonZeroU8::get).unwrap_or(0) + section + .header + .size_long_double + .map(NonZeroU8::get) + .unwrap_or(0) )?; Ok(()) } -fn print_section_flags(fmt: &mut impl Write, section: &TILSection) -> Result<()> { - write!(fmt, "Flags : {:04X}", section.flags.as_raw())?; - if section.flags.is_zip() { +fn print_section_flags( + fmt: &mut impl Write, + section: &TILSection, +) -> Result<()> { + let flags = section.header.flags; + write!(fmt, "Flags : {:04X}", flags.as_raw())?; + if flags.is_zip() { write!(fmt, " compressed")?; } - if section.flags.has_macro_table() { + if flags.has_macro_table() { write!(fmt, " macro_table_present")?; } - if section.flags.have_extended_sizeof_info() { + if flags.have_extended_sizeof_info() { write!(fmt, " extended_sizeof_info")?; } - if section.flags.is_universal() { + if flags.is_universal() { write!(fmt, " universal")?; } - if section.flags.has_ordinal() { + if flags.has_ordinal() { write!(fmt, " ordinals_present")?; } - if section.flags.has_type_aliases() { + if flags.has_type_aliases() { write!(fmt, " aliases_present")?; } - if section.flags.has_extra_stream() { + if flags.has_extra_stream() { write!(fmt, " extra_streams")?; } - if section.flags.has_size_long_double() { + if flags.has_size_long_double() { write!(fmt, " sizeof_long_double")?; } writeln!(fmt) @@ -196,32 +213,85 @@ fn compiler_id_to_str(compiler: Compiler) -> &'static str { } } -fn print_symbols(fmt: &mut impl Write, section: &TILSection) -> Result<()> { +fn print_symbols( + fmt: &mut impl Write, + section: &TILSection, + solver: &mut TILTypeSizeSolver<'_>, +) -> Result<()> { for symbol in &section.symbols { - print_til_type_len(fmt, section, None, &symbol.tinfo)?; - let len = section.type_size_bytes(None, &symbol.tinfo).ok(); - match len { - // TODO What is that????
Find it in InnerRef... - Some(8) => write!(fmt, " {:016X} ", symbol.ordinal)?, - // TODO is limited to 32bits in InnerRef? - _ => write!(fmt, " {:08X} ", symbol.ordinal & 0xFFFF_FFFF)?, - } - let name = std::str::from_utf8(&symbol.name).unwrap(); - print_til_type(fmt, section, Some(name), &symbol.tinfo, true, false)?; + print_til_type_len(fmt, None, &symbol.tinfo, solver)?; + let len = solver.type_size_bytes(None, &symbol.tinfo); + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x409a80 + match len.and_then(|b| u32::try_from(b).ok()) { + Some(8) => write!(fmt, " {:016X}", symbol.ordinal)?, + Some(bytes @ 0..=7) => write!( + fmt, + " {:08X}", + symbol.ordinal & !(u64::MAX << (bytes * 8)) + )?, + _ => write!(fmt, " {:08X}", symbol.ordinal)?, + } + + // TODO find this in InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x409a49 + //let sym_kind = match arg8 { + // 0 => " ", + // 1 => "typedef ", + // 2 => "extern ", + // 3 => "static ", + // 4 => "register", + // 5 => "auto ", + // 6 => "friend ", + // 7 => "virtual ", + // _ => "?!", + //}; + let sym_kind = " "; + write!(fmt, " {} ", sym_kind)?; + + // TODO investiage this + let symbol_name = symbol.name.as_bytes(); + let name = if symbol.ordinal == 0 && symbol_name.first() == Some(&b'_') + { + // remove the first "_", if any + &symbol.name.as_bytes()[1..] 
+ } else { + symbol.name.as_bytes() + }; + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x409a3a + print_til_type( + fmt, + section, + Some(name), + &symbol.tinfo, + false, + true, + false, + true, + )?; writeln!(fmt, ";")?; } Ok(()) } -fn print_types(fmt: &mut impl Write, section: &TILSection) -> Result<()> { - writeln!(fmt, "(enumerated by ordinals)")?; - print_types_by_ordinals(fmt, section)?; - writeln!(fmt, "(enumerated by names)")?; - print_types_by_name(fmt, section)?; +fn print_types( + fmt: &mut impl Write, + section: &TILSection, + solver: &mut TILTypeSizeSolver<'_>, +) -> Result<()> { + // TODO only print by ordinals if there are ordinals + if section.header.flags.has_ordinal() { + writeln!(fmt, "(enumerated by ordinals)")?; + print_types_by_ordinals(fmt, section, solver)?; + writeln!(fmt, "(enumerated by names)")?; + } + print_types_by_name(fmt, section, solver)?; Ok(()) } -fn print_types_by_ordinals(fmt: &mut impl Write, section: &TILSection) -> Result<()> { +fn print_types_by_ordinals( + fmt: &mut impl Write, + section: &TILSection, + solver: &mut TILTypeSizeSolver<'_>, +) -> Result<()> { enum OrdType<'a> { Alias(&'a (u32, u32)), Type { idx: usize, ty: &'a TILTypeInfo }, @@ -233,6 +303,7 @@ fn print_types_by_ordinals(fmt: &mut impl Write, section: &TILSection) -> Result .map(|(idx, ty)| OrdType::Type { idx, ty }) .chain( section + .header .type_ordinal_alias .iter() .flat_map(|x| x.iter()) @@ -260,27 +331,39 @@ fn print_types_by_ordinals(fmt: &mut impl Write, section: &TILSection) -> Result } OrdType::Type { idx, ty } => (idx, ty), }; - print_til_type_len(fmt, section, Some(idx), &final_type.tinfo).unwrap(); + print_til_type_len(fmt, Some(idx), &final_type.tinfo, solver).unwrap(); write!(fmt, "{:5}. 
", ord_num)?; if let OrdType::Alias((_alias_ord, type_ord)) = ord_type { write!(fmt, "(aliased to {type_ord}) ")?; } - let name = std::str::from_utf8(&final_type.name).unwrap(); - print_til_type_root(fmt, section, Some(name), &final_type.tinfo)?; + print_til_type_root( + fmt, + section, + Some(final_type.name.as_bytes()), + &final_type.tinfo, + )?; writeln!(fmt, ";")?; } Ok(()) } -fn print_types_by_name(fmt: &mut impl Write, section: &TILSection) -> Result<()> { +fn print_types_by_name( + fmt: &mut impl Write, + section: &TILSection, + solver: &mut TILTypeSizeSolver<'_>, +) -> Result<()> { for (idx, symbol) in section.types.iter().enumerate() { - if symbol.name.is_empty() { + if symbol.name.as_bytes().is_empty() { continue; } - print_til_type_len(fmt, section, Some(idx), &symbol.tinfo).unwrap(); + print_til_type_len(fmt, Some(idx), &symbol.tinfo, solver).unwrap(); write!(fmt, " ")?; - let name = std::str::from_utf8(&symbol.name).unwrap(); - print_til_type_root(fmt, section, Some(name), &symbol.tinfo)?; + print_til_type_root( + fmt, + section, + Some(symbol.name.as_bytes()), + &symbol.tinfo, + )?; writeln!(fmt, ";")?; } Ok(()) @@ -289,127 +372,101 @@ fn print_types_by_name(fmt: &mut impl Write, section: &TILSection) -> Result<()> fn print_til_type_root( fmt: &mut impl Write, section: &TILSection, - name: Option<&str>, + name: Option<&[u8]>, til_type: &Type, ) -> Result<()> { + // TODO: InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x4438d1 + // TODO: if a is a typedef and ComplexRef or something like it, also print typedef match &til_type.type_variant { - TypeVariant::Struct(_) | TypeVariant::Union(_) | TypeVariant::Enum(_) => {} + TypeVariant::Struct(_) + | TypeVariant::Union(_) + | TypeVariant::Enum(_) => {} + TypeVariant::Typeref(Typeref { + typeref_value: TyperefValue::UnsolvedName(None), + ref_type: Some(_), + }) => {} + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x443906 _ => write!(fmt, "typedef ")?, } - print_til_type(fmt, section, name, til_type, 
true, true) + print_til_type(fmt, section, name, til_type, false, true, true, true) } fn print_til_type( fmt: &mut impl Write, section: &TILSection, - name: Option<&str>, + name: Option<&[u8]>, til_type: &Type, + is_vft: bool, print_pointer_space: bool, print_type_prefix: bool, + print_name: bool, ) -> Result<()> { - if til_type.is_volatile { - write!(fmt, "volatile ")?; - } - if til_type.is_const { - write!(fmt, "const ")?; - } match &til_type.type_variant { - TypeVariant::Basic(til_basic) => print_til_type_basic(fmt, section, name, til_basic), + TypeVariant::Basic(til_basic) => { + print_til_type_basic(fmt, section, name, til_type, til_basic) + } TypeVariant::Pointer(pointer) => print_til_type_pointer( fmt, section, name, + til_type, pointer, + is_vft, print_pointer_space, print_type_prefix, ), - TypeVariant::Function(function) => { - print_til_type_function(fmt, section, name, function, false) - } + TypeVariant::Function(function) => print_til_type_function( + fmt, section, name, til_type, function, false, + ), TypeVariant::Array(array) => print_til_type_array( fmt, section, name, + til_type, array, print_pointer_space, print_type_prefix, ), - TypeVariant::Typedef(typedef) => print_til_type_typedef(fmt, section, name, typedef), - TypeVariant::StructRef(ref_type) => print_til_type( + TypeVariant::Typeref(ref_type) => print_til_type_typedef( fmt, section, name, + til_type, ref_type, - print_pointer_space, print_type_prefix, ), - TypeVariant::Struct(til_struct) => print_til_type_struct(fmt, section, name, til_struct), - TypeVariant::UnionRef(ref_type) => { - print_til_type(fmt, section, name, ref_type, true, print_type_prefix) - } - TypeVariant::Union(til_union) => print_til_type_union(fmt, section, name, til_union), - TypeVariant::EnumRef(ref_type) => print_til_type( - fmt, - section, - name, - ref_type, - print_pointer_space, - print_type_prefix, + TypeVariant::Struct(til_struct) => print_til_type_struct( + fmt, section, name, til_type, til_struct, print_name, ), 
- TypeVariant::Enum(til_enum) => print_til_type_enum(fmt, section, name, til_enum), - TypeVariant::Bitfield(_bitfield) => write!(fmt, "todo!(\"Bitfield\")"), + TypeVariant::Union(til_union) => print_til_type_union( + fmt, section, name, til_type, til_union, print_name, + ), + TypeVariant::Enum(til_enum) => { + print_til_type_enum(fmt, section, name, til_type, til_enum) + } + TypeVariant::Bitfield(bitfield) => { + print_til_type_bitfield(fmt, name, til_type, bitfield) + } } } fn print_til_type_basic( fmt: &mut impl Write, _section: &TILSection, - name: Option<&str>, + name: Option<&[u8]>, + til_type: &Type, til_basic: &Basic, ) -> Result<()> { - const fn signed_name(is_signed: Option<bool>) -> &'static str { - match is_signed { - Some(true) | None => "", - Some(false) => "unsigned ", - } + if til_type.is_volatile { + write!(fmt, "volatile ")?; } - - let name_space = if name.is_some() { " " } else { "" }; - let name = name.unwrap_or(""); - match til_basic { - Basic::Bool => write!(fmt, "bool{name_space}{name}",)?, - Basic::Char => write!(fmt, "char{name_space}{name}",)?, - Basic::Short { is_signed } => { - write!(fmt, "{}short{name_space}{name}", signed_name(*is_signed))? - } - Basic::Void => write!(fmt, "void{name_space}{name}",)?, - Basic::SegReg => write!(fmt, "SegReg{name_space}{name}")?, - Basic::Unknown { bytes: 1 } => write!(fmt, "_BYTE")?, - Basic::Unknown { bytes: 2 } => write!(fmt, "_WORD")?, - Basic::Unknown { bytes: 4 } => write!(fmt, "_DWORD")?, - Basic::Unknown { bytes: 8 } => write!(fmt, "_QWORD")?, - Basic::Unknown { bytes } => write!(fmt, "unknown{bytes}{name_space}{name}")?, - Basic::Int { is_signed } => { - write!(fmt, "{}int{name_space}{name}", signed_name(*is_signed))? - } - Basic::Long { is_signed } => { - write!(fmt, "{}long{name_space}{name}", signed_name(*is_signed))? - } - Basic::LongLong { is_signed } => { - write!(fmt, "{}longlong{name_space}{name}", signed_name(*is_signed))?
- } - Basic::IntSized { bytes, is_signed } => { - if let Some(false) = is_signed { - write!(fmt, "unsigned ")?; - } - write!(fmt, "__int{}{name_space}{name}", bytes.get() * 8)? - } - Basic::LongDouble => write!(fmt, "longfloat{name_space}{name}")?, - Basic::Float { bytes } if bytes.get() == 4 => write!(fmt, "float{name_space}{name}")?, - Basic::Float { bytes } if bytes.get() == 8 => write!(fmt, "double{name_space}{name}")?, - Basic::Float { bytes } => write!(fmt, "float{bytes}{name_space}{name}")?, - Basic::BoolSized { bytes } if bytes.get() == 1 => write!(fmt, "bool{name_space}{name}")?, - Basic::BoolSized { bytes } => write!(fmt, "bool{bytes}{name_space}{name}")?, + if til_type.is_const { + write!(fmt, "const ")?; + } + print_basic_type(fmt, til_basic)?; + if let Some(name) = name { + write!(fmt, " ")?; + fmt.write_all(name)?; } Ok(()) } @@ -417,14 +474,16 @@ fn print_til_type_basic( fn print_til_type_pointer( fmt: &mut impl Write, section: &TILSection, - name: Option<&str>, + name: Option<&[u8]>, + til_type: &Type, pointer: &Pointer, + is_vft_parent: bool, print_pointer_space: bool, print_type_prefix: bool, ) -> Result<()> { if let TypeVariant::Function(inner_fun) = &pointer.typ.type_variant { // How to handle modifier here? 
- print_til_type_function(fmt, section, name, inner_fun, true)?; + print_til_type_function(fmt, section, name, til_type, inner_fun, true)?; } else { // TODO name print_til_type( @@ -432,25 +491,51 @@ fn print_til_type_pointer( section, None, &pointer.typ, + is_vft_parent, print_pointer_space, print_type_prefix, + true, )?; - if print_pointer_space { + // if the innertype is also a pointer, don't print the space + if print_pointer_space + && !matches!(&pointer.typ.type_variant, TypeVariant::Pointer(_)) + { write!(fmt, " ")?; } - let modifier = match pointer.modifier { - None => "", - Some(idb_rs::til::pointer::PointerModifier::Ptr32) => "__ptr32 ", - Some(idb_rs::til::pointer::PointerModifier::Ptr64) => "__ptr64 ", - Some(idb_rs::til::pointer::PointerModifier::Restricted) => "__restricted ", - }; - write!(fmt, "*{modifier}")?; + write!(fmt, "*")?; + if til_type.is_volatile { + write!(fmt, "volatile ")?; + } + if til_type.is_const { + write!(fmt, "const ")?; + } + match pointer.modifier { + None => {} + Some(idb_rs::til::pointer::PointerModifier::Ptr32) => { + write!(fmt, "__ptr32 ")? + } + Some(idb_rs::til::pointer::PointerModifier::Ptr64) => { + write!(fmt, "__ptr64 ")? + } + Some(idb_rs::til::pointer::PointerModifier::Restricted) => { + write!(fmt, "__restricted ")? 
+ } + } if let Some((ty, value)) = &pointer.shifted { write!(fmt, "__shifted(")?; print_til_type_only(fmt, section, ty)?; write!(fmt, ",{value:#X}) ")?; } - write!(fmt, "{}", name.unwrap_or(""))?; + if let Some(name) = name { + fmt.write_all(name)?; + } + + // if the pointed type itself is a VFT then the pointer need to print that + // TODO maybe the above is not ture, it it was inheritec from the + // struct member att + if is_vft_parent || is_vft(section, &pointer.typ) { + write!(fmt, " /*VFT*/")?; + } } Ok(()) } @@ -458,52 +543,95 @@ fn print_til_type_pointer( fn print_til_type_function( fmt: &mut impl Write, section: &TILSection, - name: Option<&str>, - til_type: &Function, + name: Option<&[u8]>, + til_type: &Type, + til_function: &Function, is_pointer: bool, ) -> Result<()> { + if til_type.is_volatile { + write!(fmt, "volatile ")?; + } + if til_type.is_const { + write!(fmt, "const ")?; + } // return type - print_til_type(fmt, section, None, &til_type.ret, false, true)?; + print_til_type( + fmt, + section, + None, + &til_function.ret, + false, + true, + true, + true, + )?; + if !matches!(&til_function.ret.type_variant, TypeVariant::Pointer(_)) { + write!(fmt, " ")?; + } + + let cc = match (section.header.cc, til_function.calling_convention) { + // don't print if using the til section default cc + | (_, None) + // if elipsis just print the '...' as last param + | (_, Some(CallingConvention::Ellipsis)) + // if void arg, just don't print the args (there will be none) + | (_, Some(CallingConvention::Voidarg)) => None, - let cc = match (section.cc, til_type.calling_convention) { - // don't print if using the default cc, or if elipsis just print the '...' 
as last param - (_, None | Some(CallingConvention::Ellipsis)) => None, - // if uses the default cc, don't print anything - (Some(scc), Some(tcc)) if scc == tcc => None, (_, Some(cc)) => Some(calling_convention_to_str(cc)), }; - // print name and calling convention - let name = name.unwrap_or(""); + // print name and calling convention and some flags match (is_pointer, cc) { - (true, None) => write!(fmt, " (*{name})")?, - (false, None) => write!(fmt, " {name}")?, - (true, Some(cc)) => write!(fmt, " ({cc} *{name})")?, - (false, Some(cc)) => write!(fmt, " {cc} {name}")?, + (true, None) => write!(fmt, "(")?, + (false, None) => {} + (true, Some(cc)) => write!(fmt, "(__{cc} ")?, + (false, Some(cc)) => write!(fmt, "__{cc} ")?, + } + + // between the name and cc print some flags + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x442ccf + if til_function.is_noret { + write!(fmt, "__noreturn ")?; + } + if til_function.is_pure { + write!(fmt, "__pure ")?; + } + if til_function.is_high { + write!(fmt, "__high ")?; + } + + if is_pointer { + write!(fmt, "*")?; + } + + if let Some(name) = name { + fmt.write_all(name)?; + } + if is_pointer { + write!(fmt, ")")?; } write!(fmt, "(")?; - for (i, (param_name, param, _argloc)) in til_type.args.iter().enumerate() { + for (i, (param_name, param, _argloc)) in + til_function.args.iter().enumerate() + { if i != 0 { write!(fmt, ", ")?; } - let param_name = param_name - .as_ref() - .map(|name| String::from_utf8_lossy(&name[..])); + let param_name = param_name.as_ref().map(IDBString::as_bytes); print_til_type( - fmt, - section, - param_name.as_ref().map(|name| name.borrow()), - param, - true, - false, + fmt, section, param_name, param, false, true, false, true, )?; } - if til_type.calling_convention == Some(CallingConvention::Ellipsis) { - if !til_type.args.is_empty() { - write!(fmt, ", ")?; + match til_function.calling_convention { + Some(CallingConvention::Voidarg) => write!(fmt, "void")?, + Some(CallingConvention::Ellipsis) => { + if 
!til_function.args.is_empty() { + write!(fmt, ", ")?; + } + write!(fmt, "...")?; } - write!(fmt, "...")?; + _ => {} } write!(fmt, ")") } @@ -511,24 +639,38 @@ fn print_til_type_function( fn print_til_type_array( fmt: &mut impl Write, section: &TILSection, - name: Option<&str>, + name: Option<&[u8]>, + til_type: &Type, til_array: &Array, print_pointer_space: bool, - print_type_prefix: bool, + _print_type_prefix: bool, ) -> Result<()> { + if til_type.is_volatile { + write!(fmt, "volatile ")?; + } + if til_type.is_const { + write!(fmt, "const ")?; + } print_til_type( fmt, section, None, &til_array.elem_type, + false, print_pointer_space, - print_type_prefix, + true, + true, )?; - let name_space = if name.is_some() { " " } else { "" }; - let name = name.unwrap_or(""); - write!(fmt, "{name_space}{name}")?; - if til_array.nelem != 0 { - write!(fmt, "[{}]", til_array.nelem)?; + if let Some(name) = name { + // only print space if not a pointer + match &til_array.elem_type.type_variant { + TypeVariant::Pointer(_) => {} + _ => write!(fmt, " ")?, + } + fmt.write_all(name)?; + } + if let Some(nelem) = til_array.nelem { + write!(fmt, "[{nelem}]")?; } else { write!(fmt, "[]")?; } @@ -538,64 +680,128 @@ fn print_til_type_array( fn print_til_type_typedef( fmt: &mut impl Write, section: &TILSection, - name: Option<&str>, - typedef: &Typedef, + name: Option<&[u8]>, + til_type: &Type, + typedef: &Typeref, + print_prefix: bool, ) -> Result<()> { - // only print prefix, if is root - match typedef { - idb_rs::til::Typedef::Ordinal(ord) => { - let ty = section - .get_ord(idb_rs::id0::Id0TilOrd { ord: (*ord).into() }) - .unwrap(); - print_til_type_name(fmt, &ty.name, &ty.tinfo, false)?; - } - idb_rs::til::Typedef::Name(name) => { - let ty = section.get_name(name); - match ty { - Some(ty) => print_til_type_name(fmt, &ty.name, &ty.tinfo, false)?, - // if we can't find the type, just print the name - None => write!(fmt, "{}", core::str::from_utf8(name).unwrap())?, + if til_type.is_volatile { 
+ write!(fmt, "volatile ")?; + } + if til_type.is_const { + write!(fmt, "const ")?; + } + let mut need_space = false; + if print_prefix { + if let Some(ref_prefix) = typedef.ref_type { + print_typeref_type_prefix(fmt, ref_prefix)?; + need_space = true; + } + } + // get the type referenced by the typdef + match &typedef.typeref_value { + TyperefValue::Ref(idx) => { + if need_space { + write!(fmt, " ")?; + } + let inner_ty = &section.types[*idx]; + fmt.write_all(inner_ty.name.as_bytes())?; + need_space = true; + } + TyperefValue::UnsolvedName(Some(name)) => { + if need_space { + write!(fmt, " ")?; } + fmt.write_all(name.as_bytes())?; + need_space = true; } + // Nothing to print + TyperefValue::UnsolvedName(None) | TyperefValue::UnsolvedOrd(_) => {} + }; + // print the type name, if some + if let Some(name) = name { + if need_space { + write!(fmt, " ")?; + } + fmt.write_all(name)?; } - let name_space = if name.is_some() { " " } else { "" }; - let name = name.unwrap_or(""); - write!(fmt, "{name_space}{name}") + Ok(()) } fn print_til_type_struct( fmt: &mut impl Write, section: &TILSection, - name: Option<&str>, + name: Option<&[u8]>, + _til_type: &Type, til_struct: &Struct, + print_name: bool, ) -> Result<()> { - let name = name.unwrap_or(""); + // TODO check innerref, maybe baseclass don't need to be the first, nor + // need to only one + let is_cppobj = til_struct.is_cppobj + || matches!(til_struct.members.first(), Some(first) if first.is_baseclass); + write!(fmt, "struct ")?; if til_struct.is_unaligned { - write!(fmt, "__unaligned ")?; + if til_struct.is_uknown_8 { + write!(fmt, "__attribute__((packed)) ")?; + } else { + write!(fmt, "__unaligned ")?; + } } if til_struct.is_msstruct { write!(fmt, "__attribute__((msstruct)) ")?; } - if til_struct.is_cpp_obj { + if is_cppobj { write!(fmt, "__cppobj ")?; } - if til_struct.is_vftable { + if til_struct.is_vft { write!(fmt, "/*VFT*/ ")?; } if let Some(align) = til_struct.alignment { write!(fmt,
"__attribute__((aligned({align}))) ")?; } - if let Some(others) = til_struct.others { - write!(fmt, "__other({others:04x}) ")?; + if let Some(name) = name { + if print_name { + fmt.write_all(name)?; + write!(fmt, " ")?; + } + } + let mut members = &til_struct.members[..]; + if is_cppobj { + match members.first() { + Some(baseclass) if baseclass.is_baseclass => { + members = &members[1..]; + write!(fmt, ": ")?; + print_til_type( + fmt, + section, + None, + &baseclass.member_type, + baseclass.is_vft, + true, + true, + false, + )?; + write!(fmt, " ")?; + } + _ => {} + } } - write!(fmt, "{name} {{")?; - for member in &til_struct.members { - let name = member - .name - .as_ref() - .map(|x| core::str::from_utf8(x).unwrap()); - print_til_type(fmt, section, name, &member.member_type, true, false)?; + + write!(fmt, "{{")?; + for member in members { + let member_name = member.name.as_ref().map(IDBString::as_bytes); + print_til_type_complex_member( + fmt, + section, + name, + member_name, + &member.member_type, + member.is_vft, + true, + true, + )?; if let Some(att) = &member.att { print_til_struct_member_att(fmt, &member.member_type, att)?; } @@ -607,69 +813,197 @@ fn print_til_type_struct( fn print_til_type_union( fmt: &mut impl Write, section: &TILSection, - name: Option<&str>, + name: Option<&[u8]>, + _til_type: &Type, til_union: &Union, + print_name: bool, ) -> Result<()> { - let name = name.unwrap_or(""); write!(fmt, "union ")?; if let Some(align) = til_union.alignment { write!(fmt, "__attribute__((aligned({align}))) ")?; } - write!(fmt, "{name} {{")?; + if let Some(name) = &name { + if print_name { + fmt.write_all(name)?; + write!(fmt, " ")?; + } + } + write!(fmt, "{{")?; for (member_name, member) in &til_union.members { - let member_name = member_name - .as_ref() - .map(|x| core::str::from_utf8(x).unwrap()); - print_til_type(fmt, section, member_name, member, true, false)?; + let member_name = member_name.as_ref().map(IDBString::as_bytes); + 
print_til_type_complex_member( + fmt, + section, + name, + member_name, + member, + false, + true, + true, + )?; write!(fmt, ";")?; } write!(fmt, "}}") } +// just print the type, unless we want to embed it +fn print_til_type_complex_member( + fmt: &mut impl Write, + section: &TILSection, + parent_name: Option<&[u8]>, + name: Option<&[u8]>, + til: &Type, + is_vft: bool, + print_pointer_space: bool, + print_name: bool, +) -> Result<()> { + // TODO make closure that print member atts: VFT, align, unaligned, packed, etc + // if parent is not named, don't embeded it, because we can verify if it's part + // of the parent + let Some(parent_name) = parent_name else { + return print_til_type( + fmt, + section, + name, + til, + is_vft, + print_pointer_space, + true, + print_name, + ); + }; + + // TODO if the field is named, don't embeded it? + if name.is_some() { + return print_til_type( + fmt, + section, + name, + til, + is_vft, + print_pointer_space, + true, + print_name, + ); + } + + // if typedef of complex ref, we may want to embed the definition inside the type + // otherwise just print the type regularly + let typedef = match &til.type_variant { + TypeVariant::Typeref(typedef) => typedef, + _ => { + return print_til_type( + fmt, + section, + name, + til, + is_vft, + print_pointer_space, + true, + print_name, + ); + } + }; + + let inner_type = match &typedef.typeref_value { + TyperefValue::Ref(idx) => &section.types[*idx], + TyperefValue::UnsolvedName(Some(name)) => { + if let Some(ref_type) = &typedef.ref_type { + print_typeref_type_prefix(fmt, *ref_type)?; + } + fmt.write_all(name.as_bytes())?; + return Ok(()); + } + TyperefValue::UnsolvedOrd(_) | TyperefValue::UnsolvedName(None) => { + return print_til_type( + fmt, + section, + name, + til, + is_vft, + print_pointer_space, + true, + print_name, + ); + } + }; + + // if the inner_type name is in the format `parent_name::something_else` then + // we embed it + let qualified_parent_name: Vec<_> = +
parent_name.iter().chain(b"::").copied().collect(); + if !inner_type + .name + .as_bytes() + .starts_with(&qualified_parent_name) + { + return print_til_type( + fmt, + section, + name, + til, + is_vft, + print_pointer_space, + true, + print_name, + ); + } + + print_til_type( + fmt, + section, + Some(inner_type.name.as_bytes()), + &inner_type.tinfo, + is_vft, + print_pointer_space, + true, + false, + ) +} + fn print_til_type_enum( fmt: &mut impl Write, section: &TILSection, - name: Option<&str>, + name: Option<&[u8]>, + _til_type: &Type, til_enum: &Enum, ) -> Result<()> { use idb_rs::til::r#enum::EnumFormat::*; - let name = name.unwrap_or(""); let output_fmt_name = match til_enum.output_format { Char => "__char ", Hex => "", SignedDecimal => "__dec ", UnsignedDecimal => "__udec ", }; - write!(fmt, "enum {output_fmt_name}{name} ")?; - match (til_enum.storage_size, section.size_enum) { - (None, None) => {} - (Some(storage_size), Some(size_enum)) => { - if storage_size != size_enum { - let bits_required = til_enum - .members - .iter() - .map(|(_, value)| u64::BITS - value.leading_zeros()) - .max() - .map(|x| x.max(1)) //can't have a value being represented in 0bits - .unwrap_or(8); - if bits_required / 8 < storage_size.get().into() { - write!(fmt, ": __int{} ", storage_size.get() as usize * 8)?; - } - } - } - (None, Some(_)) => {} - (Some(_), None) => {} + write!(fmt, "enum {output_fmt_name}")?; + if let Some(name) = name { + fmt.write_all(name)?; + write!(fmt, " ")?; + } + // InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x4443b0 + if til_enum.storage_size.is_some() + || til_enum.is_signed + || til_enum.is_unsigned + { + let bytes = til_enum.storage_size.or(section.header.size_enum).unwrap(); + let signed = if til_enum.is_unsigned { + "unsigned " + } else { + "" + }; + write!(fmt, ": {signed}__int{} ", bytes.get() as usize * 8)?; } write!(fmt, "{{")?; for (member_name, value) in &til_enum.members { - let name = member_name - .as_ref() - .map(|x| 
core::str::from_utf8(x).unwrap()) - .unwrap_or("_"); - write!(fmt, "{name} = ")?; + if let Some(member_name) = member_name { + fmt.write_all(member_name.as_bytes())?; + } + write!(fmt, " = ")?; match til_enum.output_format { - Char if *value <= 0xFF => write!(fmt, "'{}'", (*value) as u8 as char)?, + Char if *value <= 0xFF => { + write!(fmt, "'{}'", (*value) as u8 as char)? + } Char => write!(fmt, "'\\xu{value:X}'")?, Hex => write!(fmt, "{value:#X}")?, SignedDecimal => write!(fmt, "{}", (*value) as i64)?, @@ -684,18 +1018,49 @@ fn print_til_type_enum( write!(fmt, "}}") } +fn print_til_type_bitfield( + fmt: &mut impl Write, + name: Option<&[u8]>, + _til_type: &Type, + bitfield: &Bitfield, +) -> Result<()> { + print_basic_type( + fmt, + &Basic::IntSized { + bytes: bitfield.nbytes, + is_signed: Some(!bitfield.unsigned), + }, + )?; + if let Some(name) = name { + write!(fmt, " ")?; + fmt.write_all(name)?; + } + write!(fmt, " : {}", bitfield.width)?; + Ok(()) +} + +// InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x423c20 fn print_til_struct_member_att( fmt: &mut impl Write, tinfo: &Type, att: &StructMemberAtt, ) -> Result<()> { match &tinfo.type_variant { + TypeVariant::Basic(_) => print_til_struct_member_basic_att(fmt, att)?, TypeVariant::Pointer(pointer) => match &pointer.typ.type_variant { - TypeVariant::Basic(Basic::Char) => print_til_struct_member_string_att(fmt, att)?, + TypeVariant::Basic(Basic::Char) => { + print_til_struct_member_string_att(fmt, att)? + } + // TODO is valid for other then void? + TypeVariant::Basic(Basic::Void) => { + print_til_struct_member_void_pointer_att(fmt, att)? + } _ => {} }, TypeVariant::Array(array) => match &array.elem_type.type_variant { - TypeVariant::Basic(Basic::Char) => print_til_struct_member_string_att(fmt, att)?, + TypeVariant::Basic(Basic::Char) => { + print_til_struct_member_string_att(fmt, att)? 
+ } _ => {} }, _ => {} @@ -703,43 +1068,144 @@ fn print_til_struct_member_att( Ok(()) } -fn print_til_struct_member_string_att(fmt: &mut impl Write, att: &StructMemberAtt) -> Result<()> { +// InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x4872f0 +fn print_til_struct_member_string_att( + fmt: &mut impl Write, + att: &StructMemberAtt, +) -> Result<()> { let Some(value) = att.str_type() else { - // todo att is unknown + // TODO don't ignore errors return Ok(()); }; - write!(fmt, " __strlit(0x{:08X})", value.as_strlib()) + write!(fmt, " __strlit(0x{:08X})", value.as_strlib())?; + Ok(()) } -fn print_til_type_name( +fn print_til_struct_member_void_pointer_att( fmt: &mut impl Write, - name: &[u8], - tinfo: &Type, - print_prefix: bool, + att: &StructMemberAtt, +) -> Result<()> { + let Some(value) = att.offset_type() else { + // TODO don't ignore errors + return Ok(()); + }; + write!(fmt, " __offset({:#X}", value.offset)?; + // InnerRef InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x720aa0 + if value.is_rvaoff() { + write!(fmt, "|RVAOFF")?; + } + if value.is_pastend() { + write!(fmt, "|PASTEND")?; + } + if value.is_nobase() { + write!(fmt, "|NOBASE")?; + } + if value.is_subtract() { + write!(fmt, "|SUBTRACT")?; + } + if value.is_signedop() { + write!(fmt, "|SIGNEDOP")?; + } + if value.is_nozeroes() { + write!(fmt, "|NOZEROES")?; + } + if value.is_noones() { + write!(fmt, "|NOONES")?; + } + if value.is_selfref() { + write!(fmt, "|SELFREF")?; + } + write!(fmt, ")")?; + Ok(()) +} + +fn print_til_struct_member_basic_att( + fmt: &mut impl Write, + att: &StructMemberAtt, ) -> Result<()> { - let name = String::from_utf8_lossy(name); - let prefix = match &tinfo.type_variant { - TypeVariant::Basic(_) - | TypeVariant::Pointer(_) - | TypeVariant::Function(_) - | TypeVariant::Array(_) - | TypeVariant::Typedef(_) - | TypeVariant::Bitfield(_) => "", - TypeVariant::UnionRef(_) | TypeVariant::Union(_) => "union ", - TypeVariant::StructRef(_) | TypeVariant::Struct(_) => "struct ", - 
TypeVariant::EnumRef(_) | TypeVariant::Enum(_) => "enum ", + // TODO incomplete implementation + if let Some((val, is_auto)) = att.basic_offset_type() { + write!( + fmt, + " __offset({val:#x}{})", + if is_auto { "|AUTO" } else { "" } + )?; + return Ok(()); + } + + let Some(basic_att) = att.basic() else { + // TODO don't ignore errors + return Ok(()); }; - write!(fmt, "{}{name}", if print_prefix { prefix } else { "" }) + + use idb_rs::til::r#struct::ExtAttBasicFmt::*; + if basic_att.is_inv_bits { + write!(fmt, " __invbits")? + } + if basic_att.is_inv_sign { + write!(fmt, " __invsign")? + } + if basic_att.is_lzero { + write!(fmt, " __lzero")? + } + match (basic_att.fmt, basic_att.is_signed) { + (Bin, true) => write!(fmt, " __sbin")?, + (Bin, false) => write!(fmt, " __bin")?, + (Oct, true) => write!(fmt, " __soct")?, + (Oct, false) => write!(fmt, " __oct")?, + (Hex, true) => write!(fmt, " __shex")?, + (Hex, false) => write!(fmt, " __hex")?, + (Dec, true) => write!(fmt, " __dec")?, + (Dec, false) => write!(fmt, " __udec")?, + (Float, _) => write!(fmt, " __float")?, + (Char, _) => write!(fmt, " __char")?, + (Segm, _) => write!(fmt, " __segm")?, + (Off, _) => write!(fmt, " __off")?, + }; + match (basic_att.fmt, basic_att.is_signed) { + (_, false) => {} + // already included on the name + (Bin | Dec | Oct | Hex, _) => {} + (Float | Char | Segm | Off, true) => write!(fmt, " __signed")?, + }; + + if let Some(tabform) = basic_att.tabform { + // InnerRef InnerRef fb47f2c2-3c08-4d40-b7ab-3c7736dce31d 0x48857f + let val1 = match tabform.val1 { + idb_rs::til::r#struct::ExtAttBasicTabformVal1::NODUPS => "NODUPS", + idb_rs::til::r#struct::ExtAttBasicTabformVal1::HEX => "HEX", + idb_rs::til::r#struct::ExtAttBasicTabformVal1::DEC => "DEC", + idb_rs::til::r#struct::ExtAttBasicTabformVal1::OCT => "OCT", + idb_rs::til::r#struct::ExtAttBasicTabformVal1::BIN => "BIN", + }; + write!(fmt, " __tabform({val1},{})", tabform.val2)?; + } + Ok(()) } -fn print_til_type_only(fmt: &mut impl Write, 
section: &TILSection, tinfo: &Type) -> Result<()> { +fn print_til_type_only( + fmt: &mut impl Write, + section: &TILSection, + tinfo: &Type, +) -> Result<()> { match &tinfo.type_variant { - TypeVariant::Typedef(Typedef::Name(name)) => { - write!(fmt, "{}", String::from_utf8_lossy(name))?; + TypeVariant::Typeref(Typeref { + typeref_value: TyperefValue::UnsolvedName(Some(name)), + ref_type: _, + }) => { + fmt.write_all(name.as_bytes())?; } - TypeVariant::Typedef(Typedef::Ordinal(ord)) => { - let ty = section.get_ord(Id0TilOrd { ord: (*ord).into() }).unwrap(); - write!(fmt, "{}", String::from_utf8_lossy(&ty.name))?; + TypeVariant::Typeref(Typeref { + typeref_value: TyperefValue::UnsolvedName(None), + ref_type: _, + }) => {} + TypeVariant::Typeref(Typeref { + typeref_value: TyperefValue::Ref(idx), + ref_type: _, + }) => { + //TypeVariant::Typeref(Typeref::Ordinal(ord)) => { + let ty = §ion.types[*idx]; + fmt.write_all(ty.name.as_bytes())?; } _ => {} }; @@ -748,15 +1214,17 @@ fn print_til_type_only(fmt: &mut impl Write, section: &TILSection, tinfo: &Type) fn print_til_type_len( fmt: &mut impl Write, - section: &TILSection, idx: Option, tinfo: &Type, + size_solver: &mut TILTypeSizeSolver<'_>, ) -> Result<()> { if let TypeVariant::Function(_function) = &tinfo.type_variant { write!(fmt, "FFFFFFFF")?; } else { // if the type is unknown it just prints "FFFFFFF" - let len = section.type_size_bytes(idx, tinfo).unwrap_or(0xFFFF_FFFF); + let len = size_solver + .type_size_bytes(idx, tinfo) + .unwrap_or(0xFFFF_FFFF); write!(fmt, "{len:08X}")?; } Ok(()) @@ -765,19 +1233,19 @@ fn print_til_type_len( fn calling_convention_to_str(cc: CallingConvention) -> &'static str { use idb_rs::til::function::CallingConvention::*; match cc { - Voidarg => "__voidarg", - Cdecl => "__cdecl", - Ellipsis => "__ellipsis", - Stdcall => "__stdcall", - Pascal => "__pascal", - Fastcall => "__fastcall", - Thiscall => "__thiscall", - Swift => "__swift", - Golang => "__golang", - Userpurge => "__userpurge", - 
Uservars => "__uservars", - Usercall => "__usercall", - Reserved3 => "__ccreserved3", + Voidarg => "voidarg", + Cdecl => "cdecl", + Ellipsis => "ellipsis", + Stdcall => "stdcall", + Pascal => "pascal", + Fastcall => "fastcall", + Thiscall => "thiscall", + Swift => "swift", + Golang => "golang", + Userpurge => "userpurge", + Uservars => "uservars", + Usercall => "usercall", + Reserved3 => "ccreserved3", } } @@ -816,6 +1284,7 @@ fn print_types_total(fmt: &mut impl Write, section: &TILSection) -> Result<()> { .map(|macros| macros.len()) .unwrap_or(0); let alias_num = section + .header .type_ordinal_alias .as_ref() .map(Vec::len) @@ -827,3 +1296,78 @@ fn print_types_total(fmt: &mut impl Write, section: &TILSection) -> Result<()> { "Total {symbols_num} symbols, {types_num} types, {macros_num} macros" ) } + +fn is_vft(section: &TILSection, typ: &Type) -> bool { + match &typ.type_variant { + // propagate the search? + //TypeVariant::Pointer(pointer) => todo!(), + // TODO struct with only function-pointers is also vftable? + TypeVariant::Struct(ty) => ty.is_vft, + TypeVariant::Typeref(typedef) => { + let inner_type = match &typedef.typeref_value { + TyperefValue::Ref(idx) => §ion.types[*idx], + TyperefValue::UnsolvedOrd(_) + | TyperefValue::UnsolvedName(_) => return false, + }; + is_vft(section, &inner_type.tinfo) + } + _ => false, + } +} + +fn print_basic_type(fmt: &mut impl Write, til_basic: &Basic) -> Result<()> { + const fn signed_name(is_signed: Option) -> &'static str { + match is_signed { + Some(true) | None => "", + Some(false) => "unsigned ", + } + } + + match til_basic { + Basic::Bool => write!(fmt, "bool")?, + Basic::Char => write!(fmt, "char")?, + Basic::Short { is_signed } => { + write!(fmt, "{}short", signed_name(*is_signed))? 
+ } + Basic::Void => write!(fmt, "void")?, + Basic::SegReg => write!(fmt, "SegReg")?, + Basic::Unknown { bytes: 1 } => write!(fmt, "_BYTE")?, + Basic::Unknown { bytes: 2 } => write!(fmt, "_WORD")?, + Basic::Unknown { bytes: 4 } => write!(fmt, "_DWORD")?, + Basic::Unknown { bytes: 8 } => write!(fmt, "_QWORD")?, + Basic::Unknown { bytes } => write!(fmt, "unknown{bytes}")?, + Basic::Int { is_signed } => { + write!(fmt, "{}int", signed_name(*is_signed))? + } + Basic::Long { is_signed } => { + write!(fmt, "{}long", signed_name(*is_signed))? + } + Basic::LongLong { is_signed } => { + write!(fmt, "{}longlong", signed_name(*is_signed))? + } + Basic::IntSized { bytes, is_signed } => { + if let Some(false) = is_signed { + write!(fmt, "unsigned ")?; + } + write!(fmt, "__int{}", bytes.get() * 8)? + } + Basic::LongDouble => write!(fmt, "longfloat")?, + Basic::Float { bytes } if bytes.get() == 4 => write!(fmt, "float")?, + Basic::Float { bytes } if bytes.get() == 8 => write!(fmt, "double")?, + Basic::Float { bytes } => write!(fmt, "float{bytes}")?, + Basic::BoolSized { bytes } if bytes.get() == 1 => write!(fmt, "bool")?, + Basic::BoolSized { bytes } => write!(fmt, "bool{bytes}")?, + } + Ok(()) +} + +fn print_typeref_type_prefix( + fmt: &mut impl Write, + ref_type: TyperefType, +) -> Result<()> { + match ref_type { + idb_rs::til::TyperefType::Union => write!(fmt, "union"), + idb_rs::til::TyperefType::Struct => write!(fmt, "struct"), + idb_rs::til::TyperefType::Enum => write!(fmt, "enum"), + } +} diff --git a/src/tools/tools.rs b/src/tools/tools.rs index 9028f9b..8a7c08b 100644 --- a/src/tools/tools.rs +++ b/src/tools/tools.rs @@ -148,9 +148,9 @@ fn get_id0_section(args: &Args) -> Result { FileType::Idb => { let input = BufReader::new(File::open(&args.input)?); let mut parser = IDBParser::new(input)?; - let id0_offset = parser - .id0_section_offset() - .ok_or_else(|| anyhow!("IDB file don't contains a TIL sector"))?; + let id0_offset = parser.id0_section_offset().ok_or_else(|| { + 
anyhow!("IDB file don't contains a TIL sector") + })?; parser.read_id0_section(id0_offset) } } @@ -163,7 +163,9 @@ fn main() -> Result<()> { Operation::DumpTil => dump_til(&args), Operation::DumpID0 => dump_id0(&args), //Operation::SplitIDB(split_idbargs) => split_idb(&args, split_idbargs), - Operation::DecompressTil(decompress_til_args) => decompress_til(&args, decompress_til_args), + Operation::DecompressTil(decompress_til_args) => { + decompress_til(&args, decompress_til_args) + } Operation::DumpFunctions => dump_functions(&args), Operation::DumpSegments => dump_segments(&args), Operation::DumpLoaderNames => dump_loader_name(&args), @@ -176,9 +178,15 @@ fn main() -> Result<()> { Operation::DumpDirtreeNames => dump_dirtree_names(&args), Operation::DumpDirtreeImports => dump_dirtree_imports(&args), Operation::DumpDirtreeBpts => dump_dirtree_bpts(&args), - Operation::DumpDirtreeBookmarksIdaplace => dump_dirtree_bookmarks_idaplace(&args), - Operation::DumpDirtreeBookmarksStructplace => dump_dirtree_bookmarks_structplace(&args), - Operation::DumpDirtreeBookmarksTiplace => dump_dirtree_bookmarks_tiplace(&args), + Operation::DumpDirtreeBookmarksIdaplace => { + dump_dirtree_bookmarks_idaplace(&args) + } + Operation::DumpDirtreeBookmarksStructplace => { + dump_dirtree_bookmarks_structplace(&args) + } + Operation::DumpDirtreeBookmarksTiplace => { + dump_dirtree_bookmarks_tiplace(&args) + } Operation::PrintTilib => tilib_print(&args), } }