diff --git a/lib/xixanta/src/instruction.rs b/lib/xixanta/src/instruction.rs index 97da0a3..ced452a 100644 --- a/lib/xixanta/src/instruction.rs +++ b/lib/xixanta/src/instruction.rs @@ -36,6 +36,10 @@ impl PString { } } + pub fn is_valid(&self) -> bool { + !(self.value.is_empty() || self.range.is_empty()) + } + pub fn is_valid_identifier(&self) -> Result<(), String> { if self.value.trim().is_empty() { return Err(format!("empty identifier")); @@ -120,7 +124,7 @@ impl Bundle { #[derive(Eq, Hash, PartialEq, Debug, Clone)] pub enum AddressingMode { - Unknown, + Unknown, // TODO: is this really used? Implied, Immediate, Absolute, diff --git a/lib/xixanta/src/opcodes.rs b/lib/xixanta/src/opcodes.rs index a716545..e849126 100644 --- a/lib/xixanta/src/opcodes.rs +++ b/lib/xixanta/src/opcodes.rs @@ -19,6 +19,12 @@ pub struct Entry { pub affected_on_page: bool, } +#[derive(Debug)] +pub struct Control { + pub has_identifier: bool, + pub required_args: Option, +} + lazy_static! { pub static ref INSTRUCTIONS: HashMap> = { let mut instrs = HashMap::new(); @@ -669,4 +675,25 @@ lazy_static! { opcodes }; + + pub static ref CONTROL_FUNCTIONS: HashMap = { + let mut functions = HashMap::new(); + + functions.insert(String::from(".hibyte"), Control { has_identifier: false, required_args: Some(1) }); + functions.insert(String::from(".lobyte"), Control { has_identifier: false, required_args: Some(1) }); + functions.insert(String::from(".macro"), Control { has_identifier: true, required_args: None }); + functions.insert(String::from(".proc"), Control { has_identifier: true, required_args: Some(0) }); + functions.insert(String::from(".scope"), Control { has_identifier: true, required_args: Some(0) }); + functions.insert(String::from(".end"), Control { has_identifier: false, required_args: Some(0) }); + functions.insert(String::from(".endscope"), Control { has_identifier: false, required_args: Some(0) }); + functions.insert(String::from(".endproc"), Control { has_identifier: false, required_args: Some(0) }); + functions.insert(String::from(".endmacro"), Control { has_identifier: false, required_args: Some(0) }); + functions.insert(String::from(".segment"), Control { has_identifier: false, required_args: Some(1) }); + functions.insert(String::from(".byte"), Control { has_identifier: false, required_args: None }); + functions.insert(String::from(".db"), Control { has_identifier: false, required_args: None }); + functions.insert(String::from(".word"), Control { has_identifier: false, required_args: None }); + functions.insert(String::from(".dw"), Control { has_identifier: false, required_args: None }); + + functions + }; } diff --git a/lib/xixanta/src/parser.rs b/lib/xixanta/src/parser.rs index dad95d3..d11e98c 100644 --- a/lib/xixanta/src/parser.rs +++ b/lib/xixanta/src/parser.rs @@ -1,9 +1,11 @@ use crate::errors::ParseError; use crate::instruction::PString; -use crate::opcodes::INSTRUCTIONS; +use crate::opcodes::{CONTROL_FUNCTIONS, INSTRUCTIONS}; use std::io::{self, BufRead, Read}; use std::ops::Range; +// TODO: add cargo-fuzz + type Result = std::result::Result; #[derive(Debug, Clone, PartialEq)] @@ -16,6 +18,7 @@ pub enum NodeType { Literal, Identifier, Label, + Call, Empty, } @@ -28,12 +31,25 @@ pub struct PNode { pub args: Option>>, } +impl PNode { + pub fn empty() -> PNode { + Self { + node_type: NodeType::Empty, + value: PString::new(), + left: None, + right: None, + args: None, + } + } +} + #[derive(Debug, Clone, PartialEq)] pub struct Parser { line: usize, column: usize, offset: usize, pub nodes: Vec>, + pub errors: Vec, } impl Parser { @@ -43,55 +59,99 @@ impl Parser { column: 0, offset: 0, nodes: Vec::new(), + errors: Vec::new(), } } - pub fn parse(&mut self, reader: impl Read) -> Result<()> { - self.from_reader(reader)?; - Ok(()) - } - pub fn reset(&mut self) { self.line = 0; self.column = 0; self.offset = 0; self.nodes = Vec::new(); + self.errors = Vec::new(); } - fn from_reader(&mut self, reader: R) -> Result<()> { + pub fn parse(&mut self, reader: impl Read) -> Result<()> { for line in io::BufReader::new(reader).lines() { - // TODO: instead of this, accumulate errors so to give as many - // errors as possible. - let node = self.parse_line(line?.as_str())?; - if node.node_type != NodeType::Empty { - self.nodes.push(node); + if let Err(err) = self.parse_line(line?.as_str()) { + self.errors.push(err); } self.line += 1; } - Ok(()) + + println!("NODES: {:#?}", self.nodes); + + match self.errors.last() { + Some(err) => Err(err.clone()), + None => Ok(()), + } } - fn parse_line(&mut self, line: &str) -> Result> { + fn parse_line(&mut self, line: &str) -> Result<()> { self.column = 0; + // Skip until the first non-whitespace character. If that's not + // possible, then it's an empty line and we can return early. if !self.skip_whitespace(line) { - return Ok(Box::new(PNode { - node_type: NodeType::Empty, - value: PString::new(), + return Ok(()); + } + + // Let's pin point the last character we need to care for parsing. This + // can be either the start position of an inline comment (i.e. ';'), or + // the real line end. + let end = if let Some(comment) = line.find(|c: char| c == ';') { + comment + } else { + line.len() + }; + + // It's safe to trim the end of the resulting string. Moreover, doing so + // can already show lines which are actually empty (e.g. a line which + // simply contains a comment). If this is the case, just return an empty + // node. + let mut l = line.get(self.column..end).unwrap_or_default().trim_end(); + if l.is_empty() { + return Ok(()); + } + + // Fetch the first element of the line, which we will call it an + // "identifier" but might be a label or a statement. The label might be + // followed by more code. Hence, push it first, then fetch the next + // identifier and finally fall through. + self.offset = 0; + let (mut id, mut nt) = self.parse_identifier(l)?; + if nt == NodeType::Label { + self.nodes.push(Box::new(PNode { + node_type: nt, + value: id, left: None, right: None, args: None, })); + + self.skip_whitespace(l); + + // Is it the label alone? If so return early. + l = line.get(self.column..end).unwrap_or_default().trim_end(); + if l.is_empty() { + return Ok(()); + } + + // The label is followed by a statement. Let's parse the identifier + // for it and fall through. + self.offset = 0; + (id, nt) = self.parse_identifier(l)?; + if nt == NodeType::Label { + return Err(self.parser_error("cannot have multiple labels at the same location")); + } + + self.skip_whitespace(l); } - self.parse_expression(line) + self.parse_statement(l, id) } - // Returns a PString object which holds the information for an identifier. - // - // NOTE: this function assumes that `self.column` points to a non-whitespace - // character. - fn parse_identifier(&mut self, line: &str) -> Option { + fn parse_identifier(&mut self, line: &str) -> Result<(PString, NodeType)> { let start = self.column; let base_offset = self.offset; @@ -99,555 +159,487 @@ impl Parser { // character or an inline comment is found. Then our PString object // is merely whatever is on the column..self.column range. for c in line.get(self.offset..).unwrap_or("").chars() { - if c.is_whitespace() || c == ';' || c == '(' || c == '=' { - return Some(PString { - value: String::from(line.get(base_offset..self.offset).unwrap_or("").trim()), - line: self.line, - range: Range { - start, - end: self.column, + if c.is_whitespace() || c == ':' || c == '(' || c == ')' || c == '=' { + let val = String::from(line.get(base_offset..self.offset).unwrap_or("").trim()); + let nt = if c == ':' { + NodeType::Label + } else { + NodeType::Value + }; + + // TODO + // self.next(); + let end = if c == ':' { + self.next(); + self.column - 1 + } else { + self.column + }; + + return Ok(( + PString { + value: val, + line: self.line, + range: Range { + start, + end, // TODO + // end: self.column - 1, + }, }, - }); + nt, + )); + } else if !c.is_alphanumeric() && c != '_' { + // TODO: on the contrary, if alphanumeric or _, just follow + // through. Otherwise always break. TODO NOT REALLY + // return Err(self.parser_error("bad character for possible identifier")); } self.next(); } - // Otherwise, we might be at a point whether there is nothing (e.g. an - // empty line), or the line is merely the identifier (e.g. instruction - // with implied addressing). + // The line is merely the identifier (e.g. instruction with implied + // addressing). let id = String::from(line.get(base_offset..).unwrap_or("").trim()); - if id.is_empty() { - None - } else { - Some(PString { + Ok(( + PString { value: id, line: self.line, range: Range { start, end: self.column, }, - }) + }, + NodeType::Value, + )) + } + + fn parse_statement(&mut self, line: &str, id: PString) -> Result<()> { + // There are only two top-level statements: instructions and + // assignments. Other kinds of expressions can also be used in the + // middle of assignments or instructions, and so they have to be handled + // as common expressions. Whether expressions make sense at the + // different levels is something to be figured out by the assembler. + match INSTRUCTIONS.get(&id.value) { + Some(_) => self.parse_instruction(line, id), + None => { + if line.contains('=') { + self.parse_assignment(line, id) + } else { + let node = self.parse_expression_with_identifier(id, line)?; + self.nodes.push(node); + Ok(()) + } + } } } - fn parse_expression(&mut self, line: &str) -> Result> { - self.offset = 0; + fn parse_instruction(&mut self, line: &str, id: PString) -> Result<()> { + let mut paren = 0; - match self.parse_identifier(line) { - Some(identifier) => self.parse_expression_with_identifier(identifier, line), - None => Err(self.parser_error("bad empty line")), + self.skip_whitespace(line); + + if line.contains("=") { + return Err(self.parser_error( + format!("cannot used reserved name for the mnemonic '{}'", id.value).as_str(), + )); } - } - fn parse_expression_with_identifier( - &mut self, - identifier: PString, - line: &str, - ) -> Result> { - if identifier.value.starts_with('.') { - // If the statement starts with a '.', it's guaranteed to be a - // control statement. - self.parse_control(identifier, line) + let indirect = line.chars().nth(self.offset).unwrap_or(',') == '('; + let l = if indirect { + self.next(); + self.skip_whitespace(line); + paren = self.find_matching_paren(line, self.offset)?; + line.get(self.offset..paren).unwrap_or_default() + } else { + line.get(self.offset..).unwrap_or_default() + }; + + self.offset = 0; + let mut left = if l.is_empty() { + None } else { - // Otherwise, we will parse it either as an instruction or a general - // statement depending on whether the parsed identifier is a valid - // instruction mnemonic or not. - match INSTRUCTIONS.get(&identifier.value) { - Some(_instr) => self.parse_instruction(identifier, line), - None => self.parse_statement(identifier, line), + Some(self.parse_left_arm(l)?) + }; + + self.skip_whitespace(l); + + // The parsing of the left arm should have advanced the offset right + // into the right arm. If there is nothing there, then we have no right + // arm. Otherwise we have to parse the expression. + // TODO + let mut right_str = l.get(self.offset..).unwrap_or_default(); + let mut right = if right_str.is_empty() { + None + } else { + self.offset = 0; + Some(self.parse_expression(right_str)?) + }; + + if indirect { + if left.is_none() { + return Err(self.parser_error("empty indirect addressing")); } - } - } - // TODO: parenthesis are actually optional - // TODO: check argument names on declaration (left is Some) - fn parse_control(&mut self, id: PString, line: &str) -> Result> { - self.skip_whitespace(line); + left = Some(Box::new(PNode { + node_type: NodeType::Indirection, + value: PString::new(), + left, + right: right.clone(), + args: None, + })); - let left = self.fetch_control_name(line)?; - - // Try to handle arguments passed to the control statement. - let mut args = None; - if let Some(open) = line.find(|c: char| c == '(') { - match line.find(|c: char| c == ')') { - Some(_) => { - let diff = self.offset - open; - self.column += diff + 1; - self.offset += diff + 1; - args = Some(self.parse_arguments(line)?); - } - None => { - return Err( - self.parser_error(format!("open parenthesis on macro call").as_str()) - ) - } + right_str = line.get(paren..).unwrap_or_default(); + if !(right_str.is_empty() || right_str == ")") && right.is_some() { + return Err(self.parser_error("bad indirect addressing")); } + + right = if right_str.is_empty() || right_str == ")" { + None + } else { + self.offset = 0; + + // TODO: ") ," + self.next(); + self.skip_whitespace(right_str); + + // TODO: ", " + self.next(); + self.skip_whitespace(right_str); + + Some(self.parse_expression(right_str)?) + }; } - Ok(Box::new(PNode { - node_type: NodeType::Control, + self.nodes.push(Box::new(PNode { + node_type: NodeType::Instruction, value: id, left, - right: None, - args, - })) - } + right, + args: None, + })); - fn fetch_control_name(&mut self, line: &str) -> Result>> { - let start = self.column; - let base_offset = self.offset; - let string = line.get(base_offset..).unwrap_or_default(); + Ok(()) + } - // If the name starts with an open parenthesis, then this is a macro - // call, not a named definition. - if string.is_empty() || string.starts_with('(') { - return Ok(None); + fn parse_assignment(&mut self, line: &str, id: PString) -> Result<()> { + if let Err(msg) = id.is_valid_identifier() { + return Err(self.parser_error(&msg)); } - // Let's advance the column counter until there is no longer an - // identifier. - for c in string.chars() { - if c.is_whitespace() || c == ';' || c == '(' { - break; - } - if !c.is_alphanumeric() && c != '_' { - return Err(self.parser_error( - format!("'{}' is not a valid character for an identifier", c).as_str(), - )); - } - self.next(); + // Skip whitespaces and make sure that we have a '=' sign. + self.skip_whitespace(line); + if line.chars().nth(self.offset).unwrap_or(' ') != '=' { + return Err(self.parser_error(format!("unknown instruction '{}'", id.value).as_str())); } - // And return the identifier if possible. - let name = line.get(base_offset..self.offset).unwrap_or_default(); - if name.is_empty() { - Ok(None) - } else { - let value = PString { - value: name.to_string(), - line: self.line, - range: Range { - start, - end: self.column, - }, - }; + // Skip the '=' sign and any possible whitespaces. + self.next(); + self.skip_whitespace(line); - // Finally, check that the parsed identifier is a valid one. - if let Err(msg) = value.is_valid_identifier() { - return Err(self.parser_error(&msg)); - } + // Parse the expression on the right side of the assignment. + let rest = line.get(self.offset..).unwrap_or("").trim_end(); + if rest.is_empty() { + return Err(self.parser_error("incomplete assignment")); + }; + self.offset = 0; + let left = Some(self.parse_expression(rest)?); - Ok(Some(Box::new(PNode { - node_type: NodeType::Identifier, - value, - left: None, - right: None, - args: None, - }))) - } + self.nodes.push(Box::new(PNode { + node_type: NodeType::Assignment, + value: id.clone(), + left, + right: None, + args: None, + })); + + Ok(()) } fn parse_arguments(&mut self, line: &str) -> Result>> { - let mut range = Range { - start: self.offset, - end: self.offset, - }; - let mut idx = self.offset; - let mut res = vec![]; - let mut found = false; + // Skip any possible whitespace before the optional opening paren. + self.skip_whitespace(line); - for c in line.get(self.offset..).unwrap_or_default().chars() { - // println!("C: {}", c); - if !found && c.is_whitespace() { - // self.next(); - range.start += 1; - range.end += 1; - idx += 1; - continue; - } - found = true; - // println!("ACTUAL: {:#?}", line.get(range.start..range.end)); + // Scope the end of the argument list. If the arguments are enclosed on + // parenthesis, take that into account, otherwise we will parse until + // the end of the cleaned line. + let paren = line.chars().nth(self.offset).unwrap_or_default() == '('; + let end = if paren { + self.next(); + self.skip_whitespace(line); + self.find_matching_paren(line, self.offset)? + } else { + line.len() + }; - if c == ')' { - range.end -= 1; + let mut args = Vec::new(); + + loop { + // TODO: trimmed_str out? + let trimmed_str = line.get(..end).unwrap_or_default().trim_end(); + println!("TRIMME: {:#?}", trimmed_str.get(self.offset..)); + + let (arg_end, comma) = self.find_left_end(trimmed_str, false)?; + let arg_untrimmed = line.get(self.offset..arg_end).unwrap_or_default(); + let arg = arg_untrimmed.trim_end(); + let diff = arg_untrimmed.len() - arg.len(); + // .trim_end(); + println!( + "ARG_END: {:#?} -- ARG: {:#?} - DIFF: {}", + arg_end, arg, diff + ); + if arg.is_empty() { break; - } else if c == ',' { - // println!("SENT: {:#?}", line.get(range.clone())); - res.push(self.parse_expression(line.get(range.clone()).unwrap_or_default())?); - // println!("GOT: {:#?}", res.last()); - range.start = idx + 1; - range.end = idx + 1; - self.column += 2; - // self.column += idx + 1; - found = false; } - if !c.is_whitespace() { - range.end += 1; - } else if found { - self.column += 1; + // if !comma { + // s = arg.to_owned() + " "; + // arg = s.as_str(); + // } + + self.offset = 0; + args.push(self.parse_expression(arg)?); + + self.offset = arg_end; + self.column += diff; + println!("{:#?}", line.get(self.offset..end)); + self.skip_whitespace(line); // TODO + if comma { + self.next(); + self.skip_whitespace(line); } - idx += 1; } - // And evaluate the last argument if available. - let s = line.get(range.start..range.end).unwrap_or_default(); - // println!("END: {:#?}", s); - if s.len() > 0 { - res.push(self.parse_expression(s)?); - } + println!("ARGS: {:#?}", args); - Ok(res) + Ok(args) } - fn parse_instruction(&mut self, id: PString, line: &str) -> Result> { - self.skip_whitespace(line); - - if line.contains("=") { - return Err(self.parser_error( - format!("cannot used reserved name for the mnemonic '{}'", id.value).as_str(), - )); - } - - let left = if line.chars().nth(self.offset).unwrap_or(',') == '(' { - println!("INDIRECTIOn"); - self.parse_indirection(line)? - } else { - println!("LEFT"); - self.parse_left_arm(line)? - }; - println!("OUT: {:#?}", left); + fn parse_left_arm(&mut self, line: &str) -> Result> { + let start_column = self.column; - self.skip_whitespace(line); + // We track the start value of the offset and we will keep track of the + // movement of it on `end`. This allows us to preserve the value on + // inner calls that might modify the offset value. + // TODO + let (end, comma) = self.find_left_end(line, false)?; - let mut right = None; - if let Some(c) = line.chars().nth(self.offset) { - if c == ',' { - self.next(); + // Set the offset to 0 since we are constraining the string to be + // parsed. + let str = line.get(..end).unwrap_or_default().trim_end(); + self.offset = 0; - let end = match line - .get(self.offset..) - .unwrap_or("") - .find(|c: char| c == ';') - { - Some(semi) => semi, - None => line.len(), - }; + // Parse the expression that we can get from the current offset to the + // computed end. + let expr = self.parse_expression(str); - self.skip_whitespace(line); - let string = line.get(self.offset..end).unwrap_or_default(); - right = Some(self.parse_expression(string)?); - // PString { - // value: string.to_string(), - // line: self.line, - // range: Range { - // start: self.column, - // end, - // }, - // }, - // string, - // )?); - } + // Set the offset to the end of the line that is shared with the caller. + // let diff_column = (end - start) - (self.column - start_column); + self.offset = end; + self.column = start_column + end; + if comma { + self.next(); } - - Ok(Box::new(PNode { - node_type: NodeType::Instruction, - value: id, - left: if left.node_type == NodeType::Empty { - None - } else { - Some(left) - }, - right, - args: None, - })) + expr } - fn parse_indirection(&mut self, line: &str) -> Result> { - let mut parens = 0; - let start = self.column + 1; - let base_offset = self.offset + 1; - let mut comma = 0; + // TODO: revisit inside_paren + fn find_left_end(&self, line: &str, inside_paren: bool) -> Result<(usize, bool)> { + let mut idx = self.offset; + let mut parens = if inside_paren { 1 } else { 0 }; + let mut comma = false; - for c in line.get(self.offset..).unwrap_or("").chars() { + for c in line.get(self.offset..).unwrap_or_default().chars() { if c == ',' { - if parens == 1 { - comma = self.offset; + if parens == 0 { + comma = true; + break; } } else if c == '(' { parens += 1; } else if c == ')' { parens -= 1; - } else if c == ';' { - break; } - self.next(); + idx += 1; if parens < 0 { return Err(self.parser_error("too many closing parenthesis")); - } else if parens == 0 { - break; } } if parens > 0 { return Err(self.parser_error("unclosed parenthesis")); } - let left; - let mut right = None; - - if comma > 0 { - let string = line.get(base_offset..comma).unwrap_or_default(); - left = Some(self.parse_expression(string)?); - // PString { - // value: string.to_string(), - // line: self.line, - // range: Range { - // start: init, - // end: comma, - // }, - // }, - // string, - // )?); - - comma += 1; - for c in line.get(comma..line.len()).unwrap_or_default().chars() { - if c.is_whitespace() { - comma += 1; - } else { - break; - } - } - - let rstring = line.get(comma..line.len() - 1).unwrap_or_default(); - assert!(!rstring.is_empty()); - right = Some(self.parse_expression(rstring)?); - // PString { - // value: rstring.to_string(), - // line: self.line, - // range: Range { - // start: comma + 1, - // end: self.column - 1, - // }, - // }, - // rstring, - // )?); - } else { - let string = line.get(base_offset..self.offset - 1).unwrap_or_default(); - left = Some(self.parse_expression(string)?); - // PString { - // value: string.to_string(), - // line: self.line, - // range: Range { - // start: init, - // end: self.column - 1, - // }, - // }, - // string, - // )?); - } - - Ok(Box::new(PNode { - node_type: NodeType::Indirection, - value: PString::new(), - left, - right, - args: None, - })) + Ok((idx, comma)) } - fn parse_left_arm(&mut self, line: &str) -> Result> { - let mut parens = 0; - let base_offset = self.offset; + fn find_matching_paren(&self, line: &str, init: usize) -> Result { + let mut idx = init; + let mut parens = 1; - let left = line.get(self.offset..).unwrap_or_default(); - if left.is_empty() { - return Ok(Box::new(PNode { - node_type: NodeType::Empty, - value: PString::new(), - left: None, - right: None, - args: None, - })); - } - - for c in line.get(self.offset..).unwrap_or("").chars() { - if c == ',' { - if parens == 0 { - break; - } - } else if c == '(' { + for c in line.get(init..).unwrap_or_default().chars() { + if c == '(' { parens += 1; } else if c == ')' { parens -= 1; - } else if c == ';' { - break; } - self.next(); - - if parens < 0 { + if parens == 0 { + return Ok(idx); + } else if parens < 0 { return Err(self.parser_error("too many closing parenthesis")); } + + idx += 1; } if parens > 0 { return Err(self.parser_error("unclosed parenthesis")); } - let string = line.get(base_offset..self.offset).unwrap_or_default(); - self.parse_expression(string) - // PString { - // value: string.to_string(), - // line: self.line, - // range: Range { - // start, - // end: self.column, - // }, - // }, - // string, - // ) + Ok(idx) } - fn parse_statement(&mut self, identifier: PString, line: &str) -> Result> { - println!("STATEMENT: {:#?}", line); - let start = identifier.value.chars().nth(0).unwrap_or(' '); - let end = identifier.value.chars().nth(self.offset - 1).unwrap_or(' '); + // Parse the expression under `line`. Indeces such as `self.column` and + // `self.offset` are assumed to be correct at this point for the given + // `line` (e.g. the line might not be a full line but rather a limited range + // and the offset has been set accordingly). Returns a new node for the + // expression at hand. + fn parse_expression(&mut self, line: &str) -> Result> { + let (id, nt) = self.parse_identifier(line)?; - if end == ':' { - // TODO - // self.parse_label(id, line) + if nt == NodeType::Label { + Err(self.parser_error("not expecting a label defined here")) + } else { + self.parse_expression_with_identifier(id, line) + } + } - Ok(Box::new(PNode { - node_type: NodeType::Label, - value: PString::new(), - left: None, - right: None, - args: None, - })) - } else if start == '$' || start == '#' || start == '%' { - let inner = line.get(1..).unwrap_or(""); - let inner_id = PString { - value: String::from(inner), - line: self.line, - range: Range { - start: identifier.range.start + 1, - end: identifier.range.end, - }, - }; + // Parse the expression under `line` by taking into consideration that a + // part of it has already been parsed and evaluated as the given `id`. + // Indeces such as `self.column` and `self.offset` are assumed to be correct + // at this point. Returns a new node for the expression at hand. + fn parse_expression_with_identifier(&mut self, id: PString, line: &str) -> Result> { + // Reaching this condition is usually a bad sign, but there is so many + // ways in which it could go wrong, that an `assert!` wouldn't be fair + // either. Hence, just error out. + if !id.is_valid() { + return Err(self.parser_error("invalid identifier")); + } + if id.value.starts_with(".") { + self.parse_control(id, line) + } else if line.starts_with('$') || line.starts_with('#') || line.starts_with('%') { + self.parse_literal(id, line) + } else { + // If there is an indication that it might be a macro call, process + // it as such. + self.skip_whitespace(line); + if !line + .get(self.offset..) + .unwrap_or_default() + .trim_end() + .is_empty() + { + let args = self.parse_arguments(line)?; + return Ok(Box::new(PNode { + node_type: NodeType::Call, + value: id, + left: None, + right: None, + args: if args.is_empty() { None } else { Some(args) }, + })); + } + + // Blindly return the identifier as a PNode. This might be either a + // value as-is, or a macro call which we can't make sense at the + // moment. Eitherway, let the assembler decide. Ok(Box::new(PNode { - node_type: NodeType::Literal, - value: identifier, - left: Some(self.parse_expression_with_identifier(inner_id, inner)?), + node_type: NodeType::Value, + value: id, + left: None, right: None, args: None, })) - } else { - // TODO: macros - - if line.contains('=') { - self.parse_assignment(identifier, line) - } else { - // Let's double check that this is not an uknown instruction. - if let Some(idx) = line.find(|c: char| c.is_whitespace()) { - let end = if let Some(comment) = line.find(|c: char| c == ';') { - comment - } else { - line.len() - }; - if idx < end { - return Err(self.parser_error( - format!("unknown instruction '{}'", identifier.value).as_str(), - )); - } - } - println!("END: {:#?}!", identifier); + } + } - // At this point we will assume that this is a regular value. - // Fetch it as is and let the evaluator make sense of it. - Ok(Box::new(PNode { + // Returns a NodeType::Control node with whatever could be parsed + // considering the given `id` and rest of the `line`. + fn parse_control(&mut self, id: PString, line: &str) -> Result> { + let mut left = None; + let required; + + // Ensure that this is a function that we know of. In the past this was + // not done and it brought too many problems that made the more + // "abstract" way of handling this just too complicated. + if let Some(control) = CONTROL_FUNCTIONS.get(&id.value.to_lowercase()) { + required = control.required_args; + + // If this control function has an identifier (e.g. `.macro + // Identifier(args...)`), let's parse it now. + if control.has_identifier { + self.skip_whitespace(line); + left = Some(Box::new(PNode { node_type: NodeType::Value, - value: identifier, + value: self.parse_identifier(line)?.0, left: None, right: None, args: None, - })) + })); } + } else { + return Err(self.parser_error(format!("unknown function '{}'", id.value).as_str())); } - } - fn parse_assignment(&mut self, id: PString, line: &str) -> Result> { - if let Err(msg) = id.is_valid_identifier() { - return Err(self.parser_error(&msg)); + // At this point we reached the arguments (i.e. any identifier required + // by the control function has already been parsed and set in `left`). + // Then, just parse the arguments and ensure that it matches the amount + // required by the function. + let args = self.parse_arguments(line)?; + if let Some(args_required) = required { + if args.len() != args_required { + return Err(self.parser_error( + format!("wrong number of arguments for function '{}'", id.value).as_str(), + )); + } } - // Skip whitespaces and make sure that we have a '=' sign. - self.skip_whitespace(line); - if line.chars().nth(self.offset).unwrap_or(' ') != '=' { - return Err(self.parser_error(format!("unknown instruction '{}'", id.value).as_str())); - } + Ok(Box::new(PNode { + node_type: NodeType::Control, + value: id, + left, + right: None, + args: if args.is_empty() { None } else { Some(args) }, + })) + } - // Skip the '=' sign and any possible whitespaces. + // Returns a NodeType::Literal node with whatever could be parsed + // considering the given `id` and rest of the `line`. + fn parse_literal(&mut self, id: PString, line: &str) -> Result> { + // Force the column to point to the literal character just in case + // of expressions like '#.hibyte'. Then skip whitespaces for super + // ugly statements such as '# 20'. This is ugly but we should permit + // it. A later linter can yell at a programmer for this. + self.column = id.range.start; + self.offset = 0; self.next(); - if !self.skip_whitespace(line) { - return Err(self.parser_error("incomplete assignment")); - } - - let rest = line.get(self.offset..).unwrap_or("").trim(); - let l = String::from(rest); - if l.is_empty() { - return Err(self.parser_error("incomplete assignment")); - } + self.skip_whitespace(line); - // The `Context` struct pretty much guarantees that `current` and - // `current_mut` will return something, so it's safe to ignore a - // `None`. - // if let Some(entry) = self.context.current_mut() { - // match entry.entry(id.value.clone()) { - // Entry::Occupied(e) => { - // return Err(ParseError { - // line: self.line, - // message: format!( - // "variable '{}' is being re-assigned: it was previously defined in line {}", - // id.value, e.get().node.line), - // }) - // } - // Entry::Vacant(e) => e.insert(PValue { - // node: PString { - // value: l, - // line: self.line, - // range: Range { - // start: id.range.start, - // end: line.len(), - // }, - // }, - // value: 0, - // label: false, - // }), - // }; - // } + // With this, just fetch the inner expression and return the literal + // node. + let inner = line.get(self.offset..).unwrap_or(""); + self.offset = 0; + let left = self.parse_expression(inner)?; Ok(Box::new(PNode { - node_type: NodeType::Assignment, - value: id.clone(), - // TODO: has to be None - left: Some(self.parse_expression(rest)?), - // PString { - // value: l, - // line: self.line, - // range: Range { - // start: self.column, - // end: line.len(), - // }, - // }, - // line, - // )?), + node_type: NodeType::Literal, + value: id, + left: Some(left), right: None, args: None, })) } + // Returns a new ParseError by using the current line. fn parser_error(&self, msg: &str) -> ParseError { ParseError { message: String::from(msg), @@ -656,14 +648,15 @@ impl Parser { } } - // Advances `self.column` until a non-whitespace character is found. Returns + // Advances `self.column` and `self.offset` until a non-whitespace character + // is found. Note that the initial index is bound to `self.offset`. Returns // false if the line can be skipped entirely, true otherwise. fn skip_whitespace(&mut self, line: &str) -> bool { if line.is_empty() { return false; } - for c in line.get(self.column..).unwrap_or("").chars() { + for c in line.get(self.offset..).unwrap_or("").chars() { if !c.is_whitespace() { if c == ';' { return false; @@ -677,6 +670,7 @@ impl Parser { true } + // Increment `self.column` and `self.offset` by one. fn next(&mut self) { self.column += 1; self.offset += 1; @@ -687,143 +681,816 @@ impl Parser { mod tests { use super::*; + fn assert_one_valid(parser: &mut Parser, line: &str) { + assert!(parser.parse(line.as_bytes()).is_ok()); + assert!(parser.nodes.len() == 1); + } + + fn assert_node(node: &Box, nt: NodeType, line: &str, value: &str) { + assert_eq!(node.node_type, nt); + assert_eq!( + node.value.value.as_str(), + line.get(node.value.range.clone()).unwrap() + ); + assert_eq!(node.value.value.as_str(), value); + } + + // Empty + #[test] - fn indirect_addressing() { + fn empty_line() { let mut parser = Parser::new(); - assert!(!parser.parse("lda ($20, x)".as_bytes()).is_err()); - // TODO - // println!("{:#?}", parser.nodes); + assert!(!parser.parse("".as_bytes()).is_err()); + assert_eq!(parser.nodes.len(), 0); } - fn assert_control_call_with_arg( - line: &str, - node: &Box, - name: &str, - literal: &str, - value: &str, - ) { - assert_eq!(node.node_type, NodeType::Control); + #[test] + fn spaced_line() { + let mut parser = Parser::new(); + assert!(!parser.parse(" ".as_bytes()).is_err()); + assert_eq!(parser.nodes.len(), 0); + } - // Value: name of the macro. - assert_eq!(node.value.value.as_str(), name); - assert_eq!(line.get(node.value.range.clone()).unwrap(), name); + #[test] + fn just_a_comment_line() { + for line in vec![";; This is a comment", " ;; Comment"].into_iter() { + let mut parser = Parser::new(); + assert!(!parser.parse(line.as_bytes()).is_err()); + assert_eq!(parser.nodes.len(), 0); + } + } - // Literal node (e.g. '$2002'). - assert_eq!(node.args.as_ref().unwrap()[0].node_type, NodeType::Literal); - assert_eq!(node.args.as_ref().unwrap()[0].value.value, literal); - assert_eq!( - line.get(node.args.as_ref().unwrap()[0].value.range.clone()) - .unwrap(), - literal, - ); + // Labels - // Value node (e.g. '2002'). - assert_eq!( - node.args.as_ref().unwrap()[0] - .left - .clone() - .unwrap() - .value - .value, - value, - ); - assert_eq!( - line.get( - node.args.as_ref().unwrap()[0] - .left - .clone() - .unwrap() - .value - .range - .clone() - ) - .unwrap(), - value, - ); + #[test] + fn anonymous_label() { + let mut parser = Parser::new(); + assert!(parser.parse(":".as_bytes()).is_ok()); + assert_eq!(parser.nodes.len(), 1); + assert!(parser.nodes.first().unwrap().value.value.is_empty()); + assert_eq!(parser.nodes.first().unwrap().value.range.start, 0); + assert_eq!(parser.nodes.first().unwrap().value.range.end, 0); + + parser = Parser::new(); + assert!(parser.parse(" :".as_bytes()).is_ok()); + assert_eq!(parser.nodes.len(), 1); + assert!(parser.nodes.first().unwrap().value.value.is_empty()); + assert_eq!(parser.nodes.first().unwrap().value.range.start, 2); + assert_eq!(parser.nodes.first().unwrap().value.range.end, 2); } #[test] - fn parse_control_no_args() { + fn named_label() { let mut parser = Parser::new(); - let line = ".scope One ; comment"; + assert!(parser.parse("label:".as_bytes()).is_ok()); + assert_eq!(parser.nodes.len(), 1); + assert_eq!(parser.nodes.first().unwrap().value.value, "label"); + assert_eq!(parser.nodes.first().unwrap().value.range.start, 0); + assert_eq!(parser.nodes.first().unwrap().value.range.end, 5); + + parser = Parser::new(); + assert!(parser.parse(" label:".as_bytes()).is_ok()); + assert_eq!(parser.nodes.len(), 1); + assert_eq!(parser.nodes.first().unwrap().value.value, "label"); + assert_eq!(parser.nodes.first().unwrap().value.range.start, 2); + assert_eq!(parser.nodes.first().unwrap().value.range.end, 7); + } + #[test] + fn label_with_instruction() { + let line = "label: dex"; + + let mut parser = Parser::new(); assert!(parser.parse(line.as_bytes()).is_ok()); - assert_eq!(parser.nodes[0].node_type, NodeType::Control); - assert_eq!(parser.nodes[0].value.value.as_str(), ".scope"); - assert_eq!( - line.get(parser.nodes[0].value.range.clone()).unwrap(), - ".scope" - ); + assert_eq!(parser.nodes.len(), 2); + + // Label. + assert_eq!(parser.nodes.first().unwrap().value.value, "label"); + assert_eq!(parser.nodes.first().unwrap().value.range.start, 0); + assert_eq!(parser.nodes.first().unwrap().value.range.end, 5); + + // Instruction + assert_node( + parser.nodes.last().unwrap(), + NodeType::Instruction, + line, + "dex", + ) } + // Literals + #[test] - fn parse_control_one_paren_arg() { - let mut parser = Parser::new(); - let line = ".hibyte($2002)"; + fn parse_pound_literal() { + for line in vec!["#20", " #20 ", " #20 ; Comment", " label: # 20"].into_iter() { + let mut parser = Parser::new(); + assert!(parser.parse(line.as_bytes()).is_ok()); + + let node = parser.nodes.last().unwrap(); + assert_eq!(node.node_type, NodeType::Literal); + assert!(node.right.is_none()); + assert!(node.args.is_none()); + + let left = node.left.clone().unwrap(); + assert_eq!(left.node_type, NodeType::Value); + assert_eq!(left.value.value, "20"); + assert_eq!(line.get(left.value.range).unwrap(), "20"); + } + } + #[test] + fn parse_compound_literal() { + let line = "#$20"; + let mut parser = Parser::new(); assert!(parser.parse(line.as_bytes()).is_ok()); - assert_control_call_with_arg(line, &parser.nodes[0], ".hibyte", "$2002", "2002"); + + let node = parser.nodes.last().unwrap(); + assert_eq!(node.node_type, NodeType::Literal); + assert!(node.right.is_none()); + assert!(node.args.is_none()); + + let inner = node.left.clone().unwrap(); + assert_eq!(inner.node_type, NodeType::Literal); + assert_eq!(inner.value.value, "$20"); + assert_eq!(line.get(inner.value.range).unwrap(), "$20"); + + let innerinner = inner.left.clone().unwrap(); + assert_eq!(innerinner.node_type, NodeType::Value); + assert_eq!(innerinner.value.value, "20"); + assert_eq!(line.get(innerinner.value.range).unwrap(), "20"); } #[test] - fn parse_control_one_paren_arg_with_spaces() { + fn parse_variable_in_literal() { + let line = "#Variable"; let mut parser = Parser::new(); - let line = ".hibyte( $2002 )"; - assert!(parser.parse(line.as_bytes()).is_ok()); - assert_control_call_with_arg(line, &parser.nodes[0], ".hibyte", "$2002", "2002"); + + let node = parser.nodes.last().unwrap(); + assert_eq!(node.node_type, NodeType::Literal); + assert!(node.right.is_none()); + assert!(node.args.is_none()); + + let inner = node.left.clone().unwrap(); + assert_eq!(inner.node_type, NodeType::Value); + assert_eq!(inner.value.value, "Variable"); + assert_eq!(line.get(inner.value.range).unwrap(), "Variable"); + } + + // Regular instructions. + + #[test] + fn instruction_with_implied() { + for line in vec![ + "dex", + " dex", + " dex ", + " dex ; Comment", + " label: dex", + ] + .into_iter() + { + let mut parser = Parser::new(); + assert!(parser.parse(line.as_bytes()).is_ok()); + + let node = parser.nodes.last().unwrap(); + assert_node(node, NodeType::Instruction, line, "dex"); + assert!(node.left.is_none()); + assert!(node.right.is_none()); + assert!(node.args.is_none()); + } + } + + #[test] + fn instruction_with_implied_explicit() { + for line in vec!["inc a", " inc a", " inc a "].into_iter() { + let mut parser = Parser::new(); + assert_one_valid(&mut parser, line); + + let node = parser.nodes.first().unwrap(); + assert_node(node, NodeType::Instruction, line, "inc"); + assert!(node.right.is_none()); + assert!(node.args.is_none()); + + assert_node(&node.left.clone().unwrap(), NodeType::Value, line, "a"); + } + } + + #[test] + fn instruction_with_zeropage() { + for line in vec!["inc $20", " inc $20", " inc $20 "].into_iter() { + let mut parser = Parser::new(); + assert_one_valid(&mut parser, line); + + let node = parser.nodes.first().unwrap(); + assert_node(node, NodeType::Instruction, line, "inc"); + assert!(node.right.is_none()); + assert!(node.args.is_none()); + + assert_node(&node.left.clone().unwrap(), NodeType::Literal, line, "$20"); + } + } + + #[test] + fn instruction_with_immediate() { + for line in vec!["adc #$20", " adc #$20 ", " adc #$20 "].into_iter() { + let mut parser = Parser::new(); + assert_one_valid(&mut parser, line); + + let node = parser.nodes.first().unwrap(); + assert_node(node, NodeType::Instruction, line, "adc"); + assert!(node.right.is_none()); + assert!(node.args.is_none()); + + assert_node(&node.left.clone().unwrap(), NodeType::Literal, line, "#$20"); + } + } + + #[test] + fn instruction_with_absolute() { + for line in vec!["inc $2002", " inc $2002", " inc $2002 "].into_iter() { + let mut parser = Parser::new(); + assert_one_valid(&mut parser, line); + + let node = parser.nodes.first().unwrap(); + assert_node(node, NodeType::Instruction, line, "inc"); + assert!(node.right.is_none()); + assert!(node.args.is_none()); + + assert_node( + &node.left.clone().unwrap(), + NodeType::Literal, + line, + "$2002", + ); + } + } + + #[test] + fn instruction_with_absolute_x() { + for line in vec![ + "inc $2002, x", + " inc $2002, x", + " inc $2002, x ", + " label: inc $2002, x ; Comment", + ] + .into_iter() + { + let mut parser = Parser::new(); + assert!(parser.parse(line.as_bytes()).is_ok()); + + let node = parser.nodes.last().unwrap(); + assert_node(node, NodeType::Instruction, line, "inc"); + assert!(node.args.is_none()); + + assert_node( + &node.left.clone().unwrap(), + NodeType::Literal, + line, + "$2002", + ); + assert_node(&node.right.clone().unwrap(), NodeType::Value, line, "x"); + } + } + + #[test] + fn indirect_addressing_bare() { + for line in vec![ + "lda ($2000)", + " lda ( $2000 ) ; Comment", + " : lda ( $2000)", + "lda($2000)", + ] + .into_iter() + { + let mut parser = Parser::new(); + assert!(parser.parse(line.as_bytes()).is_ok()); + + let node = parser.nodes.last().unwrap(); + assert_node(node, NodeType::Instruction, line, "lda"); + assert!(node.right.is_none()); + + let left = node.left.clone().unwrap(); + assert_eq!(left.node_type, NodeType::Indirection); + assert_node(&left.left.unwrap(), NodeType::Literal, line, "$2000"); + assert!(left.right.is_none()); + } } #[test] - fn parse_control_in_assignment() { + fn indirect_addressing_x() { + for line in vec![ + "lda ($20, x)", + " lda ($20, x)", + " lda ($20,x) ", + " : lda ($20 , x) ; Comment", + " lda ( $20 , x ) ", + ] + .into_iter() + { + let mut parser = Parser::new(); + assert!(parser.parse(line.as_bytes()).is_ok()); + + let node = parser.nodes.last().unwrap(); + assert_node(node, NodeType::Instruction, line, "lda"); + assert!(node.right.is_none()); + + let left = node.left.clone().unwrap(); + assert_eq!(left.node_type, NodeType::Indirection); + assert_node(&left.left.unwrap(), NodeType::Literal, line, "$20"); + assert_node(&left.right.unwrap(), NodeType::Value, line, "x"); + } + } + + #[test] + fn bad_indirect_addressing_x() { let mut parser = Parser::new(); - let line = "variable = .hibyte($2002)"; + let err = parser.parse("lda (Variable, x), y".as_bytes()); + assert_eq!(err.unwrap_err().message, "bad indirect addressing"); + } + + #[test] + fn indirect_addressing_y() { + for line in vec!["lda ($20), y"].into_iter() { + let mut parser = Parser::new(); + assert!(parser.parse(line.as_bytes()).is_ok()); + + let node = parser.nodes.last().unwrap(); + assert_node(node, NodeType::Instruction, line, "lda"); + + let left = node.left.clone().unwrap(); + assert_eq!(left.node_type, NodeType::Indirection); + assert_node(&left.left.unwrap(), NodeType::Literal, line, "$20"); + assert!(left.right.is_none()); + + let right = node.right.clone().unwrap(); + assert_node(&right, NodeType::Value, line, "y"); + } + } + + #[test] + fn variable_in_instruction() { + let line = "lda Variable, x"; + let mut parser = Parser::new(); assert!(parser.parse(line.as_bytes()).is_ok()); - assert_eq!(parser.nodes[0].node_type, NodeType::Assignment); - assert_eq!(parser.nodes[0].value.value.as_str(), "variable"); - assert_eq!( - line.get(parser.nodes[0].value.range.clone()).unwrap(), - "variable" + + let node = parser.nodes.last().unwrap(); + assert_node(node, NodeType::Instruction, line, "lda"); + assert!(node.args.is_none()); + + assert_node( + &node.left.clone().unwrap(), + NodeType::Value, + line, + "Variable", ); - assert_control_call_with_arg( + assert_node(&node.right.clone().unwrap(), NodeType::Value, line, "x"); + } + + #[test] + fn variable_literal_in_instruction() { + let line = "lda #Variable, x"; + let mut parser = Parser::new(); + assert!(parser.parse(line.as_bytes()).is_ok()); + + let node = parser.nodes.last().unwrap(); + assert_node(node, NodeType::Instruction, line, "lda"); + assert!(node.args.is_none()); + + assert_node( + &node.left.clone().unwrap(), + NodeType::Literal, line, - &parser.nodes[0].left.clone().unwrap(), - ".hibyte", - "$2002", - "2002", + "#Variable", ); + assert_node(&node.right.clone().unwrap(), NodeType::Value, line, "x"); } + // Assignments + #[test] - fn parse_control_many_paren_args() { + fn bad_assignments() { let mut parser = Parser::new(); - let line = ".macro Whatever( arg1 , arg2 )"; - assert!(parser.parse(line.as_bytes()).is_ok()); + let mut err = parser.parse("abc = $10".as_bytes()); + assert_eq!( + err.unwrap_err().message, + "cannot use names which are valid hexadecimal values such as 'abc'" + ); - // Node. - let node = parser.nodes[0].clone(); - assert_eq!(node.node_type, NodeType::Control); - assert_eq!(node.value.value.as_str(), ".macro"); - assert_eq!(line.get(node.value.range.clone()).unwrap(), ".macro"); - - // Left: identifier. - let left = node.left.clone().unwrap(); - assert_eq!(left.node_type, NodeType::Identifier); - assert_eq!(left.value.value.as_str(), "Whatever"); - assert_eq!(line.get(left.value.range.clone()).unwrap(), "Whatever"); - - // arg1 - let arg1 = node.args.as_ref().unwrap()[0].clone(); - assert_eq!(arg1.node_type, NodeType::Value); - assert_eq!(arg1.value.value.as_str(), "arg1"); - assert_eq!(line.get(arg1.value.range.clone()).unwrap(), "arg1"); - - // arg2 - let arg2 = node.args.as_ref().unwrap()[1].clone(); - assert_eq!(arg2.node_type, NodeType::Value); - assert_eq!(arg2.value.value.as_str(), "arg2"); - assert_eq!(line.get(arg2.value.range.clone()).unwrap(), "arg2"); + parser = Parser::new(); + err = parser.parse("var =".as_bytes()); + assert_eq!(err.unwrap_err().message, "incomplete assignment"); + + parser = Parser::new(); + err = parser.parse("var = ".as_bytes()); + assert_eq!(err.unwrap_err().message, "incomplete assignment"); + + parser = Parser::new(); + err = parser.parse("var = ; Comment".as_bytes()); + assert_eq!(err.unwrap_err().message, "incomplete assignment"); + } + + // Control statements. + + #[test] + fn parse_control_no_args() { + for line in vec![".end", " .end", " label: .end ; Comment"].into_iter() { + let mut parser = Parser::new(); + assert!(parser.parse(line.as_bytes()).is_ok()); + + let node = parser.nodes.last().unwrap(); + assert_node(node, NodeType::Control, line, ".end"); + assert!(node.left.is_none()); + assert!(node.right.is_none()); + assert!(node.args.is_none()); + } + } + + #[test] + fn parse_control_one_arg() { + for line in vec![ + ".hibyte $2000", + " .hibyte $2000", + " label: .hibyte $2000 ; Comment", + " .hibyte($2000)", + " .hibyte ( $2000 )", + ] + .into_iter() + { + let mut parser = Parser::new(); + assert!(parser.parse(line.as_bytes()).is_ok()); + + let node = parser.nodes.last().unwrap(); + assert_node(node, NodeType::Control, line, ".hibyte"); + assert!(node.left.is_none()); + assert!(node.right.is_none()); + + let args = node.args.clone().unwrap(); + assert_eq!(args.len(), 1); + assert_node(args.first().unwrap(), NodeType::Literal, line, "$2000"); + } + } + + #[test] + fn parse_control_multiple_args() { + for line in vec![ + ".byte $10, $20", + " .byte $10, $20", + " label: .byte $10, $20 ; Comment", + " .byte($10, $20)", + " .byte ( $10 , $20 )", + ] + .into_iter() + { + let mut parser = Parser::new(); + assert!(parser.parse(line.as_bytes()).is_ok()); + + let node = parser.nodes.last().unwrap(); + assert_node(node, NodeType::Control, line, ".byte"); + assert!(node.left.is_none()); + assert!(node.right.is_none()); + + let args = node.args.clone().unwrap(); + assert_eq!(args.len(), 2); + assert_node(args.first().unwrap(), NodeType::Literal, line, "$10"); + assert_node(args.last().unwrap(), NodeType::Literal, line, "$20"); + } + } + + #[test] + fn parse_control_id_no_args() { + for line in vec![ + ".scope Scope", + " .scope Scope", + " label: .scope Scope ; Comment", + " .scope Scope", + ] + .into_iter() + { + let mut parser = Parser::new(); + assert!(parser.parse(line.as_bytes()).is_ok()); + + let node = parser.nodes.last().unwrap(); + assert_node(node, NodeType::Control, line, ".scope"); + assert!(node.right.is_none()); + assert!(node.args.is_none()); + + let left = node.left.clone().unwrap(); + assert_node(&left, NodeType::Value, line, "Scope"); + } + } + + #[test] + fn parse_control_id_one_arg() { + for line in vec![ + ".macro Macro(arg1)", + ".macro Macro arg1 ", + " .macro Macro(arg1)", + " label: .macro Macro(arg1) ; Comment", + " .macro Macro ( arg1 )", + ] + .into_iter() + { + let mut parser = Parser::new(); + assert!(parser.parse(line.as_bytes()).is_ok()); + + let node = parser.nodes.last().unwrap(); + assert_node(node, NodeType::Control, line, ".macro"); + assert!(node.right.is_none()); + + let left = node.left.clone().unwrap(); + assert_node(&left, NodeType::Value, line, "Macro"); + + let args = node.args.clone().unwrap(); + assert_eq!(args.len(), 1); + assert_node(args.first().unwrap(), NodeType::Value, line, "arg1"); + } + } + + #[test] + fn parse_control_id_multiple_args() { + for line in vec![ + ".macro Macro(arg1, arg2)", + ".macro Macro arg1, arg2 ", + " .macro Macro(arg1, arg2)", + " label: .macro Macro(arg1, arg2) ; Comment", + " .macro Macro ( arg1 , arg2 )", + ] + .into_iter() + { + let mut parser = Parser::new(); + assert!(parser.parse(line.as_bytes()).is_ok()); + + let node = parser.nodes.last().unwrap(); + assert_node(node, NodeType::Control, line, ".macro"); + assert!(node.right.is_none()); + + let left = node.left.clone().unwrap(); + assert_node(&left, NodeType::Value, line, "Macro"); + + let args = node.args.clone().unwrap(); + assert_eq!(args.len(), 2); + assert_node(args.first().unwrap(), NodeType::Value, line, "arg1"); + assert_node(args.last().unwrap(), NodeType::Value, line, "arg2"); + } + } + + #[test] + fn parse_control_bad_number_args() { + for line in vec![".hibyte", ".hibyte($20, $22)"].into_iter() { + let mut parser = Parser::new(); + assert_eq!( + parser.parse(line.as_bytes()).unwrap_err().message, + "wrong number of arguments for function '.hibyte'" + ); + } + } + + #[test] + fn parse_control_in_instructions() { + for line in vec!["lda #.hibyte($2010)", " label: lda #.hibyte $2010 "].into_iter() { + let mut parser = Parser::new(); + assert!(parser.parse(line.as_bytes()).is_ok()); + + let node = parser.nodes.last().unwrap(); + assert_node(node, NodeType::Instruction, line, "lda"); + assert!(node.right.is_none()); + assert!(node.args.is_none()); + + let left = node.left.clone().unwrap(); + assert_node(&left, NodeType::Literal, line, "#.hibyte"); + assert!(left.right.is_none()); + assert!(left.args.is_none()); + + let control = left.left.clone().unwrap(); + assert_node(&control, NodeType::Control, line, ".hibyte"); + assert!(control.left.is_none()); + assert!(control.right.is_none()); + + let args = control.args.clone().unwrap(); + assert_eq!(args.len(), 1); + assert_node(args.first().unwrap(), NodeType::Literal, line, "$2010"); + } + } + + #[test] + fn parse_control_in_indirect_x_instructions() { + for line in vec![ + "lda (#.hibyte($2010), x)", + " label: lda (#.hibyte ( $2010 ) , x)", + ] + .into_iter() + { + let mut parser = Parser::new(); + assert!(parser.parse(line.as_bytes()).is_ok()); + + let node = parser.nodes.last().unwrap(); + assert_node(node, NodeType::Instruction, line, "lda"); + assert!(node.right.is_none()); + assert!(node.args.is_none()); + + let ind = node.left.clone().unwrap(); + assert_eq!(ind.node_type, NodeType::Indirection); + assert!(ind.args.is_none()); + + let left = ind.left.clone().unwrap(); + assert_node(&left, NodeType::Literal, line, "#.hibyte"); + assert!(left.right.is_none()); + assert!(left.args.is_none()); + + let control = left.left.clone().unwrap(); + assert_node(&control, NodeType::Control, line, ".hibyte"); + assert!(control.left.is_none()); + assert!(control.right.is_none()); + + let args = control.args.clone().unwrap(); + assert_eq!(args.len(), 1); + assert_node(args.first().unwrap(), NodeType::Literal, line, "$2010"); + + let right = ind.right.clone().unwrap(); + assert_node(&right, NodeType::Value, line, "x"); + } + } + + #[test] + fn parse_control_in_indirect_y_instructions() { + for line in vec![ + "lda (#.hibyte($2010)), y", + " label: lda ( #.hibyte( $2010 ) ) , y", + ] + .into_iter() + { + let mut parser = Parser::new(); + assert!(parser.parse(line.as_bytes()).is_ok()); + + let node = parser.nodes.last().unwrap(); + assert_node(node, NodeType::Instruction, line, "lda"); + assert!(node.args.is_none()); + + let ind = node.left.clone().unwrap(); + assert_eq!(ind.node_type, NodeType::Indirection); + assert!(ind.right.is_none()); + assert!(ind.args.is_none()); + + let left = ind.left.clone().unwrap(); + assert_node(&left, NodeType::Literal, line, "#.hibyte"); + assert!(left.right.is_none()); + assert!(left.args.is_none()); + + let control = left.left.clone().unwrap(); + assert_node(&control, NodeType::Control, line, ".hibyte"); + assert!(control.left.is_none()); + assert!(control.right.is_none()); + + let args = control.args.clone().unwrap(); + assert_eq!(args.len(), 1); + assert_node(args.first().unwrap(), NodeType::Literal, line, "$2010"); + + let right = node.right.clone().unwrap(); + assert_node(&right, NodeType::Value, line, "y"); + } + } + + #[test] + fn parse_control_in_assignments() { + for line in vec![ + "lala = #.hibyte($2010)", + " lala = #.hibyte($2010)", + "label: lala = #.hibyte($2010) ; comment", + ] + .into_iter() + { + let mut parser = Parser::new(); + assert!(parser.parse(line.as_bytes()).is_ok()); + + let node = parser.nodes.last().unwrap(); + assert_node(node, NodeType::Assignment, line, "lala"); + assert!(node.right.is_none()); + assert!(node.args.is_none()); + + let left = node.left.clone().unwrap(); + assert_node(&left, NodeType::Literal, line, "#.hibyte"); + assert!(left.right.is_none()); + assert!(left.args.is_none()); + + let control = left.left.clone().unwrap(); + assert_node(&control, NodeType::Control, line, ".hibyte"); + assert!(control.left.is_none()); + assert!(control.right.is_none()); + + let args = control.args.clone().unwrap(); + assert_eq!(args.len(), 1); + assert_node(args.first().unwrap(), NodeType::Literal, line, "$2010"); + } + } + + #[test] + fn parse_unknown_control() { + let mut parser = Parser::new(); + assert_eq!( + parser.parse(".".as_bytes()).unwrap_err().message, + "unknown function '.'" + ); + + parser = Parser::new(); + assert_eq!( + parser.parse(".whatever".as_bytes()).unwrap_err().message, + "unknown function '.whatever'" + ); + } + + // Macro calls. + + #[test] + fn parse_macro_call_no_args_variable_lookalike() { + for line in vec![ + "MACRO_CALL", + " MACRO_CALL ", + " label: MACRO_CALL ; comment", + ] + .into_iter() + { + let mut parser = Parser::new(); + assert!(parser.parse(line.as_bytes()).is_ok()); + + let node = parser.nodes.last().unwrap(); + assert_node(node, NodeType::Value, line, "MACRO_CALL"); + assert!(node.left.is_none()); + assert!(node.right.is_none()); + assert!(node.args.is_none()); + } + } + + #[test] + fn parse_macro_call_no_args() { + for line in vec![ + "MACRO_CALL()", + " MACRO_CALL() ", + " MACRO_CALL () ", + " MACRO_CALL ( ) ", + " label: MACRO_CALL () ; comment", + ] + .into_iter() + { + let mut parser = Parser::new(); + assert!(parser.parse(line.as_bytes()).is_ok()); + + let node = parser.nodes.last().unwrap(); + assert_node(node, NodeType::Call, line, "MACRO_CALL"); + assert!(node.left.is_none()); + assert!(node.right.is_none()); + assert!(node.args.is_none()); + } + } + + #[test] + fn parse_macro_call_one_arg() { + for line in vec![ + "MACRO_CALL(arg1)", + "MACRO_CALL arg1 ", + " MACRO_CALL (arg1)", + " label: MACRO_CALL( arg1 ) ; Comment", + ] + .into_iter() + { + let mut parser = Parser::new(); + assert!(parser.parse(line.as_bytes()).is_ok()); + + let node = parser.nodes.last().unwrap(); + assert_node(node, NodeType::Call, line, "MACRO_CALL"); + assert!(node.left.is_none()); + assert!(node.right.is_none()); + + let args = node.args.clone().unwrap(); + assert_eq!(args.len(), 1); + assert_node(args.first().unwrap(), NodeType::Value, line, "arg1"); + } + } + + #[test] + fn parse_macro_call_multiple_args() { + for line in vec![ + "MACRO_CALL(arg1, arg2)", + "MACRO_CALL arg1, arg2 ", + " MACRO_CALL (arg1,arg2)", + " label: MACRO_CALL( arg1 , arg2 ) ; Comment", + ] + .into_iter() + { + let mut parser = Parser::new(); + assert!(parser.parse(line.as_bytes()).is_ok()); + + let node = parser.nodes.last().unwrap(); + assert_node(node, NodeType::Call, line, "MACRO_CALL"); + assert!(node.left.is_none()); + assert!(node.right.is_none()); + + let args = node.args.clone().unwrap(); + assert_eq!(args.len(), 2); + assert_node(args.first().unwrap(), NodeType::Value, line, "arg1"); + assert_node(args.last().unwrap(), NodeType::Value, line, "arg2"); + } } }