From 95013ba30c4780f02c37041fb03c8be7d746284e Mon Sep 17 00:00:00 2001 From: Mark VanderVoord Date: Tue, 14 Jan 2025 21:15:10 -0500 Subject: [PATCH] Fix parsing issues with line continuations, macro detection, and comment handling. (#978, #981, #985) --- lib/ceedling/objects.yml | 5 ++ lib/ceedling/parsing_parcels.rb | 77 +++++++++++++++++++ lib/ceedling/preprocessinator_extractor.rb | 13 +++- lib/ceedling/test_context_extractor.rb | 65 +++------------- spec/parsing_parcels_spec.rb | 66 ++++++++++++++++ spec/preprocessinator_extractor_spec.rb | 88 ++++++++++++++-------- spec/test_context_extractor_spec.rb | 36 +-------- 7 files changed, 227 insertions(+), 123 deletions(-) create mode 100644 lib/ceedling/parsing_parcels.rb create mode 100644 spec/parsing_parcels_spec.rb diff --git a/lib/ceedling/objects.yml b/lib/ceedling/objects.yml index f4e22481..af20521a 100644 --- a/lib/ceedling/objects.yml +++ b/lib/ceedling/objects.yml @@ -162,11 +162,14 @@ file_finder: file_finder_helper: compose: loginator +parsing_parcels: + test_context_extractor: compose: - configurator - file_wrapper - loginator + - parsing_parcels include_pathinator: compose: @@ -287,6 +290,8 @@ preprocessinator_file_handler: - loginator preprocessinator_extractor: + compose: + - parsing_parcels build_batchinator: compose: diff --git a/lib/ceedling/parsing_parcels.rb b/lib/ceedling/parsing_parcels.rb new file mode 100644 index 00000000..62182b6f --- /dev/null +++ b/lib/ceedling/parsing_parcels.rb @@ -0,0 +1,77 @@ +# ========================================================================= +# Ceedling - Test-Centered Build System for C +# ThrowTheSwitch.org +# Copyright (c) 2010-25 Mike Karlesky, Mark VanderVoord, & Greg Williams +# SPDX-License-Identifier: MIT +# ========================================================================= + +require 'ceedling/encodinator' + +# This is a collection of parsing aids to be used in other modules +class ParsingParcels + + # This parser accepts a collection of lines which it will sweep through and tidy, giving the purified + # lines to the block (one line at a time) for further analysis. It analyzes a single line at a time, + # which is far more memory efficient and faster for large files. However, this requires it to also + # handle backslash line continuations as a single line at this point. + def code_lines(input) + comment_block = false + full_line = '' + input.each_line do |line| + m = line.match /(.*)\\\s*$/ + if (!m.nil?) + full_line += m[1] + elsif full_line.empty? + _line, comment_block = clean_code_line( line, comment_block ) + yield( _line ) + else + _line, comment_block = clean_code_line( full_line + line, comment_block ) + yield( _line ) + full_line = '' + end + end + end + + private ###################################################################### + + def clean_code_line(line, comment_block) + _line = line.clean_encoding + + # Remove line comments + _line.gsub!(/\/\/.*$/, '') + + # Handle end of previously begun comment block + if comment_block + if _line.include?( '*/' ) + # Turn off comment block handling state + comment_block = false + + # Remove everything up to end of comment block + _line.gsub!(/^.*\*\//, '') + else + # Ignore contents of the line if its entirely within a comment block + return '', comment_block + end + + end + + # Block comments inside a C string are valid C, but we remove to simplify other parsing. + # No code we care about will be inside a C string. + # Note that we're not attempting the complex case of multiline string enclosed comment blocks + _line.gsub!(/"\s*\/\*.*"/, '') + + # Remove single-line block comments + _line.gsub!(/\/\*.*\*\//, '') + + # Handle beginning of any remaining multiline comment block + if _line.include?( '/*' ) + comment_block = true + + # Remove beginning of block comment + _line.gsub!(/\/\*.*/, '') + end + + return _line, comment_block + end + +end diff --git a/lib/ceedling/preprocessinator_extractor.rb b/lib/ceedling/preprocessinator_extractor.rb index 3fe5ca0b..ce0ce2f3 100644 --- a/lib/ceedling/preprocessinator_extractor.rb +++ b/lib/ceedling/preprocessinator_extractor.rb @@ -7,9 +7,12 @@ require 'ceedling/constants' require 'ceedling/encodinator' +require 'ceedling/parsing_parcels' class PreprocessinatorExtractor + constructor :parsing_parcels + ## ## Preprocessor Expansion Output Handling ## ====================================== @@ -138,8 +141,8 @@ def extract_test_directive_macro_calls(file_contents) # Look for TEST_SOURCE_FILE("...") and TEST_INCLUDE_PATH("...") in a string (i.e. a file's contents as a string) regexes = [ - /#{UNITY_TEST_SOURCE_FILE}.+?"\)/, - /#{UNITY_TEST_INCLUDE_PATH}.+?"\)/ + /#{UNITY_TEST_SOURCE_FILE}\(\s*\"\s*[^"]+\s*\"\s*\)/, + /#{UNITY_TEST_INCLUDE_PATH}\(\s*\"\s*[^"]+\s*\"\s*\)/ ] return extract_tokens_by_regex_list( file_contents, *regexes ) @@ -199,7 +202,7 @@ def extract_multiline_directives(file_contents, directive) # - Captures all text (non-greedily) after '#' on a first line through 0 or more line continuations up to a final newline. # - Line continuations comprise a final '\' on a given line followed by whitespace & newline, wrapping to the next # line up to a final '\' on that next line. - regex = /(#\s*#{directive}\s+.*?(\\\s*\n.*?)*)\n/ + regex = /(#\s*#{directive}[^\n]*)\n/ tokens = extract_tokens_by_regex_list( file_contents, regex ) @@ -227,7 +230,9 @@ def extract_tokens_by_regex_list(file_contents, *regexes) # For each regex provided, extract all matches from the source string regexes.each do |regex| - tokens += file_contents.scan( regex ) + @parsing_parcels.code_lines( file_contents ) do |line| + tokens += line.scan( regex ) + end end return tokens diff --git a/lib/ceedling/test_context_extractor.rb b/lib/ceedling/test_context_extractor.rb index 7e848824..48277de5 100644 --- a/lib/ceedling/test_context_extractor.rb +++ b/lib/ceedling/test_context_extractor.rb @@ -12,7 +12,7 @@ class TestContextExtractor - constructor :configurator, :file_wrapper, :loginator + constructor :configurator, :file_wrapper, :loginator, :parsing_parcels def setup # Per test-file lookup hashes @@ -53,7 +53,10 @@ def collect_simple_context( filepath, input, *args ) source_extras = [] includes = [] - code_lines( input ) do |line| + @parsing_parcels.code_lines( input ) do |line| + # Strip out comments + line.gsub!(/\/\/.*/,'') + if args.include?( :build_directive_include_paths ) # Scan for build directives: TEST_INCLUDE_PATH() include_paths += extract_build_directive_include_paths( line ) @@ -99,7 +102,7 @@ def collect_test_runner_details(test_filepath, input_filepath=nil) def extract_includes(input) includes = [] - code_lines( input ) {|line| includes += _extract_includes( line ) } + @parsing_parcels.code_lines( input ) {|line| includes += _extract_includes( line ) } return includes.uniq end @@ -235,16 +238,6 @@ def ingest_includes(filepath, includes) end end - # Exposed for testing - def code_lines(input) - comment_block = false - # Far more memory efficient and faster (for large files) than slurping entire file into memory - input.each_line do |line| - _line, comment_block = clean_code_line( line, comment_block ) - yield( _line ) - end - end - private ################################# def collect_build_directive_source_files(filepath, files) @@ -293,8 +286,8 @@ def _collect_test_runner_details(filepath, test_content, input_content=nil) def extract_build_directive_source_files(line) source_extras = [] - # Look for TEST_SOURCE_FILE("<*>.<*>") statement - results = line.scan(/#{UNITY_TEST_SOURCE_FILE}\(\s*\"\s*(.+?\.\w+)*?\s*\"\s*\)/) + # Look for TEST_SOURCE_FILE("<*>") statement + results = line.scan(/#{UNITY_TEST_SOURCE_FILE}\(\s*\"\s*([^"]+)\s*\"\s*\)/) results.each do |result| source_extras << FilePathUtils.standardize( result[0] ) end @@ -306,7 +299,7 @@ def extract_build_directive_include_paths(line) include_paths = [] # Look for TEST_INCLUDE_PATH("<*>") statements - results = line.scan(/#{UNITY_TEST_INCLUDE_PATH}\(\s*\"\s*(.+?)\s*\"\s*\)/) + results = line.scan(/#{UNITY_TEST_INCLUDE_PATH}\(\s*\"\s*([^"]+)\s*\"\s*\)/) results.each do |result| include_paths << FilePathUtils.standardize( result[0] ) end @@ -371,46 +364,6 @@ def form_file_key( filepath ) return filepath.to_s.to_sym end - def clean_code_line(line, comment_block) - _line = line.clean_encoding - - # Remove line comments - _line.gsub!(/\/\/.*$/, '') - - # Handle end of previously begun comment block - if comment_block - if _line.include?( '*/' ) - # Turn off comment block handling state - comment_block = false - - # Remove everything up to end of comment block - _line.gsub!(/^.*\*\//, '') - else - # Ignore contents of the line if its entirely within a comment block - return '', comment_block - end - - end - - # Block comments inside a C string are valid C, but we remove to simplify other parsing. - # No code we care about will be inside a C string. - # Note that we're not attempting the complex case of multiline string enclosed comment blocks - _line.gsub!(/"\s*\/\*.*"/, '') - - # Remove single-line block comments - _line.gsub!(/\/\*.*\*\//, '') - - # Handle beginning of any remaining multiline comment block - if _line.include?( '/*' ) - comment_block = true - - # Remove beginning of block comment - _line.gsub!(/\/\*.*/, '') - end - - return _line, comment_block - end - def debug_log_list(message, filepath, list) msg = "#{message} in #{filepath}:" if list.empty? diff --git a/spec/parsing_parcels_spec.rb b/spec/parsing_parcels_spec.rb new file mode 100644 index 00000000..19e1f1db --- /dev/null +++ b/spec/parsing_parcels_spec.rb @@ -0,0 +1,66 @@ +# ========================================================================= +# Ceedling - Test-Centered Build System for C +# ThrowTheSwitch.org +# Copyright (c) 2010-25 Mike Karlesky, Mark VanderVoord, & Greg Williams +# SPDX-License-Identifier: MIT +# ========================================================================= + +require 'spec_helper' +require 'ceedling/parsing_parcels' + +describe ParsingParcels do + before(:each) do + + @parsing_parcels = described_class.new() + end + + context "#code_lines" do + it "should clean code of encoding problems and comments" do + file_contents = <<~CONTENTS + /* TEST_SOURCE_FILE("foo.c") */ // Eliminate single line comment block + // TEST_SOURCE_FILE("bar.c") // Eliminate single line comment + Some text⛔️ + /* // /* // Eliminate tricky comment block enclosing comments + TEST_SOURCE_FILE("boom.c") + */ // // Eliminate trailing single line comment following block comment + More text + #define STR1 "/* comment " // Strip out (single line) C string containing block comment + #define STR2 " /* comment " // Strip out (single line) C string containing block comment + CONTENTS + + got = [] + + @parsing_parcels.code_lines( StringIO.new( file_contents ) ) do |line| + line.strip! + got << line if !line.empty? + end + + expected = [ + 'Some text', # ⛔️ removed with encoding sanitizing + 'More text', + "#define STR1", + "#define STR2" + ] + + expect( got ).to eq expected + end + + it "should treat continuations as a single line" do + file_contents = "// TEST_SOURCE_FILE(\"foo.c\") \\ \nTEST_SOURCE_FILE(\"bar.c\")\nSome text⛔️ \\\nMore text\n" + got = [] + + @parsing_parcels.code_lines( StringIO.new( file_contents ) ) do |line| + line.strip! + got << line if !line.empty? + end + + expected = [ + 'Some text More text' + ] + + expect( got ).to eq expected + end + + end + +end diff --git a/spec/preprocessinator_extractor_spec.rb b/spec/preprocessinator_extractor_spec.rb index 6ed30ab4..843c6f48 100644 --- a/spec/preprocessinator_extractor_spec.rb +++ b/spec/preprocessinator_extractor_spec.rb @@ -5,9 +5,20 @@ # SPDX-License-Identifier: MIT # ========================================================================= +require 'spec_helper' require 'ceedling/preprocessinator_extractor' +require 'ceedling/parsing_parcels' describe PreprocessinatorExtractor do + before(:each) do + @parsing_parcels = ParsingParcels.new() + @extractor = described_class.new( + { + :parsing_parcels => @parsing_parcels + } + ) + end + context "#extract_file_as_array_from_expansion" do it "should simply extract text of original file from preprocessed expansion" do filepath = "path/to/WANT.c" @@ -32,7 +43,7 @@ input = StringIO.new( file_contents.join( "\n" ) ) - expect( subject.extract_file_as_array_from_expansion( input, filepath ) ).to eq expected + expect( @extractor.extract_file_as_array_from_expansion( input, filepath ) ).to eq expected end it "should extract text of original file from preprocessed expansion preserving #directives and cleaning up whitespace)" do @@ -62,7 +73,7 @@ input = StringIO.new( file_contents.join( "\n" ) ) - expect( subject.extract_file_as_array_from_expansion( input, filepath ) ).to eq expected + expect( @extractor.extract_file_as_array_from_expansion( input, filepath ) ).to eq expected end it "should extract text of original file from preprocessed expansion with complex preprocessor directive sequence" do @@ -97,7 +108,7 @@ input = StringIO.new( file_contents.join( "\n" ) ) - expect( subject.extract_file_as_array_from_expansion(input, filepath) ).to eq expected + expect( @extractor.extract_file_as_array_from_expansion(input, filepath) ).to eq expected end end @@ -125,7 +136,7 @@ input = StringIO.new( file_contents.join( "\n" ) ) - expect( subject.extract_file_as_string_from_expansion( input, filepath ) ).to eq expected + expect( @extractor.extract_file_as_string_from_expansion( input, filepath ) ).to eq expected end end @@ -146,7 +157,27 @@ 'TEST_INCLUDE_PATH("hello/there")' ] - expect( subject.extract_test_directive_macro_calls( file_text ) ).to eq expected + expect( @extractor.extract_test_directive_macro_calls( file_text ) ).to eq expected + end + end + + context "#extract_test_directive_macro_calls" do + it "should extract only uncommented calls" do + file_text = <<~FILE_TEXT + TEST_SOURCE_FILE("foo/bar/file.c")//TEST_SOURCE_FILE("yo/data.c") + + TEST_INCLUDE_PATH("some/inc/dir") + SOME_MACRO(TEST_INCLUDE_PATH("another/dir")) TEST_INCLUDE_PATH("hello/there") + FILE_TEXT + + expected = [ + 'TEST_SOURCE_FILE("foo/bar/file.c")', + 'TEST_INCLUDE_PATH("some/inc/dir")', + 'TEST_INCLUDE_PATH("another/dir")', + 'TEST_INCLUDE_PATH("hello/there")' + ] + + expect( @extractor.extract_test_directive_macro_calls( file_text ) ).to eq expected end end @@ -177,15 +208,15 @@ expected = [ "#pragma pack(1)", [ - "#pragma TOOL command \\", - " with_some_args \\", + "#pragma TOOL command ", + " with_some_args ", " that wrap" - ], + ].join, "#pragma warning(disable : 4996)", "#pragma GCC optimize(\"O3\")" ] - expect( subject.extract_pragmas( file_text ) ).to eq expected + expect( @extractor.extract_pragmas( file_text ) ).to eq expected end end @@ -200,7 +231,7 @@ #endif // _HEADER_INCLUDE_GUARD_ FILE_TEXT - expect( subject.extract_include_guard( file_text ) ).to eq '_HEADER_INCLUDE_GUARD_' + expect( @extractor.extract_include_guard( file_text ) ).to eq '_HEADER_INCLUDE_GUARD_' end it "should extract the first text that looks like an include guard from among file text" do @@ -216,7 +247,7 @@ #endif // HEADER_INCLUDE_GUARD FILE_TEXT - expect( subject.extract_include_guard( file_text ) ).to eq 'HEADER_INCLUDE_GUARD' + expect( @extractor.extract_include_guard( file_text ) ).to eq 'HEADER_INCLUDE_GUARD' end it "should not extract an include guard from among file text" do @@ -229,7 +260,7 @@ #endif // SOME_GUARD_NAME FILE_TEXT - expect( subject.extract_include_guard( file_text ) ).to eq nil + expect( @extractor.extract_include_guard( file_text ) ).to eq nil end end @@ -279,26 +310,26 @@ "#define SQUARE(x) ((x) * (x))", "#define MAX(a, b) ((a) > (b) ? (a) : (b))", [ - "#define MACRO(num, str) {\\", - " printf(\"%d\", num);\\", - " printf(\" is\"); \\", - " printf(\" %s number\", str);\\", - " printf(\"\\n\");\\", + "#define MACRO(num, str) {", + " printf(\"%d\", num);", + " printf(\" is\"); ", + " printf(\" %s number\", str);", + " printf(\"\\n\");", " }" - ], + ].join, [ - "#define LONG_STRING \"This is a very long string that \\", + "#define LONG_STRING \"This is a very long string that ", " continues on the next line\"" - ], + ].join, [ - "#define MULTILINE_MACRO do { \\", - " something(); \\", - " something_else(); \\", + "#define MULTILINE_MACRO do { ", + " something(); ", + " something_else(); ", " } while(0)" - ] + ].join ] - expect( subject.extract_macro_defs( file_text, nil ) ).to eq expected + expect( @extractor.extract_macro_defs( file_text, nil ) ).to eq expected end it "should ignore include guard among macro defintions in file text" do @@ -316,13 +347,10 @@ expected = [ "#define PI 3.14159", - [ - "#define LONG_STRING \"This is a very long string that \\", - " continues on the next line\"" - ] + "#define LONG_STRING \"This is a very long string that continues on the next line\"" ] - expect( subject.extract_macro_defs( file_text, '_INCLUDE_GUARD_' ) ).to eq expected + expect( @extractor.extract_macro_defs( file_text, '_INCLUDE_GUARD_' ) ).to eq expected end end diff --git a/spec/test_context_extractor_spec.rb b/spec/test_context_extractor_spec.rb index 1c7a8833..ab0d123a 100644 --- a/spec/test_context_extractor_spec.rb +++ b/spec/test_context_extractor_spec.rb @@ -7,11 +7,13 @@ require 'spec_helper' require 'ceedling/test_context_extractor' +require 'ceedling/parsing_parcels' require 'ceedling/exceptions' describe TestContextExtractor do before(:each) do # Mock injected dependencies + @parsing_parcels = ParsingParcels.new() @configurator = double( "Configurator" ) # Use double() so we can mock needed methods that are added dynamically at startup @file_wrapper = double( "FileWrapper" ) # Not actually exercised in these test cases loginator = instance_double( "Loginator" ) @@ -41,6 +43,7 @@ { :configurator => @configurator, :file_wrapper => @file_wrapper, + :parsing_parcels => @parsing_parcels, :loginator => loginator } ) @@ -94,39 +97,6 @@ end end - context "#code_lines" do - it "should clean code of encoding problems and comments" do - file_contents = <<~CONTENTS - /* TEST_SOURCE_FILE("foo.c") */ // Eliminate single line comment block - // TEST_SOURCE_FILE("bar.c") // Eliminate single line comment - Some text⛔️ - /* // /* // Eliminate tricky comment block enclosing comments - TEST_SOURCE_FILE("boom.c") - */ // // Eliminate trailing single line comment following block comment - More text - #define STR1 "/* comment " // Strip out (single line) C string containing block comment - #define STR2 " /* comment " // Strip out (single line) C string containing block comment - CONTENTS - - got = [] - - @extractor.code_lines( StringIO.new( file_contents ) ) do |line| - line.strip! - got << line if !line.empty? - end - - expected = [ - 'Some text', # ⛔️ removed with encoding sanitizing - 'More text', - "#define STR1", - "#define STR2" - ] - - expect( got ).to eq expected - end - - end - context "#extract_includes" do it "should extract #include directives from code" do # Complex comments tested in `clean_code_line()` test case