Skip to content

Commit

Permalink
Fix parsing issues with line continuations, macro detection, and comment handling. (#978, #981, #985)
Browse files Browse the repository at this point in the history
  • Loading branch information
mvandervoord committed Jan 15, 2025
1 parent b4f0ad3 commit 95013ba
Show file tree
Hide file tree
Showing 7 changed files with 227 additions and 123 deletions.
5 changes: 5 additions & 0 deletions lib/ceedling/objects.yml
Original file line number Diff line number Diff line change
Expand Up @@ -162,11 +162,14 @@ file_finder:
file_finder_helper:
compose: loginator

parsing_parcels:

test_context_extractor:
compose:
- configurator
- file_wrapper
- loginator
- parsing_parcels

include_pathinator:
compose:
Expand Down Expand Up @@ -287,6 +290,8 @@ preprocessinator_file_handler:
- loginator

preprocessinator_extractor:
compose:
- parsing_parcels

build_batchinator:
compose:
Expand Down
77 changes: 77 additions & 0 deletions lib/ceedling/parsing_parcels.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# =========================================================================
# Ceedling - Test-Centered Build System for C
# ThrowTheSwitch.org
# Copyright (c) 2010-25 Mike Karlesky, Mark VanderVoord, & Greg Williams
# SPDX-License-Identifier: MIT
# =========================================================================

require 'ceedling/encodinator'

# This is a collection of parsing aids to be used in other modules
class ParsingParcels

  # Sweeps through `input` one physical line at a time, tidies each logical line, and
  # yields the purified text to the block for further analysis.
  #
  # Working line-by-line avoids slurping an entire file into memory. Because of that,
  # backslash line continuations must be stitched together here: a physical line ending
  # in a backslash (optionally followed by whitespace) is held back and prepended to the
  # next physical line, so the block sees the whole construct as a single line.
  #
  # input - any object responding to #each_line (e.g. String, StringIO, IO)
  #
  # Yields one cleaned String per logical line. Lines that fall entirely inside a
  # multiline block comment are yielded as ''.
  #
  # Returns `input` (the same value the underlying #each_line call produces).
  def code_lines(input)
    comment_block = false
    pending = +''   # Text gathered so far from continued lines

    input.each_line do |line|
      continuation = line.match( /(.*)\\\s*$/ )

      if continuation
        # Keep only the text ahead of the trailing backslash and wait for the rest
        pending << continuation[1]
        next
      end

      cleaned, comment_block = clean_code_line( pending + line, comment_block )
      yield( cleaned )
      pending = +''
    end

    # If the input ends on a continuation there is no following line to trigger the
    # yield above. Flush what was gathered instead of silently dropping it.
    unless pending.empty?
      cleaned, comment_block = clean_code_line( pending, comment_block )
      yield( cleaned )
    end

    input
  end

  private ######################################################################

  # Strips comments from a single logical line while tracking multiline block comments.
  #
  # line          - String holding one logical line of source
  # comment_block - true if a block comment opened on an earlier line is still open
  #
  # Returns a two element list: the cleaned line and the updated comment_block state.
  #
  # Relies on String#clean_encoding, which is not defined in this file (presumably
  # supplied by the 'ceedling/encodinator' require at the top of the file).
  def clean_code_line(line, comment_block)
    _line = line.clean_encoding

    # Remove line comments.
    # This runs first and unconditionally, so a `//` appearing inside a block comment
    # or a string also truncates the line at that point.
    _line.gsub!(/\/\/.*$/, '')

    # Handle end of previously begun comment block
    if comment_block
      # Ignore contents of the line if it is entirely within a comment block
      return '', comment_block unless _line.include?( '*/' )

      # Turn off comment block handling state
      comment_block = false

      # Remove everything up to end of comment block
      _line.gsub!(/^.*\*\//, '')
    end

    # Block comments inside a C string are valid C, but we remove to simplify other parsing.
    # No code we care about will be inside a C string.
    # Note that we're not attempting the complex case of multiline string enclosed comment blocks
    _line.gsub!(/"\s*\/\*.*"/, '')

    # Remove single-line block comments
    _line.gsub!(/\/\*.*\*\//, '')

    # Handle beginning of any remaining multiline comment block
    if _line.include?( '/*' )
      comment_block = true

      # Remove beginning of block comment
      _line.gsub!(/\/\*.*/, '')
    end

    return _line, comment_block
  end

end
13 changes: 9 additions & 4 deletions lib/ceedling/preprocessinator_extractor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@

require 'ceedling/constants'
require 'ceedling/encodinator'
require 'ceedling/parsing_parcels'

class PreprocessinatorExtractor

constructor :parsing_parcels

##
## Preprocessor Expansion Output Handling
## ======================================
Expand Down Expand Up @@ -138,8 +141,8 @@ def extract_test_directive_macro_calls(file_contents)
# Look for TEST_SOURCE_FILE("...") and TEST_INCLUDE_PATH("...") in a string (i.e. a file's contents as a string)

regexes = [
/#{UNITY_TEST_SOURCE_FILE}.+?"\)/,
/#{UNITY_TEST_INCLUDE_PATH}.+?"\)/
/#{UNITY_TEST_SOURCE_FILE}\(\s*\"\s*[^"]+\s*\"\s*\)/,
/#{UNITY_TEST_INCLUDE_PATH}\(\s*\"\s*[^"]+\s*\"\s*\)/
]

return extract_tokens_by_regex_list( file_contents, *regexes )
Expand Down Expand Up @@ -199,7 +202,7 @@ def extract_multiline_directives(file_contents, directive)
# - Captures all text (non-greedily) after '#<directive>' on a first line through 0 or more line continuations up to a final newline.
# - Line continuations comprise a final '\' on a given line followed by whitespace & newline, wrapping to the next
# line up to a final '\' on that next line.
regex = /(#\s*#{directive}\s+.*?(\\\s*\n.*?)*)\n/
regex = /(#\s*#{directive}[^\n]*)\n/

tokens = extract_tokens_by_regex_list( file_contents, regex )

Expand Down Expand Up @@ -227,7 +230,9 @@ def extract_tokens_by_regex_list(file_contents, *regexes)

# For each regex provided, extract all matches from the source string
regexes.each do |regex|
tokens += file_contents.scan( regex )
@parsing_parcels.code_lines( file_contents ) do |line|
tokens += line.scan( regex )
end
end

return tokens
Expand Down
65 changes: 9 additions & 56 deletions lib/ceedling/test_context_extractor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

class TestContextExtractor

constructor :configurator, :file_wrapper, :loginator
constructor :configurator, :file_wrapper, :loginator, :parsing_parcels

def setup
# Per test-file lookup hashes
Expand Down Expand Up @@ -53,7 +53,10 @@ def collect_simple_context( filepath, input, *args )
source_extras = []
includes = []

code_lines( input ) do |line|
@parsing_parcels.code_lines( input ) do |line|
# Strip out comments
line.gsub!(/\/\/.*/,'')

if args.include?( :build_directive_include_paths )
# Scan for build directives: TEST_INCLUDE_PATH()
include_paths += extract_build_directive_include_paths( line )
Expand Down Expand Up @@ -99,7 +102,7 @@ def collect_test_runner_details(test_filepath, input_filepath=nil)
def extract_includes(input)
includes = []

code_lines( input ) {|line| includes += _extract_includes( line ) }
@parsing_parcels.code_lines( input ) {|line| includes += _extract_includes( line ) }

return includes.uniq
end
Expand Down Expand Up @@ -235,16 +238,6 @@ def ingest_includes(filepath, includes)
end
end

# Exposed for testing
def code_lines(input)
comment_block = false
# Far more memory efficient and faster (for large files) than slurping entire file into memory
input.each_line do |line|
_line, comment_block = clean_code_line( line, comment_block )
yield( _line )
end
end

private #################################

def collect_build_directive_source_files(filepath, files)
Expand Down Expand Up @@ -293,8 +286,8 @@ def _collect_test_runner_details(filepath, test_content, input_content=nil)
def extract_build_directive_source_files(line)
source_extras = []

# Look for TEST_SOURCE_FILE("<*>.<*>") statement
results = line.scan(/#{UNITY_TEST_SOURCE_FILE}\(\s*\"\s*(.+?\.\w+)*?\s*\"\s*\)/)
# Look for TEST_SOURCE_FILE("<*>") statement
results = line.scan(/#{UNITY_TEST_SOURCE_FILE}\(\s*\"\s*([^"]+)\s*\"\s*\)/)
results.each do |result|
source_extras << FilePathUtils.standardize( result[0] )
end
Expand All @@ -306,7 +299,7 @@ def extract_build_directive_include_paths(line)
include_paths = []

# Look for TEST_INCLUDE_PATH("<*>") statements
results = line.scan(/#{UNITY_TEST_INCLUDE_PATH}\(\s*\"\s*(.+?)\s*\"\s*\)/)
results = line.scan(/#{UNITY_TEST_INCLUDE_PATH}\(\s*\"\s*([^"]+)\s*\"\s*\)/)
results.each do |result|
include_paths << FilePathUtils.standardize( result[0] )
end
Expand Down Expand Up @@ -371,46 +364,6 @@ def form_file_key( filepath )
return filepath.to_s.to_sym
end

def clean_code_line(line, comment_block)
_line = line.clean_encoding

# Remove line comments
_line.gsub!(/\/\/.*$/, '')

# Handle end of previously begun comment block
if comment_block
if _line.include?( '*/' )
# Turn off comment block handling state
comment_block = false

# Remove everything up to end of comment block
_line.gsub!(/^.*\*\//, '')
else
# Ignore contents of the line if its entirely within a comment block
return '', comment_block
end

end

# Block comments inside a C string are valid C, but we remove to simplify other parsing.
# No code we care about will be inside a C string.
# Note that we're not attempting the complex case of multiline string enclosed comment blocks
_line.gsub!(/"\s*\/\*.*"/, '')

# Remove single-line block comments
_line.gsub!(/\/\*.*\*\//, '')

# Handle beginning of any remaining multiline comment block
if _line.include?( '/*' )
comment_block = true

# Remove beginning of block comment
_line.gsub!(/\/\*.*/, '')
end

return _line, comment_block
end

def debug_log_list(message, filepath, list)
msg = "#{message} in #{filepath}:"
if list.empty?
Expand Down
66 changes: 66 additions & 0 deletions spec/parsing_parcels_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# =========================================================================
# Ceedling - Test-Centered Build System for C
# ThrowTheSwitch.org
# Copyright (c) 2010-25 Mike Karlesky, Mark VanderVoord, & Greg Williams
# SPDX-License-Identifier: MIT
# =========================================================================

require 'spec_helper'
require 'ceedling/parsing_parcels'

describe ParsingParcels do
  before(:each) do
    @parsing_parcels = described_class.new()
  end

  # Runs `text` through ParsingParcels#code_lines and gathers what the parser yields,
  # whitespace-stripped, discarding any lines that end up empty.
  def surviving_lines(text)
    kept = []
    @parsing_parcels.code_lines( StringIO.new( text ) ) do |code_line|
      trimmed = code_line.strip
      kept << trimmed unless trimmed.empty?
    end
    kept
  end

  context "#code_lines" do
    it "should clean code of encoding problems and comments" do
      # Mixes line comments, block comments, non-ASCII text, and C strings holding comment openers
      file_contents = <<~CONTENTS
        /* TEST_SOURCE_FILE("foo.c") */ // Eliminate single line comment block
        // TEST_SOURCE_FILE("bar.c") // Eliminate single line comment
        Some text⛔️
        /* // /* // Eliminate tricky comment block enclosing comments
        TEST_SOURCE_FILE("boom.c")
        */ // // Eliminate trailing single line comment following block comment
        More text
        #define STR1 "/* comment " // Strip out (single line) C string containing block comment
        #define STR2 " /* comment " // Strip out (single line) C string containing block comment
      CONTENTS

      expected = [
        'Some text', # ⛔️ removed with encoding sanitizing
        'More text',
        "#define STR1",
        "#define STR2"
      ]

      expect( surviving_lines( file_contents ) ).to eq expected
    end

    it "should treat continuations as a single line" do
      # First pair: a commented-out line continued onto the next, so both vanish.
      # Second pair: ordinary text continued onto the next line, joined into one.
      file_contents = "// TEST_SOURCE_FILE(\"foo.c\") \\ \nTEST_SOURCE_FILE(\"bar.c\")\nSome text⛔️ \\\nMore text\n"

      expected = [
        'Some text More text'
      ]

      expect( surviving_lines( file_contents ) ).to eq expected
    end

  end

end
Loading

0 comments on commit 95013ba

Please sign in to comment.