Merge pull request #243 from rgalonso/fix/regex-characters-in-identifier

fix: handle identifier with regex characters
alstr · Nov 12, 2024 · 5157ba9 · 5157ba9
2 parents 59c6b53 + 2d98b5c
commit 5157ba9
Show file tree

Hide file tree

Showing 5 changed files with 30 additions and 10 deletions.
diff --git a/TodoParser.py b/TodoParser.py
@@ -563,13 +563,14 @@ def _get_title(self, comment):
         title_identifier_actual = None
         title_identifier = None
         for identifier in self.identifiers:
-            title_pattern = re.compile(fr'(^.*?)(\s*?)(\b{re.escape(identifier)}\b)(\(([^)]+)\))?\s*:?\s*(.+)', re.IGNORECASE)
+            title_pattern = re.compile(fr'(^|(^.*?)(\s*?)\s)({re.escape(identifier)})(\(([^)]+)\))?\s*(:|\s)\s*(.+)',
+                                       re.IGNORECASE)
             title_search = title_pattern.search(comment)
             if title_search:
-                title_identifier_actual = title_search.group(3)
+                title_identifier_actual = title_search.group(4)
                 title_identifier = identifier
-                ref = title_search.group(5) # may be empty, which is OK
-                title = title_search.group(6)
+                ref = title_search.group(6) # may be empty, which is OK
+                title = title_search.group(8)
                 break
         return title, ref, title_identifier, title_identifier_actual
 

diff --git a/main.py b/main.py
@@ -76,7 +76,7 @@ def process_diff(diff, client=Client(), insert_issue_urls=False, parser=TodoPars
                     if line_number < len(file_lines):
                         # Duplicate the line to retain the comment syntax.
                         old_line = file_lines[line_number]
-                        remove = fr'(?i:{raw_issue.identifier}).*{re.escape(raw_issue.title)}'
+                        remove = fr'(?i:{re.escape(raw_issue.identifier)}).*{re.escape(raw_issue.title)}'
                         insert = f'Issue URL: {client.get_issue_url(new_issue_number)}'
                         new_line = re.sub('^.*'+remove, raw_issue.prefix + insert, old_line)
                         # make sure the above operation worked as intended

diff --git a/tests/test_new.diff b/tests/test_new.diff
@@ -58,7 +58,7 @@ index 0000000..525e25d
 +    # TODO: Come up with a more imaginative greeting
 +    print('Hello world')
 +
-+    # TODO: Do more stuff
++    # [TODO]: Do more stuff
 +    #  This function should probably do something more interesting
 +    #  labels: help wanted
 +

diff --git a/tests/test_process_diff.py b/tests/test_process_diff.py
@@ -78,7 +78,7 @@ def run(self, result=None):
                      "Skipping because 'SKIP_PROCESS_DIFF_TEST' is 'true'")
     def test_url_insertion(self):
         self._setUp(['test_new.diff'])
-        self._standardTest(80)
+        self._standardTest(79)
 
     def test_line_numbering_with_deletions(self):
         self._setUp(['test_new_py.diff', 'test_edit_py.diff'])

diff --git a/tests/test_todo_parser.py b/tests/test_todo_parser.py
@@ -45,7 +45,7 @@ def setUp(self):
 
     def test_python_issues(self):
         # Includes 4 tests for Starlark.
-        self.assertEqual(count_issues_for_file_type(self.raw_issues, 'python'), 8)
+        self.assertEqual(count_issues_for_file_type(self.raw_issues, 'python'), 7)
 
     def test_yaml_issues(self):
         self.assertEqual(count_issues_for_file_type(self.raw_issues, 'yaml'), 2)
@@ -80,7 +80,7 @@ def test_julia_issues(self):
         self.assertEqual(count_issues_for_file_type(self.raw_issues, 'julia'), 2)
 
     def test_starlark_issues(self):
-        self.assertEqual(count_issues_for_file_type(self.raw_issues, 'python'), 8)
+        self.assertEqual(count_issues_for_file_type(self.raw_issues, 'python'), 7)
 
     def test_autohotkey_issues(self):
         self.assertEqual(count_issues_for_file_type(self.raw_issues, 'autohotkey'), 1)
@@ -133,7 +133,9 @@ class CustomOptionsTest(unittest.TestCase):
     def setUp(self):
         parser = TodoParser(options={"identifiers":
                                      [{"name": "FIX", "labels": []},
-                                      {"name": "TODO", "labels": []}]})
+                                      {"name": "[TODO]", "labels": []},
+                                      {"name": "TODO", "labels": []}
+                                      ]})
         self.raw_issues = []
         with open('syntax.json', 'r') as syntax_json:
             parser.syntax_dict = json.load(syntax_json)
@@ -157,6 +159,23 @@ def test_exact_identifier_match(self):
         self.assertEqual(len(matching_issues), 0,
                          msg=print_unexpected_issues(matching_issues))
 
+    # See GitHub issue #242
+    def test_regex_identifier_chars(self):
+        """
+        Verify that the presence of regex characters in the identifier
+        doesn't confuse the parser
+
+        An identifier such as "[TODO]" should be matched literally, not treating
+        the "[" and "]" characters as part of a regular expression pattern.
+        """
+        matching_issues = get_issues_for_fields(self.raw_issues,
+                                                {
+                                                    "file_name": "example_file.py",
+                                                    "identifier": "[TODO]"
+                                                })
+        self.assertEqual(len(matching_issues), 1,
+                         msg=print_unexpected_issues(matching_issues))
+
     # See GitHub issue #235
     @unittest.expectedFailure
     def test_multiple_identifiers(self):