Merge pull request #246 from rgalonso/fix/handle-different-line-endings

fix: handle different line endings
alstr · Nov 22, 2024 · 6cf1955 · 6cf1955
2 parents 5157ba9 + 6f69995
commit 6cf1955
Show file tree

Hide file tree

Showing 2 changed files with 63 additions and 7 deletions.
diff --git a/TodoParser.py b/TodoParser.py
@@ -393,6 +393,13 @@ def _get_file_details(self, file):
                     return syntax_details, ace_mode
         return None, None
 
+    def _tabs_and_spaces(self, num_tabs: int, num_spaces: int) -> str:
+        """Return a string of `num_tabs` tabs followed by `num_spaces` spaces.
+
+        Used to rebuild a comment prefix; a count <= 0 contributes nothing.
+        """
+        return '\t'*num_tabs + ' '*num_spaces
+
     def _extract_issue_if_exists(self, comment_block, marker, hunk_info):
         """Check this comment for TODOs, and if found, build an Issue object."""
         curr_issue = None
@@ -403,7 +410,11 @@ def _extract_issue_if_exists(self, comment_block, marker, hunk_info):
         for line_number_within_comment_block, line in enumerate(comment_lines):
             line_status, committed_line = self._get_line_status(line)
             line_statuses.append(line_status)
-            cleaned_line, pre_marker_length, post_marker_length = self._clean_line(committed_line, marker)
+            (cleaned_line,
+             pre_marker_length,
+             num_pre_marker_tabs,
+             post_marker_length,
+             num_post_marker_tabs) = self._clean_line(committed_line, marker)
             line_title, ref, identifier, identifier_actual = self._get_title(cleaned_line)
             if line_title:
                 if prev_line_title and line_status == line_statuses[-2]:
@@ -423,7 +434,9 @@ def _extract_issue_if_exists(self, comment_block, marker, hunk_info):
                                 + comment_block['start'] + line_number_within_comment_block),
                     start_line_within_hunk=comment_block['start'] + line_number_within_comment_block + 1,
                     num_lines=1,
-                    prefix=(' '*pre_marker_length)+(marker['pattern'] if marker['type'] == 'line' else '')+(' '*post_marker_length),
+                    prefix=self._tabs_and_spaces(num_pre_marker_tabs, (pre_marker_length-num_pre_marker_tabs)) +
+                           str(marker['pattern'] if marker['type'] == 'line' else '') +
+                           self._tabs_and_spaces(num_post_marker_tabs, post_marker_length-num_post_marker_tabs),
                     markdown_language=hunk_info['markdown_language'],
                     status=line_status,
                     identifier=identifier,
@@ -534,6 +547,7 @@ def _get_line_status(self, comment):
     def _clean_line(comment, marker):
         """Remove unwanted symbols and whitespace."""
         post_marker_length = 0
+        num_post_marker_tabs = 0
         if marker['type'] == 'block':
             original_comment = comment
             comment = comment.strip()
@@ -546,15 +560,19 @@ def _clean_line(comment, marker):
                 comment = comment.lstrip('*')
             comment = comment.strip()
             pre_marker_length = original_comment.find(comment)
+            num_pre_marker_tabs = original_comment.count('\t', 0, pre_marker_length)  # tabs in the leading text of the untouched line, not in the stripped comment
         else:
-            comment_segments = re.search(fr'^(.*?)({marker["pattern"]})(\s*)(.*)', comment)
+            comment_segments = re.search(fr'^(.*?)({marker["pattern"]})(\s*)(.*?)\s*$', comment)
             if comment_segments:
                 pre_marker_text, _, post_marker_whitespace, comment = comment_segments.groups()
                 pre_marker_length = len(pre_marker_text)
+                num_pre_marker_tabs = pre_marker_text.count('\t', 0, pre_marker_length)
                 post_marker_length = len(post_marker_whitespace)
+                num_post_marker_tabs = post_marker_whitespace.count('\t', 0, post_marker_length)
             else:
                 pre_marker_length = 0
-        return comment, pre_marker_length, post_marker_length
+                num_pre_marker_tabs = 0
+        return comment, pre_marker_length, num_pre_marker_tabs, post_marker_length, num_post_marker_tabs
 
     def _get_title(self, comment):
         """Check the passed comment for a new issue title (and reference, if specified)."""

diff --git a/main.py b/main.py
@@ -73,18 +73,56 @@ def process_diff(diff, client=Client(), insert_issue_urls=False, parser=TodoPars
                     line_number = raw_issue.start_line - 1
                     with open(raw_issue.file_name, 'r') as issue_file:
                         file_lines = issue_file.readlines()
+
+                        # Get style of newlines used in this file, so that we
+                        # use the same type when writing the file back out.
+                        # Note:
+                        #   - if only one newline type is detected, then
+                        #     'newlines' will be a string with that value
+                        #   - if no newlines are detected, 'newlines' will
+                        #     be 'None' and the platform-dependent default
+                        #     will be used when terminating lines on write
+                        #   - if multiple newline types are detected (e.g.
+                        #     a mix of Windows- and Unix-style newlines in
+                        #     the same file), then that is handled within
+                        #     the following if block...
+                        newline_style = issue_file.newlines
+
+                        if isinstance(issue_file.newlines, tuple):
+                            # A tuple being returned indicates that a mix of
+                            # line ending styles was found in the file. In
+                            # order to not perturb the file any more than
+                            # intended (i.e. inserting the issue URL comment(s))
+                            # we'll reread the file and keep the line endings.
+                            # On write, we'll tell writelines to not introduce
+                            # any explicit line endings. This modification
+                            # of the read and write behavior is handled by
+                            # passing '' to the newline argument of open().
+                            # Note: the line ending of the issue URLs line(s)
+                            # itself will be that of the TODO line above it
+                            # and is handled later in this function.
+                            newline_style = ''
+
+                            # reread the file with line endings left untranslated
+                            with open(raw_issue.file_name, 'r',
+                                      newline=newline_style) as issue_file_reread:
+                                file_lines = issue_file_reread.readlines()
+                        else:
+                            newline_style = issue_file.newlines
                     if line_number < len(file_lines):
                         # Duplicate the line to retain the comment syntax.
                         old_line = file_lines[line_number]
-                        remove = fr'(?i:{re.escape(raw_issue.identifier)}).*{re.escape(raw_issue.title)}'
+                        remove = fr'(?i:{re.escape(raw_issue.identifier)}).*{re.escape(raw_issue.title)}.*?(\r|\r\n|\n)?$'
                         insert = f'Issue URL: {client.get_issue_url(new_issue_number)}'
-                        new_line = re.sub('^.*'+remove, raw_issue.prefix + insert, old_line)
+                        # note that the '\1' capture group re-emits any line-ending character(s) the pattern
+                        # consumed from old_line, so the URL line keeps the same ending as the TODO line above it
+                        new_line = re.sub('^.*'+remove, fr'{raw_issue.prefix + insert}\1', old_line)
                         # make sure the above operation worked as intended
                         if new_line != old_line:
                             # Check if the URL line already exists, if so abort.
                             if line_number == len(file_lines) - 1 or file_lines[line_number + 1] != new_line:
                                 file_lines.insert(line_number + 1, new_line)
-                                with open(raw_issue.file_name, 'w') as issue_file:
+                                with open(raw_issue.file_name, 'w', newline=newline_style) as issue_file:
                                     issue_file.writelines(file_lines)
                                 print('Issue URL successfully inserted', file=output)
                         else: