From c10ca453d9c3722100e6d1a6be7f205ccc38905e Mon Sep 17 00:00:00 2001 From: Robert Alonso <17463757+rgalonso@users.noreply.github.com> Date: Thu, 14 Nov 2024 20:40:49 +0000 Subject: [PATCH 1/3] fix: handle issue title line with trailing whitespace Fix a regression that was accidentally introduced with v5.1.2 which could cause an issue URL to not be successfully added to the source file when dealing with a CRLF (Windows-style line endings) file on Linux. Partially addresses GitHub issue #245 --- TodoParser.py | 2 +- main.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/TodoParser.py b/TodoParser.py index 42173d0..6995aed 100644 --- a/TodoParser.py +++ b/TodoParser.py @@ -547,7 +547,7 @@ class TodoParser(object): comment = comment.strip() pre_marker_length = original_comment.find(comment) else: - comment_segments = re.search(fr'^(.*?)({marker["pattern"]})(\s*)(.*)', comment) + comment_segments = re.search(fr'^(.*?)({marker["pattern"]})(\s*)(.*?)\s*$', comment) if comment_segments: pre_marker_text, _, post_marker_whitespace, comment = comment_segments.groups() pre_marker_length = len(pre_marker_text) diff --git a/main.py b/main.py index 845d82b..d687cb4 100644 --- a/main.py +++ b/main.py @@ -76,7 +76,7 @@ def process_diff(diff, client=Client(), insert_issue_urls=False, parser=TodoPars if line_number < len(file_lines): # Duplicate the line to retain the comment syntax. 
old_line = file_lines[line_number] - remove = fr'(?i:{re.escape(raw_issue.identifier)}).*{re.escape(raw_issue.title)}' + remove = fr'(?i:{re.escape(raw_issue.identifier)}).*{re.escape(raw_issue.title)}.*?(\r|\r\n|\n)?$' insert = f'Issue URL: {client.get_issue_url(new_issue_number)}' new_line = re.sub('^.*'+remove, raw_issue.prefix + insert, old_line) # make sure the above operation worked as intended From ad4d65c8b848c56d5b8b91bd01d002ece0941ceb Mon Sep 17 00:00:00 2001 From: Robert Alonso <17463757+rgalonso@users.noreply.github.com> Date: Thu, 14 Nov 2024 20:45:00 +0000 Subject: [PATCH 2/3] fix: handle CRLF and mixed line-ending files Regardless of the OS on which the app is running, respect the original line endings of the file when writing back the issue URL. In the case of a mixed line-ending file (e.g. both Windows-style CRLF and Unix-style LF), the issue URL comment line will use the line ending style of the TODO comment line above it. Partially addresses GitHub issue #245 --- main.py | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index d687cb4..59a0c36 100644 --- a/main.py +++ b/main.py @@ -73,18 +73,56 @@ def process_diff(diff, client=Client(), insert_issue_urls=False, parser=TodoPars line_number = raw_issue.start_line - 1 with open(raw_issue.file_name, 'r') as issue_file: file_lines = issue_file.readlines() + + # Get style of newlines used in this file, so that we + # use the same type when writing the file back out. + # Note: + # - if only one newline type is detected, then + # 'newlines' will be a string with that value + # - if no newlines are detected, 'newlines' will + # be 'None' and the platform-dependent default + # will be used when terminating lines on write + # - if multiple newline types are detected (e.g. + # a mix of Windows- and Unix-style newlines in + # the same file), then that is handled within + # the following if block... 
+ newline_style = issue_file.newlines + + if isinstance(issue_file.newlines, tuple): + # A tuple being returned indicates that a mix of + # line ending styles was found in the file. In + # order to not perturb the file any more than + # intended (i.e. inserting the issue URL comment(s)) + # we'll reread the file and keep the line endings. + # On write, we'll tell writelines to not introduce + # any explicit line endings. This modification + # of the read and write behavior is handled by + # passing '' to the newline argument of open(). + # Note: the line ending of the issue URLs line(s) + # itself will be that of the TODO line above it + # and is handled later in this function. + newline_style = '' + + # reread the file with newline translation disabled so the original line endings are kept + with open(raw_issue.file_name, 'r', + newline=newline_style) as issue_file_reread: + file_lines = issue_file_reread.readlines() + else: + newline_style = issue_file.newlines if line_number < len(file_lines): # Duplicate the line to retain the comment syntax. old_line = file_lines[line_number] remove = fr'(?i:{re.escape(raw_issue.identifier)}).*{re.escape(raw_issue.title)}.*?(\r|\r\n|\n)?$' insert = f'Issue URL: {client.get_issue_url(new_issue_number)}' - new_line = re.sub('^.*'+remove, raw_issue.prefix + insert, old_line) + # note that the '\1' capture group is the line ending character sequence and + # will only be non-empty in the case of a mixed line-endings file + new_line = re.sub('^.*'+remove, fr'{raw_issue.prefix + insert}\1', old_line) # make sure the above operation worked as intended if new_line != old_line: # Check if the URL line already exists, if so abort. 
if line_number == len(file_lines) - 1 or file_lines[line_number + 1] != new_line: file_lines.insert(line_number + 1, new_line) - with open(raw_issue.file_name, 'w') as issue_file: + with open(raw_issue.file_name, 'w', newline=newline_style) as issue_file: issue_file.writelines(file_lines) print('Issue URL successfully inserted', file=output) else: From 6f699953740f4e8912297864a6fd07dd807aea70 Mon Sep 17 00:00:00 2001 From: Robert Alonso <17463757+rgalonso@users.noreply.github.com> Date: Thu, 14 Nov 2024 20:37:07 +0000 Subject: [PATCH 3/3] fix: handle presence of tab characters on TODO line Ensure alignment of issue URL line is correct even when the TODO line had any number of tab characters prior to the issue title Addresses remaining sub-issues of and closes GitHub issue #245 --- TodoParser.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/TodoParser.py b/TodoParser.py index 6995aed..eefd1bd 100644 --- a/TodoParser.py +++ b/TodoParser.py @@ -393,6 +393,13 @@ class TodoParser(object): return syntax_details, ace_mode return None, None + def _tabs_and_spaces(self, num_tabs: int, num_spaces: int) -> str: + """ + Helper function which returns a string containing the + specified number of tabs and spaces (in that order) + """ + return '\t'*num_tabs + ' '*num_spaces + def _extract_issue_if_exists(self, comment_block, marker, hunk_info): """Check this comment for TODOs, and if found, build an Issue object.""" curr_issue = None @@ -403,7 +410,11 @@ class TodoParser(object): for line_number_within_comment_block, line in enumerate(comment_lines): line_status, committed_line = self._get_line_status(line) line_statuses.append(line_status) - cleaned_line, pre_marker_length, post_marker_length = self._clean_line(committed_line, marker) + (cleaned_line, + pre_marker_length, + num_pre_marker_tabs, + post_marker_length, + num_post_marker_tabs) = self._clean_line(committed_line, marker) line_title, ref, identifier, identifier_actual = 
self._get_title(cleaned_line) if line_title: if prev_line_title and line_status == line_statuses[-2]: @@ -423,7 +434,9 @@ class TodoParser(object): + comment_block['start'] + line_number_within_comment_block), start_line_within_hunk=comment_block['start'] + line_number_within_comment_block + 1, num_lines=1, - prefix=(' '*pre_marker_length)+(marker['pattern'] if marker['type'] == 'line' else '')+(' '*post_marker_length), + prefix=self._tabs_and_spaces(num_pre_marker_tabs, (pre_marker_length-num_pre_marker_tabs)) + + str(marker['pattern'] if marker['type'] == 'line' else '') + + self._tabs_and_spaces(num_post_marker_tabs, post_marker_length-num_post_marker_tabs), markdown_language=hunk_info['markdown_language'], status=line_status, identifier=identifier, @@ -534,6 +547,7 @@ class TodoParser(object): def _clean_line(comment, marker): """Remove unwanted symbols and whitespace.""" post_marker_length = 0 + num_post_marker_tabs = 0 if marker['type'] == 'block': original_comment = comment comment = comment.strip() @@ -546,15 +560,19 @@ class TodoParser(object): comment = comment.lstrip('*') comment = comment.strip() pre_marker_length = original_comment.find(comment) + num_pre_marker_tabs = comment.count('\t', 0, pre_marker_length) else: comment_segments = re.search(fr'^(.*?)({marker["pattern"]})(\s*)(.*?)\s*$', comment) if comment_segments: pre_marker_text, _, post_marker_whitespace, comment = comment_segments.groups() pre_marker_length = len(pre_marker_text) + num_pre_marker_tabs = pre_marker_text.count('\t', 0, pre_marker_length) post_marker_length = len(post_marker_whitespace) + num_post_marker_tabs = post_marker_whitespace.count('\t', 0, post_marker_length) else: pre_marker_length = 0 - return comment, pre_marker_length, post_marker_length + num_pre_marker_tabs = 0 + return comment, pre_marker_length, num_pre_marker_tabs, post_marker_length, num_post_marker_tabs def _get_title(self, comment): """Check the passed comment for a new issue title (and reference, if 
specified)."""