From e42bca636e25e9c6db7e5d543831840f7a369d87 Mon Sep 17 00:00:00 2001 From: Robert Alonso <17463757+rgalonso@users.noreply.github.com> Date: Mon, 11 Nov 2024 17:52:38 +0000 Subject: [PATCH 1/9] refactor: add optional argument to TodoParser to set configuration Currently, this is just used to set identifiers without needing to modify the environment, but this could (should?) be extended to other options --- TodoParser.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/TodoParser.py b/TodoParser.py index 6101704..af4dc05 100644 --- a/TodoParser.py +++ b/TodoParser.py @@ -24,22 +24,30 @@ class TodoParser(object): ISSUE_URL_PATTERN = re.compile(r'(?<=Issue URL:\s).+', re.IGNORECASE) ISSUE_NUMBER_PATTERN = re.compile(r'/issues/(\d+)', re.IGNORECASE) - def __init__(self): + def __init__(self, options=dict()): # Determine if the issues should be escaped. self.should_escape = os.getenv('INPUT_ESCAPE', 'true') == 'true' - # Load any custom identifiers, otherwise use the default. + # Load any custom identifiers specified by the environment, + # falling back to any specified by the constructor argument, + # otherwise using the default. custom_identifiers = os.getenv('INPUT_IDENTIFIERS') self.identifiers = ['TODO'] self.identifiers_dict = None if custom_identifiers: try: custom_identifiers_dict = json.loads(custom_identifiers) + except json.JSONDecodeError: + print('Invalid identifiers dict, ignoring.') + else: + custom_identifiers_dict = options.get("identifiers", None) + if custom_identifiers_dict: + try: for identifier_dict in custom_identifiers_dict: if type(identifier_dict['name']) is not str or type(identifier_dict['labels']) is not list: raise TypeError self.identifiers = [identifier['name'] for identifier in custom_identifiers_dict] self.identifiers_dict = custom_identifiers_dict - except (json.JSONDecodeError, KeyError, TypeError): + except (KeyError, TypeError): print('Invalid identifiers dict, ignoring.') self.languages_dict = None From dd15f79ab860c08889d37f3a23572601ba06b8f2 Mon Sep 17 00:00:00 2001 From: Robert Alonso <17463757+rgalonso@users.noreply.github.com> Date: Mon, 11 Nov 2024 17:53:48 +0000 Subject: [PATCH 2/9] feat: add ability to print Issue object Facilitates debug and error handling --- Issue.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Issue.py b/Issue.py index 8a79d37..b7c8e6e 100644 --- a/Issue.py +++ b/Issue.py @@ -19,3 +19,9 @@ class Issue(object): self.issue_url = issue_url self.issue_number = issue_number + def __str__(self): + selflist = [] + for key in [x for x in vars(self).keys() if x not in ("hunk")]: + selflist.append(f'"{key}": "{getattr(self, key)}"') + selflist.append((f'"hunk": "{self.hunk}"')) + return '\n'.join(selflist) \ No newline at end of file From 872a997803f9dba03786bb744e33e7ca47a907df Mon Sep 17 00:00:00 2001 From: Robert Alonso <17463757+rgalonso@users.noreply.github.com> Date: Mon, 11 Nov 2024 18:16:28 +0000 Subject: [PATCH 3/9] test: add tests to capture additional known issues These tests capture the existing issues #234 and issue #235. As no solution is in place yet, they're marked as expected failures. --- tests/test_new.diff | 15 ++++++- tests/test_todo_parser.py | 82 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 1 deletion(-) diff --git a/tests/test_new.diff b/tests/test_new.diff index 88f647f..b63bc45 100644 --- a/tests/test_new.diff +++ b/tests/test_new.diff @@ -53,7 +53,7 @@ new file mode 100644 index 0000000..525e25d --- /dev/null +++ b/example_file.py -@@ -0,0 +1,23 @@ +@@ -0,0 +1,36 @@ +def hello_world(): + # TODO: Come up with a more imaginative greeting + print('Hello world') @@ -77,6 +77,19 @@ index 0000000..525e25d + kept together as one. + ''' + pass ++ ++""" ++The docstring documentation of MyClass ++ ++Using a docstring in order to generate autodocumentation ++""" ++def SuffixAdder: ++ def __init__(self, base_str): ++ self.base_str=base_str ++ ++ def add_suffix(self, suffix): ++ # add the suffix after the base string ++ return self.base_str + suffix \ No newline at end of file diff --git a/example.hs b/example.hs new file mode 100644 diff --git a/tests/test_todo_parser.py b/tests/test_todo_parser.py index 34b94c1..ad14e96 100644 --- a/tests/test_todo_parser.py +++ b/tests/test_todo_parser.py @@ -12,6 +12,22 @@ def count_issues_for_file_type(raw_issues, file_type): num_issues += 1 return num_issues +def get_issues_for_fields(raw_issues, fields): + matching_issues = [] + for issue in raw_issues: + for key in fields.keys(): + if getattr(issue, key) != fields.get(key): + break + else: + matching_issues.append(issue) + return matching_issues + +def print_unexpected_issues(unexpected_issues): + return '\n'.join([ + '', + 'Unexpected issues:', + '\n=========================\n'.join(map(str, unexpected_issues))]) + class NewIssueTest(unittest.TestCase): # Check for newly added TODOs across the files specified. @@ -112,6 +128,72 @@ class NewIssueTest(unittest.TestCase): def test_lua_issues(self): self.assertEqual(count_issues_for_file_type(self.raw_issues, 'lua'), 2) + +class CustomOptionsTest(unittest.TestCase): + def setUp(self): + parser = TodoParser(options={"identifiers": + [{"name": "FIX", "labels": []}, + {"name": "TODO", "labels": []}]}) + self.raw_issues = [] + with open('syntax.json', 'r') as syntax_json: + parser.syntax_dict = json.load(syntax_json) + with open('tests/test_new.diff', 'r') as diff_file: + self.raw_issues.extend(parser.parse(diff_file)) + + # See GitHub issue #234 + @unittest.expectedFailure + def test_exact_identifier_match(self): + """ + Verify that issues are only created when there's an exact identifier match + + Other than case-insensitivity, an issue should only be matched if the + identifier is exactly within the list of identifiers. For instances, if + "FIX" is an identifier, it should NOT accidentaly match comments with + the words "suffix" or "prefix". + """ + matching_issues = get_issues_for_fields(self.raw_issues, + { + "file_name": "example_file.py", + "identifier": "FIX" + }) + self.assertEqual(len(matching_issues), 0, + msg=print_unexpected_issues(matching_issues)) + + # See GitHub issue #235 + @unittest.expectedFailure + def test_multiple_identifiers(self): + """ + Verify that issues by matching the first identifier on the line + + Issues should be identified such that the priority is where the identifier + is found within the comment line, which is not necessarily the order they're + specified in the identifier dictionary. For instance, if the dictionary is + [{"name": "FIX", "labels": []}, + {"name": "TODO", "labels": []}]}) + then a comment line such as + # TODO: Fix this + should match because of the "TODO", not because of the "Fix". This is not + a trivial difference. If it matches for the "TODO", then the title will be + "Fix this", but if it matches for the "Fix", then the title will erroneously + be just "this". + """ + matching_issues = get_issues_for_fields(self.raw_issues, + { + "file_name": "init.lua", + "identifier": "FIX" + }) + self.assertEqual(len(matching_issues), 0, + msg=print_unexpected_issues(matching_issues)) + + matching_issues = get_issues_for_fields(self.raw_issues, + { + "file_name": "init.lua", + "identifier": "TODO" + }) + self.assertEqual(len(matching_issues), 2, + msg=print_unexpected_issues(matching_issues)) + + class ClosedIssueTest(unittest.TestCase): # Check for removed TODOs across the files specified. def setUp(self): From 69e2360329a6628319876691424f18c4d89a840d Mon Sep 17 00:00:00 2001 From: Robert Alonso <17463757+rgalonso@users.noreply.github.com> Date: Mon, 11 Nov 2024 20:45:26 +0000 Subject: [PATCH 4/9] test: allow multiple diff files to be consumed This sets up the ability to have one diff file create a simulated filesystem and a second to simulate an edit of an existing file --- tests/test_process_diff.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tests/test_process_diff.py b/tests/test_process_diff.py index 576e8c1..fe32b61 100644 --- a/tests/test_process_diff.py +++ b/tests/test_process_diff.py @@ -16,21 +16,22 @@ class IssueUrlInsertionTest(unittest.TestCase): diff_file = None parser = None - def _setUp(self, diff_file): + def _setUp(self, diff_files): # get current working directory self.orig_cwd = os.getcwd() # Create temporary directory to hold simulated filesystem. self.tempdir = tempfile.TemporaryDirectory() - # run patch against the diff file to generate the simulated filesystem - subprocess.run(['patch', '-d', self.tempdir.name, - '-i', f'{os.getcwd()}/tests/{diff_file}'], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - check=True) + for diff_file in diff_files: + # run patch against the diff file to generate/update the simulated filesystem + subprocess.run(['patch', '-d', self.tempdir.name, + '-i', f'{os.getcwd()}/tests/{diff_file}'], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=True) - self.diff_file = open(f'tests/{diff_file}', 'r') + self.diff_file = open(f'tests/{diff_files[-1]}', 'r') self.parser = TodoParser() with open('syntax.json', 'r') as syntax_json: self.parser.syntax_dict = json.load(syntax_json) @@ -54,7 +55,7 @@ class IssueUrlInsertionTest(unittest.TestCase): @unittest.skipIf(os.getenv('SKIP_PROCESS_DIFF_TEST', 'false') == 'true', "Skipping because 'SKIP_PROCESS_DIFF_TEST' is 'true'") def test_url_insertion(self): - self._setUp('test_new.diff') + self._setUp(['test_new.diff']) self._standardTest(80) # There is a known bug related to this issue, so until it's resolved @@ -62,7 +63,7 @@ class IssueUrlInsertionTest(unittest.TestCase): # See #225 and #224 @unittest.expectedFailure def test_same_title_in_same_file(self): - self._setUp('test_same_title_in_same_file.diff') + self._setUp(['test_same_title_in_same_file.diff']) self._standardTest(5) # There is a known bug related to this issue, so until it's resolved @@ -70,7 +71,7 @@ class IssueUrlInsertionTest(unittest.TestCase): # See #229 @unittest.expectedFailure def test_comment_suffix_after_source_line(self): - self._setUp('test_comment_suffix_after_source_line.diff') + self._setUp(['test_comment_suffix_after_source_line.diff']) self._standardTest(1) # get details about the issue and source file issue = self.raw_issues[0] From c643aecf02754c493ce8807eb1415f1ac1a8e459 Mon Sep 17 00:00:00 2001 From: Robert Alonso <17463757+rgalonso@users.noreply.github.com> Date: Mon, 11 Nov 2024 20:49:19 +0000 Subject: [PATCH 5/9] refactor: optionally output log if _standardTest fails Defaults True for backwards compatibility --- tests/test_process_diff.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_process_diff.py b/tests/test_process_diff.py index fe32b61..fb02725 100644 --- a/tests/test_process_diff.py +++ b/tests/test_process_diff.py @@ -39,7 +39,7 @@ class IssueUrlInsertionTest(unittest.TestCase): # change to the simulated filesystem directory os.chdir(self.tempdir.name) - def _standardTest(self, expected_count): + def _standardTest(self, expected_count, output_log_on_failure=True): # create object to hold output output = io.StringIO() # process the diffs @@ -47,7 +47,9 @@ class IssueUrlInsertionTest(unittest.TestCase): # make sure the number of issue URL comments inserted is as expected self.assertEqual(output.getvalue().count('Issue URL successfully inserted'), expected_count, - msg='\nProcessing log\n--------------\n'+output.getvalue()) + msg=( + '\nProcessing log\n--------------\n'+output.getvalue() + if output_log_on_failure else None)) # this test can take a while and, as far as TodoParser is concerned, # redundant with the tests of test_todo_parser, so enable the means From f0ed571547c22e222014e593e7fe679d1522dbc3 Mon Sep 17 00:00:00 2001 From: Robert Alonso <17463757+rgalonso@users.noreply.github.com> Date: Mon, 11 Nov 2024 20:55:14 +0000 Subject: [PATCH 6/9] test: add test to track additional bugs Several related bugs that stem from a diff that contains both deletions and additions. Specifically, the line numbers aren't counted correctly, leading to - issue URL can't be inserted because it can't find the right line in the latest file - generated issue references the wrong line number - closed issue references the wrong line number See GitHub issue #236 The last item might not have any actual impact as (I think) it's just informational. But it'd still be better if it reported the correct line number of the deletion, which necessarily has to be relative to the _old_ file's line number, not the updated file's. As there is no solution in place yet for these bugs, the unittest is marked as an expected failure --- tests/test_edit_py.diff | 15 ++++++++++++ tests/test_new_py.diff | 10 ++++++++ tests/test_process_diff.py | 48 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+) create mode 100644 tests/test_edit_py.diff create mode 100644 tests/test_new_py.diff diff --git a/tests/test_edit_py.diff b/tests/test_edit_py.diff new file mode 100644 index 0000000..e6e2c0b --- /dev/null +++ b/tests/test_edit_py.diff @@ -0,0 +1,15 @@ +diff --git a/example_file.py b/example_file.py +index 6b0c6cf..b37e70a 100644 +@@ -1,3 +1,9 @@ ++def imaginative_string(): ++ return 'figment of my imagination' ++ + def hello_world(): +- # TODO: Come up with a more imaginative greeting +- print('Hello world') +\ No newline at end of file ++ print(f'Hello {imaginative_string()}') ++ ++ # TODO: Do more stuff ++ # This function should probably do something more interesting ++ # labels: help wanted diff --git a/tests/test_new_py.diff b/tests/test_new_py.diff new file mode 100644 index 0000000..91a74be --- /dev/null +++ b/tests/test_new_py.diff @@ -0,0 +1,10 @@ +diff --git a/example_file.py b/example_file.py +new file mode 100644 +index 0000000..525e25d +--- /dev/null ++++ b/example_file.py +@@ -0,0 +1,3 @@ ++def hello_world(): ++ # TODO: Come up with a more imaginative greeting ++ print('Hello world') +\ No newline at end of file diff --git a/tests/test_process_diff.py b/tests/test_process_diff.py index fb02725..4a71955 100644 --- a/tests/test_process_diff.py +++ b/tests/test_process_diff.py @@ -11,12 +11,17 @@ from main import process_diff class IssueUrlInsertionTest(unittest.TestCase): + _original_addSubTest = None + num_subtest_failures = 0 orig_cwd = None tempdir = None diff_file = None parser = None def _setUp(self, diff_files): + # reset counter + self.num_subtest_failures = 0 + # get current working directory self.orig_cwd = os.getcwd() @@ -44,6 +49,8 @@ class IssueUrlInsertionTest(unittest.TestCase): output = io.StringIO() # process the diffs self.raw_issues = process_diff(diff=self.diff_file, insert_issue_urls=True, parser=self.parser, output=output) + # store the log for later processing + self.output_log = output.getvalue() # make sure the number of issue URL comments inserted is as expected self.assertEqual(output.getvalue().count('Issue URL successfully inserted'), expected_count, @@ -51,6 +58,19 @@ class IssueUrlInsertionTest(unittest.TestCase): '\nProcessing log\n--------------\n'+output.getvalue() if output_log_on_failure else None)) + def _addSubTest(self, test, subtest, outcome): + if outcome: + self.num_subtest_failures+=1 + if self._original_addSubTest: + self._original_addSubTest(test, subtest, outcome) + + def run(self, result=None): + if result and getattr(result, "addSubTest", None): + self._original_addSubTest = result.addSubTest + result.addSubTest = self._addSubTest + + super().run(result) + # this test can take a while and, as far as TodoParser is concerned, # redundant with the tests of test_todo_parser, so enable the means # to skip it if desired @@ -60,6 +80,34 @@ class IssueUrlInsertionTest(unittest.TestCase): self._setUp(['test_new.diff']) self._standardTest(80) + # See GitHub issue #236 + @unittest.expectedFailure + def test_line_numbering_with_deletions(self): + self._setUp(['test_new_py.diff', 'test_edit_py.diff']) + with self.subTest("Issue URL insertion"): + # was issue URL successfully inserted? + self._standardTest(1, False) + with self.subTest("Issue insertion line numbering"): + # make sure the log reports having inserted the issue based on the + # correct line numbering of the updated file + self.assertIn("Processing issue 1 of 2: 'Do more stuff' @ example_file.py:7", + self.output_log) + with self.subTest("Issue deletion line numbering"): + # make sure the log reports having closed the issue based on the + # correct line numbering of the old (not the updated!) file + self.assertIn("Processing issue 2 of 2: 'Come up with a more imaginative greeting' @ example_file.py:2", + self.output_log) + + if self.num_subtest_failures > 0: + self.fail( + '\n'.join([ + '', + 'One or more subtests have failed', + 'Processing log', + '--------------', + ''])+ + self.output_log) + # There is a known bug related to this issue, so until it's resolved # this is an expected failure. # See #225 and #224 From 57928cd01e15fc55949ffa8b0648aa9e3d1b1e01 Mon Sep 17 00:00:00 2001 From: Robert Alonso <17463757+rgalonso@users.noreply.github.com> Date: Mon, 11 Nov 2024 23:23:27 +0000 Subject: [PATCH 7/9] fix: resolve numerous issue stemming from case-(in)sensitivity matching of TODOs Some parts of the code were using case-insensitive matches when searching for an identifier (i.e. TODO and todo would both be acceptable) whereas other parts of the code would search for a strict case-sensitive match (only TODO, not todo). This inconsistency led to many issues, among them - #216 - #224 - #225 Further, the identifier match wasn't being done using word breaks, meaning an identifier of "FIX" would match (case-insensitively) with "suffix" or "prefix" if those words happened to appear in a comment. (See #234). Issue #230 was also preventing issue labels from being applied properly if the identifier case did not match exactly the canonical version. i.e. "todo" would generate an issue, but the associated labels wouldn't be applied because the exact identifier used wasn't "TODO". This commit resolves all of these issues. --- Issue.py | 4 +- TodoParser.py | 269 ++++++++++++++++++++++++++++++-------------------- 2 files changed, 165 insertions(+), 108 deletions(-) diff --git a/Issue.py b/Issue.py index b7c8e6e..36b50f6 100644 --- a/Issue.py +++ b/Issue.py @@ -2,7 +2,7 @@ class Issue(object): """Basic Issue model for collecting the necessary info to send to GitHub.""" def __init__(self, title, labels, assignees, milestone, body, hunk, file_name, - start_line, num_lines, markdown_language, status, identifier, ref, issue_url, issue_number): + start_line, num_lines, markdown_language, status, identifier, identifier_actual, ref, issue_url, issue_number, start_line_within_hunk=1): self.title = title self.labels = labels self.assignees = assignees @@ -15,9 +15,11 @@ class Issue(object): self.markdown_language = markdown_language self.status = status self.identifier = identifier + self.identifier_actual = identifier_actual self.ref = ref self.issue_url = issue_url self.issue_number = issue_number + self.start_line_within_hunk = start_line_within_hunk def __str__(self): selflist = [] diff --git a/TodoParser.py b/TodoParser.py index af4dc05..801702e 100644 --- a/TodoParser.py +++ b/TodoParser.py @@ -6,6 +6,7 @@ from Issue import Issue import requests import json from urllib.parse import urlparse +import itertools class TodoParser(object): """Parser for extracting information from a given diff file.""" @@ -208,7 +209,13 @@ class TodoParser(object): # Now for each code block, check for comments, then those comments for TODOs. for block in code_blocks: + # convert hunk string into newline-separated list (excluding first element which is always null and not actually first line of hunk) + bl=block['hunk'].split('\n')[1:] + for marker in block['markers']: + # initialize list + contiguous_comments_and_positions = [] + # Check if there are line or block comments. if marker['type'] == 'line': # Add a negative lookup to include the second character from alternative comment patterns. @@ -240,32 +247,95 @@ class TodoParser(object): + (r'(?!(' + '|'.join(suff_escape_list) + r'))' if len(suff_escape_list) > 0 else '') + r'\s*.+$)') - comments = re.finditer(comment_pattern, block['hunk'], re.MULTILINE) - extracted_comments = [] - prev_comment = None - for i, comment in enumerate(comments): - if prev_comment and comment.start() == prev_comment.end() + 1: - extracted_comments[len(extracted_comments) - 1].append(comment) - else: - extracted_comments.append([comment]) - prev_comment = comment - for comment in extracted_comments: - extracted_issues = self._extract_issue_if_exists(comment, marker, block) - if extracted_issues: - issues.extend(extracted_issues) - else: - comment_pattern = (r'(?:[+\-\s]\s*' + marker['pattern']['start'] + r'.*?' - + marker['pattern']['end'] + ')') - comments = re.finditer(comment_pattern, block['hunk'], re.DOTALL) - extracted_comments = [] - for i, comment in enumerate(comments): - if re.search('|'.join(self.identifiers), comment.group(0)): - extracted_comments.append([comment]) - for comment in extracted_comments: - extracted_issues = self._extract_issue_if_exists(comment, marker, block) - if extracted_issues: - issues.extend(extracted_issues) + # create regex object to search for comments + compiled_pattern=re.compile(comment_pattern) + # for each element of list, enumerate it and if value is a regex match, include it in list that is returned, + # where each element of the list is a dictionary that is the start end end lines of the match (relative to + # start of the hunk) and the matching string itself + comments_and_positions = [{'start': i, 'end': i, 'comment': x} for i, x in enumerate(bl) if compiled_pattern.search(x)] + if len(comments_and_positions) > 0: + # create filtered list which consolidates contiguous lines + contiguous_comments_and_positions=[comments_and_positions[0]] + for j, x in enumerate(comments_and_positions[1:]): + if x['start'] == (comments_and_positions[j]['end'] + 1): + contiguous_comments_and_positions[-1]['end']+=1 + contiguous_comments_and_positions[-1]['comment'] += '\n' + x['comment'] + else: + contiguous_comments_and_positions.append(x) + else: + # pattern consists of one group, the first comment block encountered + pattern=(r'([+\-\s]\s*' + marker['pattern']['start'] + r'.*?' + + marker['pattern']['end'] + ')') + # compile above pattern + compiled_pattern = re.compile(pattern, re.DOTALL) + + # search for the pattern within the hunk and + # return a list of iterators to all of the matches + match_iters = compiled_pattern.finditer(block['hunk']) + + # split off into overlapping pairs. i.e. ['A', 'B', C'] => [('A', 'B'), ('B', 'C')] + pairs = itertools.pairwise(match_iters) + + for i, pair in enumerate(pairs): + # get start/end index (within hunk) of previous section + prev_span = pair[0].span() + + # if first iteration, special handling + if i == 0: + # set start line and comment string of first section + contiguous_comments_and_positions.append({ + # -1 to ignore first newline, which isn't actually part of the hunk + 'start': block['hunk'].count('\n', 0, prev_span[0]) - 1, + 'end': 0, + 'comment': pair[0].group(0) + }) + # get number of lines in first section + num_lines_in_first_section = block['hunk'].count('\n', prev_span[0], prev_span[1]) + # set end line of first section relative to its start + contiguous_comments_and_positions[-1]['end'] = contiguous_comments_and_positions[-1]['start'] + num_lines_in_first_section + + # get start/end index (within hunk) of current section + curr_span = pair[1].span() + # determine number of lines between previous end and current start + num_lines_from_prev_section_end_line = block['hunk'].count('\n', prev_span[1], curr_span[0]) + # set start line of current section based on previous end + contiguous_comments_and_positions.append({ + 'start': contiguous_comments_and_positions[-1]['end'] + num_lines_from_prev_section_end_line, + 'end': 0, + 'comment': pair[1].group(0) + }) + # get number of lines in current section + num_lines_in_curr_section = block['hunk'].count('\n', curr_span[0], curr_span[1]) + # set end line of current section relative to its start + contiguous_comments_and_positions[-1]['end'] = contiguous_comments_and_positions[-1]['start'] + num_lines_in_curr_section + + # handle potential corner case where there was only one match + # and therefore it couldn't be paired off + if len(contiguous_comments_and_positions) == 0: + # redo the search, this time returning the + # result directly rather than an iterator + match = compiled_pattern.search(block['hunk']) + + if match: + # get start/end index (within hunk) of this section + span = match.span() + # set start line and comment string of first section + contiguous_comments_and_positions.append({ + # -1 to ignore first newline, which isn't actually part of the hunk + 'start': block['hunk'].count('\n', 0, span[0]) - 1, + 'end': 0, + 'comment': match.group(0) + }) + # get number of lines in first section + num_lines_in_first_section = block['hunk'].count('\n', span[0], span[1]) + # set end line of first section relative to its start + contiguous_comments_and_positions[-1]['end'] = contiguous_comments_and_positions[-1]['start'] + num_lines_in_first_section + + for comment_and_position in contiguous_comments_and_positions: + extracted_issues = self._extract_issue_if_exists(comment_and_position, marker, block) + if extracted_issues: + issues.extend(extracted_issues) for i, issue in enumerate(issues): # Strip some of the diff symbols so it can be included as a code snippet in the issue body. @@ -304,80 +374,72 @@ class TodoParser(object): return syntax_details, ace_mode return None, None - def _extract_issue_if_exists(self, comment, marker, code_block): + def _extract_issue_if_exists(self, comment_block, marker, hunk_info): """Check this comment for TODOs, and if found, build an Issue object.""" curr_issue = None found_issues = [] line_statuses = [] prev_line_title = False - for match in comment: - comment_lines = match.group().split('\n') - for line in comment_lines: - line_status, committed_line = self._get_line_status(line) - line_statuses.append(line_status) - cleaned_line = self._clean_line(committed_line, marker) - line_title, ref, identifier = self._get_title(cleaned_line) - if line_title: - if prev_line_title and line_status == line_statuses[-2]: - # This means that there is a separate one-line TODO directly above this one. - # We need to store the previous one. - curr_issue.status = line_status - found_issues.append(curr_issue) - curr_issue = Issue( - title=line_title, - labels=[], - assignees=[], - milestone=None, - body=[], - hunk=code_block['hunk'], - file_name=code_block['file'], - start_line=code_block['start_line'], - num_lines=1, - markdown_language=code_block['markdown_language'], - status=line_status, - identifier=identifier, - ref=ref, - issue_url=None, - issue_number=None - ) - prev_line_title = True + comment_lines = comment_block['comment'].split('\n') + for line_number_within_comment_block, line in enumerate(comment_lines): + line_status, committed_line = self._get_line_status(line) + line_statuses.append(line_status) + cleaned_line = self._clean_line(committed_line, marker) + line_title, ref, identifier, identifier_actual = self._get_title(cleaned_line) + if line_title: + if prev_line_title and line_status == line_statuses[-2]: + # This means that there is a separate one-line TODO directly above this one. + # We need to store the previous one. + curr_issue.status = line_status + found_issues.append(curr_issue) + curr_issue = Issue( + title=line_title, + labels=[], + assignees=[], + milestone=None, + body=[], + hunk=hunk_info['hunk'], + file_name=hunk_info['file'], + start_line=hunk_info['start_line'] + comment_block['start'] + line_number_within_comment_block, + start_line_within_hunk=comment_block['start'] + line_number_within_comment_block + 1, + num_lines=1, + markdown_language=hunk_info['markdown_language'], + status=line_status, + identifier=identifier, + identifier_actual=identifier_actual, + ref=ref, + issue_url=None, + issue_number=None + ) + prev_line_title = True - # Calculate the file line number that this issue references. - hunk_lines = re.finditer(self.LINE_PATTERN, code_block['hunk'], re.MULTILINE) - start_line = code_block['start_line'] - for i, hunk_line in enumerate(hunk_lines): - if hunk_line.group(0) == line: - curr_issue.start_line = start_line - break - if i != 0 and (hunk_line.group(0).startswith('+') or not hunk_line.group(0).startswith('-')): - start_line += 1 + elif curr_issue: + # Extract other issue information that may exist below the title. + line_labels = self._get_labels(cleaned_line) + line_assignees = self._get_assignees(cleaned_line) + line_milestone = self._get_milestone(cleaned_line) + line_url = self._get_issue_url(cleaned_line) + if line_labels: + curr_issue.labels.extend(line_labels) + elif line_assignees: + curr_issue.assignees.extend(line_assignees) + elif line_milestone: + curr_issue.milestone = line_milestone + elif line_url: + curr_issue.issue_url = line_url + issue_number_search = self.ISSUE_NUMBER_PATTERN.search(line_url) + if issue_number_search: + curr_issue.issue_number = issue_number_search.group(1) + elif len(cleaned_line) and line_status != LineStatus.DELETED: + if self.should_escape: + curr_issue.body.append(self._escape_markdown(cleaned_line)) + else: + curr_issue.body.append(cleaned_line) + if not line.startswith('-'): + curr_issue.num_lines += 1 + if not line_title: + prev_line_title = False - elif curr_issue: - # Extract other issue information that may exist below the title. - line_labels = self._get_labels(cleaned_line) - line_assignees = self._get_assignees(cleaned_line) - line_milestone = self._get_milestone(cleaned_line) - line_url = self._get_issue_url(cleaned_line) - if line_labels: - curr_issue.labels.extend(line_labels) - elif line_assignees: - curr_issue.assignees.extend(line_assignees) - elif line_milestone: - curr_issue.milestone = line_milestone - elif line_url: - curr_issue.issue_url = line_url - issue_number_search = self.ISSUE_NUMBER_PATTERN.search(line_url) - if issue_number_search: - curr_issue.issue_number = issue_number_search.group(1) - elif len(cleaned_line) and line_status != LineStatus.DELETED: - if self.should_escape: - curr_issue.body.append(self._escape_markdown(cleaned_line)) - else: - curr_issue.body.append(cleaned_line) - if not line.startswith('-'): - curr_issue.num_lines += 1 - if not line_title: - prev_line_title = False if curr_issue is not None and curr_issue.identifier is not None and self.identifiers_dict is not None: for identifier_dict in self.identifiers_dict: if identifier_dict['name'] == curr_issue.identifier: @@ -468,25 +530,18 @@ class TodoParser(object): """Check the passed comment for a new issue title (and reference, if specified).""" title = None ref = None + title_identifier_actual = None title_identifier = None for identifier in self.identifiers: - title_identifier = identifier - title_pattern = re.compile(fr'(?<={identifier}[\s:]).+', re.IGNORECASE) - title_search = title_pattern.search(comment, re.IGNORECASE) + title_pattern = re.compile(fr'(\b{re.escape(identifier)}\b)(\(([^)]+)\))?\s*:?\s*(.+)', re.IGNORECASE) + title_search = title_pattern.search(comment) if title_search: - title = title_search.group(0).strip(': ') + title_identifier_actual = title_search.group(1) + title_identifier = identifier + ref = title_search.group(3) # may be empty, which is OK + title = title_search.group(4) break - else: - title_ref_pattern = re.compile(fr'(?<={identifier}\().+', re.IGNORECASE) - title_ref_search = title_ref_pattern.search(comment, re.IGNORECASE) - if title_ref_search: - title = title_ref_search.group(0).strip() - ref_search = self.REF_PATTERN.search(title) - if ref_search: - ref = ref_search.group(0) - title = title.replace(ref, '', 1).lstrip(':) ') - break - return title, ref, title_identifier + return title, ref, title_identifier, title_identifier_actual def _get_issue_url(self, comment): """Check the passed comment for a GitHub issue URL.""" From 7ce78256b65d0507ae9e4dcc195ff0b526c85246 Mon Sep 17 00:00:00 2001 From: Robert Alonso <17463757+rgalonso@users.noreply.github.com> Date: Mon, 11 Nov 2024 23:36:20 +0000 Subject: [PATCH 8/9] test: allow test to be an expected success now that bug is fixed Earlier commit resolved issues #216, #224, #225 --- tests/test_process_diff.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/test_process_diff.py b/tests/test_process_diff.py index 4a71955..689d47a 100644 --- a/tests/test_process_diff.py +++ b/tests/test_process_diff.py @@ -108,10 +108,6 @@ class IssueUrlInsertionTest(unittest.TestCase): ''])+ self.output_log) - # There is a known bug related to this issue, so until it's resolved - # this is an expected failure. - # See #225 and #224 - @unittest.expectedFailure def test_same_title_in_same_file(self): self._setUp(['test_same_title_in_same_file.diff']) self._standardTest(5) From 8c28d096ee5f5a297284100395a19ad9b41f5f97 Mon Sep 17 00:00:00 2001 From: Robert Alonso <17463757+rgalonso@users.noreply.github.com> Date: Mon, 11 Nov 2024 23:41:14 +0000 Subject: [PATCH 9/9] test: allow test to be an expected success now that bug is fixed Earlier commit resolved issue #234 --- tests/test_todo_parser.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_todo_parser.py b/tests/test_todo_parser.py index ad14e96..c8fd393 100644 --- a/tests/test_todo_parser.py +++ b/tests/test_todo_parser.py @@ -140,8 +140,6 @@ class CustomOptionsTest(unittest.TestCase): with open('tests/test_new.diff', 'r') as diff_file: self.raw_issues.extend(parser.parse(diff_file)) - # See GitHub issue #234 - @unittest.expectedFailure def test_exact_identifier_match(self): """ Verify that issues are only created when there's an exact identifier match