From 2d98b5c359d3a342c38f364db57ebd89fd7c9e5b Mon Sep 17 00:00:00 2001 From: Robert Alonso <17463757+rgalonso@users.noreply.github.com> Date: Tue, 12 Nov 2024 16:07:05 +0000 Subject: [PATCH] fix: handle identifier with regex characters Provides a test for and the solution to GitHub issue #242. Namely, an identifier which contains regex characters (e.g. "[TODO]") is properly handled by having the parser look for literal "[" and "]" characters rather than treating those characters as part of a regex pattern. The word boundary regex pattern '\b' does NOT properly handle this, so a slightly different pattern is used to identify the boundary. --- TodoParser.py | 9 +++++---- main.py | 2 +- tests/test_new.diff | 2 +- tests/test_process_diff.py | 2 +- tests/test_todo_parser.py | 25 ++++++++++++++++++++++--- 5 files changed, 30 insertions(+), 10 deletions(-) diff --git a/TodoParser.py b/TodoParser.py index 422adeb..42173d0 100644 --- a/TodoParser.py +++ b/TodoParser.py @@ -563,13 +563,14 @@ class TodoParser(object): title_identifier_actual = None title_identifier = None for identifier in self.identifiers: - title_pattern = re.compile(fr'(^.*?)(\s*?)(\b{re.escape(identifier)}\b)(\(([^)]+)\))?\s*:?\s*(.+)', re.IGNORECASE) + title_pattern = re.compile(fr'(^|(^.*?)(\s*?)\s)({re.escape(identifier)})(\(([^)]+)\))?\s*(:|\s)\s*(.+)', + re.IGNORECASE) title_search = title_pattern.search(comment) if title_search: - title_identifier_actual = title_search.group(3) + title_identifier_actual = title_search.group(4) title_identifier = identifier - ref = title_search.group(5) # may be empty, which is OK - title = title_search.group(6) + ref = title_search.group(6) # may be empty, which is OK + title = title_search.group(8) break return title, ref, title_identifier, title_identifier_actual diff --git a/main.py b/main.py index b9d594f..845d82b 100644 --- a/main.py +++ b/main.py @@ -76,7 +76,7 @@ def process_diff(diff, client=Client(), insert_issue_urls=False, parser=TodoPars if line_number < len(file_lines): # Duplicate the line to retain the comment syntax. old_line = file_lines[line_number] - remove = fr'(?i:{raw_issue.identifier}).*{re.escape(raw_issue.title)}' + remove = fr'(?i:{re.escape(raw_issue.identifier)}).*{re.escape(raw_issue.title)}' insert = f'Issue URL: {client.get_issue_url(new_issue_number)}' new_line = re.sub('^.*'+remove, raw_issue.prefix + insert, old_line) # make sure the above operation worked as intended diff --git a/tests/test_new.diff b/tests/test_new.diff index b63bc45..740a179 100644 --- a/tests/test_new.diff +++ b/tests/test_new.diff @@ -58,7 +58,7 @@ index 0000000..525e25d + # TODO: Come up with a more imaginative greeting + print('Hello world') + -+ # TODO: Do more stuff ++ # [TODO]: Do more stuff + # This function should probably do something more interesting + # labels: help wanted + diff --git a/tests/test_process_diff.py b/tests/test_process_diff.py index 32556c2..18da372 100644 --- a/tests/test_process_diff.py +++ b/tests/test_process_diff.py @@ -78,7 +78,7 @@ class IssueUrlInsertionTest(unittest.TestCase): "Skipping because 'SKIP_PROCESS_DIFF_TEST' is 'true'") def test_url_insertion(self): self._setUp(['test_new.diff']) - self._standardTest(80) + self._standardTest(79) def test_line_numbering_with_deletions(self): self._setUp(['test_new_py.diff', 'test_edit_py.diff']) diff --git a/tests/test_todo_parser.py b/tests/test_todo_parser.py index c8fd393..45b577c 100644 --- a/tests/test_todo_parser.py +++ b/tests/test_todo_parser.py @@ -45,7 +45,7 @@ class NewIssueTest(unittest.TestCase): def test_python_issues(self): # Includes 4 tests for Starlark. - self.assertEqual(count_issues_for_file_type(self.raw_issues, 'python'), 8) + self.assertEqual(count_issues_for_file_type(self.raw_issues, 'python'), 7) def test_yaml_issues(self): self.assertEqual(count_issues_for_file_type(self.raw_issues, 'yaml'), 2) @@ -80,7 +80,7 @@ class NewIssueTest(unittest.TestCase): self.assertEqual(count_issues_for_file_type(self.raw_issues, 'julia'), 2) def test_starlark_issues(self): - self.assertEqual(count_issues_for_file_type(self.raw_issues, 'python'), 8) + self.assertEqual(count_issues_for_file_type(self.raw_issues, 'python'), 7) def test_autohotkey_issues(self): self.assertEqual(count_issues_for_file_type(self.raw_issues, 'autohotkey'), 1) @@ -133,7 +133,9 @@ class CustomOptionsTest(unittest.TestCase): def setUp(self): parser = TodoParser(options={"identifiers": [{"name": "FIX", "labels": []}, - {"name": "TODO", "labels": []}]}) + {"name": "[TODO]", "labels": []}, + {"name": "TODO", "labels": []} + ]}) self.raw_issues = [] with open('syntax.json', 'r') as syntax_json: parser.syntax_dict = json.load(syntax_json) @@ -157,6 +159,23 @@ class CustomOptionsTest(unittest.TestCase): self.assertEqual(len(matching_issues), 0, msg=print_unexpected_issues(matching_issues)) + # See GitHub issue #242 + def test_regex_identifier_chars(self): + """ + Verify that the presence of regex characters in the identifier + doesn't confuse the parser + + An identifier such as "[TODO]" should be matched literally, not treating + the "[" and "]" characters as part of a regular expression pattern. + """ + matching_issues = get_issues_for_fields(self.raw_issues, + { + "file_name": "example_file.py", + "identifier": "[TODO]" + }) + self.assertEqual(len(matching_issues), 1, + msg=print_unexpected_issues(matching_issues)) + # See GitHub issue #235 @unittest.expectedFailure def test_multiple_identifiers(self):