fix: handle identifier with regex characters

Provides a test for and the solution to GitHub
issue #242. Namely, an identifier which contains
regex characters (e.g. "[TODO]") is properly
handled by having the parser look for literal
"[" and "]" characters rather than treating those
characters as part of a regex pattern. The word
boundary regex pattern '\b' does NOT properly
handle this, so a slightly different pattern is
used to identify the boundary.
This commit is contained in:
Robert Alonso 2024-11-12 16:07:05 +00:00
parent 59c6b5395c
commit 2d98b5c359
5 changed files with 30 additions and 10 deletions

View File

@ -563,13 +563,14 @@ class TodoParser(object):
title_identifier_actual = None title_identifier_actual = None
title_identifier = None title_identifier = None
for identifier in self.identifiers: for identifier in self.identifiers:
title_pattern = re.compile(fr'(^.*?)(\s*?)(\b{re.escape(identifier)}\b)(\(([^)]+)\))?\s*:?\s*(.+)', re.IGNORECASE) title_pattern = re.compile(fr'(^|(^.*?)(\s*?)\s)({re.escape(identifier)})(\(([^)]+)\))?\s*(:|\s)\s*(.+)',
re.IGNORECASE)
title_search = title_pattern.search(comment) title_search = title_pattern.search(comment)
if title_search: if title_search:
title_identifier_actual = title_search.group(3) title_identifier_actual = title_search.group(4)
title_identifier = identifier title_identifier = identifier
ref = title_search.group(5) # may be empty, which is OK ref = title_search.group(6) # may be empty, which is OK
title = title_search.group(6) title = title_search.group(8)
break break
return title, ref, title_identifier, title_identifier_actual return title, ref, title_identifier, title_identifier_actual

View File

@ -76,7 +76,7 @@ def process_diff(diff, client=Client(), insert_issue_urls=False, parser=TodoPars
if line_number < len(file_lines): if line_number < len(file_lines):
# Duplicate the line to retain the comment syntax. # Duplicate the line to retain the comment syntax.
old_line = file_lines[line_number] old_line = file_lines[line_number]
remove = fr'(?i:{raw_issue.identifier}).*{re.escape(raw_issue.title)}' remove = fr'(?i:{re.escape(raw_issue.identifier)}).*{re.escape(raw_issue.title)}'
insert = f'Issue URL: {client.get_issue_url(new_issue_number)}' insert = f'Issue URL: {client.get_issue_url(new_issue_number)}'
new_line = re.sub('^.*'+remove, raw_issue.prefix + insert, old_line) new_line = re.sub('^.*'+remove, raw_issue.prefix + insert, old_line)
# make sure the above operation worked as intended # make sure the above operation worked as intended

View File

@ -58,7 +58,7 @@ index 0000000..525e25d
+ # TODO: Come up with a more imaginative greeting + # TODO: Come up with a more imaginative greeting
+ print('Hello world') + print('Hello world')
+ +
+ # TODO: Do more stuff + # [TODO]: Do more stuff
+ # This function should probably do something more interesting + # This function should probably do something more interesting
+ # labels: help wanted + # labels: help wanted
+ +

View File

@ -78,7 +78,7 @@ class IssueUrlInsertionTest(unittest.TestCase):
"Skipping because 'SKIP_PROCESS_DIFF_TEST' is 'true'") "Skipping because 'SKIP_PROCESS_DIFF_TEST' is 'true'")
def test_url_insertion(self): def test_url_insertion(self):
self._setUp(['test_new.diff']) self._setUp(['test_new.diff'])
self._standardTest(80) self._standardTest(79)
def test_line_numbering_with_deletions(self): def test_line_numbering_with_deletions(self):
self._setUp(['test_new_py.diff', 'test_edit_py.diff']) self._setUp(['test_new_py.diff', 'test_edit_py.diff'])

View File

@ -45,7 +45,7 @@ class NewIssueTest(unittest.TestCase):
def test_python_issues(self): def test_python_issues(self):
# Includes 4 tests for Starlark. # Includes 4 tests for Starlark.
self.assertEqual(count_issues_for_file_type(self.raw_issues, 'python'), 8) self.assertEqual(count_issues_for_file_type(self.raw_issues, 'python'), 7)
def test_yaml_issues(self): def test_yaml_issues(self):
self.assertEqual(count_issues_for_file_type(self.raw_issues, 'yaml'), 2) self.assertEqual(count_issues_for_file_type(self.raw_issues, 'yaml'), 2)
@ -80,7 +80,7 @@ class NewIssueTest(unittest.TestCase):
self.assertEqual(count_issues_for_file_type(self.raw_issues, 'julia'), 2) self.assertEqual(count_issues_for_file_type(self.raw_issues, 'julia'), 2)
def test_starlark_issues(self): def test_starlark_issues(self):
self.assertEqual(count_issues_for_file_type(self.raw_issues, 'python'), 8) self.assertEqual(count_issues_for_file_type(self.raw_issues, 'python'), 7)
def test_autohotkey_issues(self): def test_autohotkey_issues(self):
self.assertEqual(count_issues_for_file_type(self.raw_issues, 'autohotkey'), 1) self.assertEqual(count_issues_for_file_type(self.raw_issues, 'autohotkey'), 1)
@ -133,7 +133,9 @@ class CustomOptionsTest(unittest.TestCase):
def setUp(self): def setUp(self):
parser = TodoParser(options={"identifiers": parser = TodoParser(options={"identifiers":
[{"name": "FIX", "labels": []}, [{"name": "FIX", "labels": []},
{"name": "TODO", "labels": []}]}) {"name": "[TODO]", "labels": []},
{"name": "TODO", "labels": []}
]})
self.raw_issues = [] self.raw_issues = []
with open('syntax.json', 'r') as syntax_json: with open('syntax.json', 'r') as syntax_json:
parser.syntax_dict = json.load(syntax_json) parser.syntax_dict = json.load(syntax_json)
@ -157,6 +159,23 @@ class CustomOptionsTest(unittest.TestCase):
self.assertEqual(len(matching_issues), 0, self.assertEqual(len(matching_issues), 0,
msg=print_unexpected_issues(matching_issues)) msg=print_unexpected_issues(matching_issues))
# See GitHub issue #242
def test_regex_identifier_chars(self):
"""
Verify that the presence of regex characters in the identifier
doesn't confuse the parser
An identifier such as "[TODO]" should be matched literally, not treating
the "[" and "]" characters as part of a regular expression pattern.
"""
matching_issues = get_issues_for_fields(self.raw_issues,
{
"file_name": "example_file.py",
"identifier": "[TODO]"
})
self.assertEqual(len(matching_issues), 1,
msg=print_unexpected_issues(matching_issues))
# See GitHub issue #235 # See GitHub issue #235
@unittest.expectedFailure @unittest.expectedFailure
def test_multiple_identifiers(self): def test_multiple_identifiers(self):