fix: handle identifier with regex characters

Adds a regression test and the fix for GitHub
issue #242. An identifier that contains regex
metacharacters (e.g. "[TODO]") is now handled
correctly: the parser looks for literal "[" and
"]" characters rather than treating them as part
of a regex pattern. The word-boundary pattern
'\b' does NOT work here, because it only matches
between a word character and a non-word
character and so cannot anchor an identifier
that begins or ends with punctuation such as "["
or "]". A slightly different pattern is
therefore used to identify the boundary.
This commit is contained in:
Robert Alonso
2024-11-12 16:07:05 +00:00
parent 59c6b5395c
commit 2d98b5c359
5 changed files with 30 additions and 10 deletions

View File

@@ -45,7 +45,7 @@ class NewIssueTest(unittest.TestCase):
def test_python_issues(self):
# Includes 4 tests for Starlark.
self.assertEqual(count_issues_for_file_type(self.raw_issues, 'python'), 8)
self.assertEqual(count_issues_for_file_type(self.raw_issues, 'python'), 7)
def test_yaml_issues(self):
self.assertEqual(count_issues_for_file_type(self.raw_issues, 'yaml'), 2)
@@ -80,7 +80,7 @@ class NewIssueTest(unittest.TestCase):
self.assertEqual(count_issues_for_file_type(self.raw_issues, 'julia'), 2)
def test_starlark_issues(self):
self.assertEqual(count_issues_for_file_type(self.raw_issues, 'python'), 8)
self.assertEqual(count_issues_for_file_type(self.raw_issues, 'python'), 7)
def test_autohotkey_issues(self):
self.assertEqual(count_issues_for_file_type(self.raw_issues, 'autohotkey'), 1)
@@ -133,7 +133,9 @@ class CustomOptionsTest(unittest.TestCase):
def setUp(self):
parser = TodoParser(options={"identifiers":
[{"name": "FIX", "labels": []},
{"name": "TODO", "labels": []}]})
{"name": "[TODO]", "labels": []},
{"name": "TODO", "labels": []}
]})
self.raw_issues = []
with open('syntax.json', 'r') as syntax_json:
parser.syntax_dict = json.load(syntax_json)
@@ -157,6 +159,23 @@ class CustomOptionsTest(unittest.TestCase):
self.assertEqual(len(matching_issues), 0,
msg=print_unexpected_issues(matching_issues))
# See GitHub issue #242
def test_regex_identifier_chars(self):
"""
Verify that the presence of regex characters in the identifier
doesn't confuse the parser.
An identifier such as "[TODO]" should be matched literally, not treating
the "[" and "]" characters as part of a regular expression pattern.
"""
matching_issues = get_issues_for_fields(self.raw_issues,
{
"file_name": "example_file.py",
"identifier": "[TODO]"
})
self.assertEqual(len(matching_issues), 1,
msg=print_unexpected_issues(matching_issues))
# See GitHub issue #235
@unittest.expectedFailure
def test_multiple_identifiers(self):