From 2d98b5c359d3a342c38f364db57ebd89fd7c9e5b Mon Sep 17 00:00:00 2001
From: Robert Alonso <17463757+rgalonso@users.noreply.github.com>
Date: Tue, 12 Nov 2024 16:07:05 +0000
Subject: [PATCH] fix: handle identifier with regex characters

Provides a test for and the solution to GitHub
issue #242. Namely, an identifier which contains
regex characters (e.g. "[TODO]") is properly
handled by having the parser look for literal
"[" and "]" characters rather than treating those
characters as part of a regex pattern. The word
boundary regex pattern '\b' does NOT properly
handle this, so a slightly different pattern is
used to identify the boundary.
---
 TodoParser.py              |  9 +++++----
 main.py                    |  2 +-
 tests/test_new.diff        |  2 +-
 tests/test_process_diff.py |  2 +-
 tests/test_todo_parser.py  | 25 ++++++++++++++++++++++---
 5 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/TodoParser.py b/TodoParser.py
index 422adeb..42173d0 100644
--- a/TodoParser.py
+++ b/TodoParser.py
@@ -563,13 +563,14 @@ class TodoParser(object):
         title_identifier_actual = None
         title_identifier = None
         for identifier in self.identifiers:
-            title_pattern = re.compile(fr'(^.*?)(\s*?)(\b{re.escape(identifier)}\b)(\(([^)]+)\))?\s*:?\s*(.+)', re.IGNORECASE)
+            title_pattern = re.compile(fr'(^|(^.*?)(\s*?)\s)({re.escape(identifier)})(\(([^)]+)\))?\s*(:|\s)\s*(.+)',
+                                       re.IGNORECASE)
             title_search = title_pattern.search(comment)
             if title_search:
-                title_identifier_actual = title_search.group(3)
+                title_identifier_actual = title_search.group(4)
                 title_identifier = identifier
-                ref = title_search.group(5) # may be empty, which is OK
-                title = title_search.group(6)
+                ref = title_search.group(6) # may be empty, which is OK
+                title = title_search.group(8)
                 break
         return title, ref, title_identifier, title_identifier_actual
 
diff --git a/main.py b/main.py
index b9d594f..845d82b 100644
--- a/main.py
+++ b/main.py
@@ -76,7 +76,7 @@ def process_diff(diff, client=Client(), insert_issue_urls=False, parser=TodoPars
                     if line_number < len(file_lines):
                         # Duplicate the line to retain the comment syntax.
                         old_line = file_lines[line_number]
-                        remove = fr'(?i:{raw_issue.identifier}).*{re.escape(raw_issue.title)}'
+                        remove = fr'(?i:{re.escape(raw_issue.identifier)}).*{re.escape(raw_issue.title)}'
                         insert = f'Issue URL: {client.get_issue_url(new_issue_number)}'
                         new_line = re.sub('^.*'+remove, raw_issue.prefix + insert, old_line)
                         # make sure the above operation worked as intended
diff --git a/tests/test_new.diff b/tests/test_new.diff
index b63bc45..740a179 100644
--- a/tests/test_new.diff
+++ b/tests/test_new.diff
@@ -58,7 +58,7 @@ index 0000000..525e25d
 +    # TODO: Come up with a more imaginative greeting
 +    print('Hello world')
 +
-+    # TODO: Do more stuff
++    # [TODO]: Do more stuff
 +    #  This function should probably do something more interesting
 +    #  labels: help wanted
 +
diff --git a/tests/test_process_diff.py b/tests/test_process_diff.py
index 32556c2..18da372 100644
--- a/tests/test_process_diff.py
+++ b/tests/test_process_diff.py
@@ -78,7 +78,7 @@ class IssueUrlInsertionTest(unittest.TestCase):
                      "Skipping because 'SKIP_PROCESS_DIFF_TEST' is 'true'")
     def test_url_insertion(self):
         self._setUp(['test_new.diff'])
-        self._standardTest(80)
+        self._standardTest(79)
 
     def test_line_numbering_with_deletions(self):
         self._setUp(['test_new_py.diff', 'test_edit_py.diff'])
diff --git a/tests/test_todo_parser.py b/tests/test_todo_parser.py
index c8fd393..45b577c 100644
--- a/tests/test_todo_parser.py
+++ b/tests/test_todo_parser.py
@@ -45,7 +45,7 @@ class NewIssueTest(unittest.TestCase):
 
     def test_python_issues(self):
         # Includes 4 tests for Starlark.
-        self.assertEqual(count_issues_for_file_type(self.raw_issues, 'python'), 8)
+        self.assertEqual(count_issues_for_file_type(self.raw_issues, 'python'), 7)
 
     def test_yaml_issues(self):
         self.assertEqual(count_issues_for_file_type(self.raw_issues, 'yaml'), 2)
@@ -80,7 +80,7 @@ class NewIssueTest(unittest.TestCase):
         self.assertEqual(count_issues_for_file_type(self.raw_issues, 'julia'), 2)
 
     def test_starlark_issues(self):
-        self.assertEqual(count_issues_for_file_type(self.raw_issues, 'python'), 8)
+        self.assertEqual(count_issues_for_file_type(self.raw_issues, 'python'), 7)
 
     def test_autohotkey_issues(self):
         self.assertEqual(count_issues_for_file_type(self.raw_issues, 'autohotkey'), 1)
@@ -133,7 +133,9 @@ class CustomOptionsTest(unittest.TestCase):
     def setUp(self):
         parser = TodoParser(options={"identifiers":
                                      [{"name": "FIX", "labels": []},
-                                      {"name": "TODO", "labels": []}]})
+                                      {"name": "[TODO]", "labels": []},
+                                      {"name": "TODO", "labels": []}
+                                      ]})
         self.raw_issues = []
         with open('syntax.json', 'r') as syntax_json:
             parser.syntax_dict = json.load(syntax_json)
@@ -157,6 +159,23 @@ class CustomOptionsTest(unittest.TestCase):
         self.assertEqual(len(matching_issues), 0,
                          msg=print_unexpected_issues(matching_issues))
 
+    # See GitHub issue #242
+    def test_regex_identifier_chars(self):
+        """
+        Verify that the presence of regex characters in the identifier
+        doesn't confuse the parser
+
+        An identifier such as "[TODO]" should be matched literally, not treating
+        the "[" and "]" characters as part of a regular expression pattern.
+        """
+        matching_issues = get_issues_for_fields(self.raw_issues,
+                                                {
+                                                    "file_name": "example_file.py",
+                                                    "identifier": "[TODO]"
+                                                })
+        self.assertEqual(len(matching_issues), 1,
+                         msg=print_unexpected_issues(matching_issues))
+
     # See GitHub issue #235
     @unittest.expectedFailure
     def test_multiple_identifiers(self):