From e42bca636e25e9c6db7e5d543831840f7a369d87 Mon Sep 17 00:00:00 2001
From: Robert Alonso <17463757+rgalonso@users.noreply.github.com>
Date: Mon, 11 Nov 2024 17:52:38 +0000
Subject: [PATCH 1/9] refactor: add optional argument to TodoParser to set
 configuration

Currently, this is just used to set identifiers
without needing to modify the environment, but this
could (should?) be extended to other options
---
 TodoParser.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/TodoParser.py b/TodoParser.py
index 6101704..af4dc05 100644
--- a/TodoParser.py
+++ b/TodoParser.py
@@ -24,22 +24,30 @@ class TodoParser(object):
     ISSUE_URL_PATTERN = re.compile(r'(?<=Issue URL:\s).+', re.IGNORECASE)
     ISSUE_NUMBER_PATTERN = re.compile(r'/issues/(\d+)', re.IGNORECASE)
 
-    def __init__(self):
+    def __init__(self, options=dict()):
         # Determine if the issues should be escaped.
         self.should_escape = os.getenv('INPUT_ESCAPE', 'true') == 'true'
-        # Load any custom identifiers, otherwise use the default.
+        # Load any custom identifiers specified by the environment,
+        # falling back to any specified by the constructor argument,
+        # otherwise using the default.
         custom_identifiers = os.getenv('INPUT_IDENTIFIERS')
         self.identifiers = ['TODO']
         self.identifiers_dict = None
         if custom_identifiers:
             try:
                 custom_identifiers_dict = json.loads(custom_identifiers)
+            except json.JSONDecodeError:
+                print('Invalid identifiers dict, ignoring.')
+        else:
+            custom_identifiers_dict = options.get("identifiers", None)
+        if custom_identifiers_dict:
+            try:
                 for identifier_dict in custom_identifiers_dict:
                     if type(identifier_dict['name']) is not str or type(identifier_dict['labels']) is not list:
                         raise TypeError
                 self.identifiers = [identifier['name'] for identifier in custom_identifiers_dict]
                 self.identifiers_dict = custom_identifiers_dict
-            except (json.JSONDecodeError, KeyError, TypeError):
+            except (KeyError, TypeError):
                 print('Invalid identifiers dict, ignoring.')
 
         self.languages_dict = None

From dd15f79ab860c08889d37f3a23572601ba06b8f2 Mon Sep 17 00:00:00 2001
From: Robert Alonso <17463757+rgalonso@users.noreply.github.com>
Date: Mon, 11 Nov 2024 17:53:48 +0000
Subject: [PATCH 2/9] feat: add ability to print Issue object

Facilitates debug and error handling
---
 Issue.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Issue.py b/Issue.py
index 8a79d37..b7c8e6e 100644
--- a/Issue.py
+++ b/Issue.py
@@ -19,3 +19,9 @@ class Issue(object):
         self.issue_url = issue_url
         self.issue_number = issue_number
 
+    def __str__(self):
+        selflist = []
+        for key in [x for x in vars(self).keys() if x not in ("hunk")]:
+            selflist.append(f'"{key}": "{getattr(self, key)}"')
+        selflist.append((f'"hunk": "{self.hunk}"'))
+        return '\n'.join(selflist)
\ No newline at end of file

From 872a997803f9dba03786bb744e33e7ca47a907df Mon Sep 17 00:00:00 2001
From: Robert Alonso <17463757+rgalonso@users.noreply.github.com>
Date: Mon, 11 Nov 2024 18:16:28 +0000
Subject: [PATCH 3/9] test: add tests to capture additional known issues

These tests capture the existing issues #234 and
issue #235. As no solution is in place yet, they're
marked as expected failures.
---
 tests/test_new.diff       | 15 ++++++-
 tests/test_todo_parser.py | 82 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 96 insertions(+), 1 deletion(-)

diff --git a/tests/test_new.diff b/tests/test_new.diff
index 88f647f..b63bc45 100644
--- a/tests/test_new.diff
+++ b/tests/test_new.diff
@@ -53,7 +53,7 @@ new file mode 100644
 index 0000000..525e25d
 --- /dev/null
 +++ b/example_file.py
-@@ -0,0 +1,23 @@
+@@ -0,0 +1,36 @@
 +def hello_world():
 +    # TODO: Come up with a more imaginative greeting
 +    print('Hello world')
@@ -77,6 +77,19 @@ index 0000000..525e25d
 +    kept together as one.
 +    '''
 +    pass
++
++"""
++The docstring documentation of MyClass
++
++Using a docstring in order to generate autodocumentation
++"""
++def SuffixAdder:
++  def __init__(self, base_str):
++    self.base_str=base_str
++
++  def add_suffix(self, suffix):
++    # add the suffix after the base string
++    return self.base_str + suffix
 \ No newline at end of file
 diff --git a/example.hs b/example.hs
 new file mode 100644
diff --git a/tests/test_todo_parser.py b/tests/test_todo_parser.py
index 34b94c1..ad14e96 100644
--- a/tests/test_todo_parser.py
+++ b/tests/test_todo_parser.py
@@ -12,6 +12,22 @@ def count_issues_for_file_type(raw_issues, file_type):
             num_issues += 1
     return num_issues
 
+def get_issues_for_fields(raw_issues, fields):
+    matching_issues = []
+    for issue in raw_issues:
+        for key in fields.keys():
+            if getattr(issue, key) != fields.get(key):
+                break
+        else:
+            matching_issues.append(issue)
+    return matching_issues
+
+def print_unexpected_issues(unexpected_issues):
+    return '\n'.join([
+           '',
+           'Unexpected issues:',
+           '\n=========================\n'.join(map(str, unexpected_issues))])
+
 
 class NewIssueTest(unittest.TestCase):
     # Check for newly added TODOs across the files specified.
@@ -112,6 +128,72 @@ class NewIssueTest(unittest.TestCase):
     def test_lua_issues(self):
         self.assertEqual(count_issues_for_file_type(self.raw_issues, 'lua'), 2)
 
+
+class CustomOptionsTest(unittest.TestCase):
+    def setUp(self):
+        parser = TodoParser(options={"identifiers":
+                                     [{"name": "FIX", "labels": []},
+                                      {"name": "TODO", "labels": []}]})
+        self.raw_issues = []
+        with open('syntax.json', 'r') as syntax_json:
+            parser.syntax_dict = json.load(syntax_json)
+        with open('tests/test_new.diff', 'r') as diff_file:
+            self.raw_issues.extend(parser.parse(diff_file))
+
+    # See GitHub issue #234
+    @unittest.expectedFailure
+    def test_exact_identifier_match(self):
+        """
+        Verify that issues are only created when there's an exact identifier match
+
+        Other than case-insensitivity, an issue should only be matched if the
+        identifier is exactly within the list of identifiers. For instances, if
+        "FIX" is an identifier, it should NOT accidentaly match comments with
+        the words "suffix" or "prefix".
+        """
+        matching_issues = get_issues_for_fields(self.raw_issues,
+                                                {
+                                                    "file_name": "example_file.py",
+                                                    "identifier": "FIX"
+                                                })
+        self.assertEqual(len(matching_issues), 0,
+                         msg=print_unexpected_issues(matching_issues))
+
+    # See GitHub issue #235
+    @unittest.expectedFailure
+    def test_multiple_identifiers(self):
+        """
+        Verify that issues by matching the first identifier on the line
+
+        Issues should be identified such that the priority is where the identifier
+        is found within the comment line, which is not necessarily the order they're
+        specified in the identifier dictionary. For instance, if the dictionary is
+            [{"name": "FIX", "labels": []},
+             {"name": "TODO", "labels": []}]})
+        then a comment line such as
+            # TODO: Fix this
+        should match because of the "TODO", not because of the "Fix". This is not
+        a trivial difference. If it matches for the "TODO", then the title will be
+        "Fix this", but if it matches for the "Fix", then the title will erroneously
+        be just "this".
+        """
+        matching_issues = get_issues_for_fields(self.raw_issues,
+                                                {
+                                                    "file_name": "init.lua",
+                                                    "identifier": "FIX"
+                                                })
+        self.assertEqual(len(matching_issues), 0,
+                         msg=print_unexpected_issues(matching_issues))
+
+        matching_issues = get_issues_for_fields(self.raw_issues,
+                                                {
+                                                    "file_name": "init.lua",
+                                                    "identifier": "TODO"
+                                                })
+        self.assertEqual(len(matching_issues), 2,
+                         msg=print_unexpected_issues(matching_issues))
+
+
 class ClosedIssueTest(unittest.TestCase):
     # Check for removed TODOs across the files specified.
     def setUp(self):

From 69e2360329a6628319876691424f18c4d89a840d Mon Sep 17 00:00:00 2001
From: Robert Alonso <17463757+rgalonso@users.noreply.github.com>
Date: Mon, 11 Nov 2024 20:45:26 +0000
Subject: [PATCH 4/9] test: allow multiple diff files to be consumed

This sets up the ability to have one diff file
create a simulated filesystem and a second to
simulate an edit of an existing file
---
 tests/test_process_diff.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/tests/test_process_diff.py b/tests/test_process_diff.py
index 576e8c1..fe32b61 100644
--- a/tests/test_process_diff.py
+++ b/tests/test_process_diff.py
@@ -16,21 +16,22 @@ class IssueUrlInsertionTest(unittest.TestCase):
     diff_file = None
     parser = None
 
-    def _setUp(self, diff_file):
+    def _setUp(self, diff_files):
         # get current working directory
         self.orig_cwd = os.getcwd()
 
         # Create temporary directory to hold simulated filesystem.
         self.tempdir = tempfile.TemporaryDirectory()
 
-        # run patch against the diff file to generate the simulated filesystem
-        subprocess.run(['patch', '-d', self.tempdir.name,
-                        '-i', f'{os.getcwd()}/tests/{diff_file}'],
-                        stdout=subprocess.DEVNULL,
-                        stderr=subprocess.DEVNULL,
-                        check=True)
+        for diff_file in diff_files:
+            # run patch against the diff file to generate/update the simulated filesystem
+            subprocess.run(['patch', '-d', self.tempdir.name,
+                            '-i', f'{os.getcwd()}/tests/{diff_file}'],
+                            stdout=subprocess.DEVNULL,
+                            stderr=subprocess.DEVNULL,
+                            check=True)
 
-        self.diff_file = open(f'tests/{diff_file}', 'r')
+        self.diff_file = open(f'tests/{diff_files[-1]}', 'r')
         self.parser = TodoParser()
         with open('syntax.json', 'r') as syntax_json:
             self.parser.syntax_dict = json.load(syntax_json)
@@ -54,7 +55,7 @@ class IssueUrlInsertionTest(unittest.TestCase):
     @unittest.skipIf(os.getenv('SKIP_PROCESS_DIFF_TEST', 'false') == 'true',
                      "Skipping because 'SKIP_PROCESS_DIFF_TEST' is 'true'")
     def test_url_insertion(self):
-        self._setUp('test_new.diff')
+        self._setUp(['test_new.diff'])
         self._standardTest(80)
 
     # There is a known bug related to this issue, so until it's resolved
@@ -62,7 +63,7 @@ class IssueUrlInsertionTest(unittest.TestCase):
     # See #225 and #224
     @unittest.expectedFailure
     def test_same_title_in_same_file(self):
-        self._setUp('test_same_title_in_same_file.diff')
+        self._setUp(['test_same_title_in_same_file.diff'])
         self._standardTest(5)
 
     # There is a known bug related to this issue, so until it's resolved
@@ -70,7 +71,7 @@ class IssueUrlInsertionTest(unittest.TestCase):
     # See #229
     @unittest.expectedFailure
     def test_comment_suffix_after_source_line(self):
-        self._setUp('test_comment_suffix_after_source_line.diff')
+        self._setUp(['test_comment_suffix_after_source_line.diff'])
         self._standardTest(1)
         # get details about the issue and source file
         issue = self.raw_issues[0]

From c643aecf02754c493ce8807eb1415f1ac1a8e459 Mon Sep 17 00:00:00 2001
From: Robert Alonso <17463757+rgalonso@users.noreply.github.com>
Date: Mon, 11 Nov 2024 20:49:19 +0000
Subject: [PATCH 5/9] refactor: optionally output log if _standardTest fails

Defaults True for backwards compatibility
---
 tests/test_process_diff.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/test_process_diff.py b/tests/test_process_diff.py
index fe32b61..fb02725 100644
--- a/tests/test_process_diff.py
+++ b/tests/test_process_diff.py
@@ -39,7 +39,7 @@ class IssueUrlInsertionTest(unittest.TestCase):
         # change to the simulated filesystem directory
         os.chdir(self.tempdir.name)
 
-    def _standardTest(self, expected_count):
+    def _standardTest(self, expected_count, output_log_on_failure=True):
         # create object to hold output
         output = io.StringIO()
         # process the diffs
@@ -47,7 +47,9 @@ class IssueUrlInsertionTest(unittest.TestCase):
         # make sure the number of issue URL comments inserted is as expected
         self.assertEqual(output.getvalue().count('Issue URL successfully inserted'),
                          expected_count,
-                         msg='\nProcessing log\n--------------\n'+output.getvalue())
+                         msg=(
+                             '\nProcessing log\n--------------\n'+output.getvalue()
+                                if output_log_on_failure else None))
 
     # this test can take a while and, as far as TodoParser is concerned,
     # redundant with the tests of test_todo_parser, so enable the means

From f0ed571547c22e222014e593e7fe679d1522dbc3 Mon Sep 17 00:00:00 2001
From: Robert Alonso <17463757+rgalonso@users.noreply.github.com>
Date: Mon, 11 Nov 2024 20:55:14 +0000
Subject: [PATCH 6/9] test: add test to track additional bugs

Several related bugs that stem from a diff that
contains both deletions and additions. Specifically,
the line numbers aren't counted correctly, leading
to
- issue URL can't be inserted because it can't find
  the right line in the latest file
- generated issue references the wrong line number
- closed issue references the wrong line number

See GitHub issue #236

The last item might not have any actual impact as
(I think) it's just informational. But it'd still
be better if it reported the correct line number
of the deletion, which necessarily has to be
relative to the _old_ file's line number, not the
updated file's.

As there is no solution in place yet for these
bugs, the unittest is marked as an expected failure
---
 tests/test_edit_py.diff    | 15 ++++++++++++
 tests/test_new_py.diff     | 10 ++++++++
 tests/test_process_diff.py | 48 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 73 insertions(+)
 create mode 100644 tests/test_edit_py.diff
 create mode 100644 tests/test_new_py.diff

diff --git a/tests/test_edit_py.diff b/tests/test_edit_py.diff
new file mode 100644
index 0000000..e6e2c0b
--- /dev/null
+++ b/tests/test_edit_py.diff
@@ -0,0 +1,15 @@
+diff --git a/example_file.py b/example_file.py
+index 6b0c6cf..b37e70a 100644
+@@ -1,3 +1,9 @@
++def imaginative_string():
++    return 'figment of my imagination'
++
+ def hello_world():
+-    # TODO: Come up with a more imaginative greeting
+-    print('Hello world')
+\ No newline at end of file
++    print(f'Hello {imaginative_string()}')
++
++    # TODO: Do more stuff
++    #  This function should probably do something more interesting
++    #  labels: help wanted
diff --git a/tests/test_new_py.diff b/tests/test_new_py.diff
new file mode 100644
index 0000000..91a74be
--- /dev/null
+++ b/tests/test_new_py.diff
@@ -0,0 +1,10 @@
+diff --git a/example_file.py b/example_file.py
+new file mode 100644
+index 0000000..525e25d
+--- /dev/null
++++ b/example_file.py
+@@ -0,0 +1,3 @@
++def hello_world():
++    # TODO: Come up with a more imaginative greeting
++    print('Hello world')
+\ No newline at end of file
diff --git a/tests/test_process_diff.py b/tests/test_process_diff.py
index fb02725..4a71955 100644
--- a/tests/test_process_diff.py
+++ b/tests/test_process_diff.py
@@ -11,12 +11,17 @@ from main import process_diff
 
 
 class IssueUrlInsertionTest(unittest.TestCase):
+    _original_addSubTest = None
+    num_subtest_failures = 0
     orig_cwd = None
     tempdir = None
     diff_file = None
     parser = None
 
     def _setUp(self, diff_files):
+        # reset counter
+        self.num_subtest_failures = 0
+
         # get current working directory
         self.orig_cwd = os.getcwd()
 
@@ -44,6 +49,8 @@ class IssueUrlInsertionTest(unittest.TestCase):
         output = io.StringIO()
         # process the diffs
         self.raw_issues = process_diff(diff=self.diff_file, insert_issue_urls=True, parser=self.parser, output=output)
+        # store the log for later processing
+        self.output_log = output.getvalue()
         # make sure the number of issue URL comments inserted is as expected
         self.assertEqual(output.getvalue().count('Issue URL successfully inserted'),
                          expected_count,
@@ -51,6 +58,19 @@ class IssueUrlInsertionTest(unittest.TestCase):
                              '\nProcessing log\n--------------\n'+output.getvalue()
                                 if output_log_on_failure else None))
 
+    def _addSubTest(self, test, subtest, outcome):
+        if outcome:
+            self.num_subtest_failures+=1
+        if self._original_addSubTest:
+            self._original_addSubTest(test, subtest, outcome)
+
+    def run(self, result=None):
+        if result and getattr(result, "addSubTest", None):
+            self._original_addSubTest = result.addSubTest
+            result.addSubTest = self._addSubTest
+
+        super().run(result)
+
     # this test can take a while and, as far as TodoParser is concerned,
     # redundant with the tests of test_todo_parser, so enable the means
     # to skip it if desired
@@ -60,6 +80,34 @@ class IssueUrlInsertionTest(unittest.TestCase):
         self._setUp(['test_new.diff'])
         self._standardTest(80)
 
+    # See GitHub issue #236
+    @unittest.expectedFailure
+    def test_line_numbering_with_deletions(self):
+        self._setUp(['test_new_py.diff', 'test_edit_py.diff'])
+        with self.subTest("Issue URL insertion"):
+            # was issue URL successfully inserted?
+            self._standardTest(1, False)
+        with self.subTest("Issue insertion line numbering"):
+            # make sure the log reports having inserted the issue based on the
+            # correct line numbering of the updated file
+            self.assertIn("Processing issue 1 of 2: 'Do more stuff' @ example_file.py:7",
+                          self.output_log)
+        with self.subTest("Issue deletion line numbering"):
+            # make sure the log reports having closed the issue based on the
+            # correct line numbering of the old (not the updated!) file
+            self.assertIn("Processing issue 2 of 2: 'Come up with a more imaginative greeting' @ example_file.py:2",
+                          self.output_log)
+
+        if self.num_subtest_failures > 0:
+            self.fail(
+                '\n'.join([
+                '',
+                'One or more subtests have failed',
+                'Processing log',
+                '--------------',
+                ''])+
+                self.output_log)
+
     # There is a known bug related to this issue, so until it's resolved
     # this is an expected failure.
     # See #225 and #224

From 57928cd01e15fc55949ffa8b0648aa9e3d1b1e01 Mon Sep 17 00:00:00 2001
From: Robert Alonso <17463757+rgalonso@users.noreply.github.com>
Date: Mon, 11 Nov 2024 23:23:27 +0000
Subject: [PATCH 7/9] fix: resolve numerous issue stemming from
 case-(in)sensitivity matching of TODOs

Some parts of the code were using case-insensitive
matches when searching for an identifier (i.e.
TODO and todo would both be acceptable) whereas
other parts of the code would search for a strict
case-sensitive match (only TODO, not todo). This
inconsistency led to many issues, among them
- #216
- #224
- #225

Further, the identifier match wasn't being done
using word breaks, meaning an identifier of "FIX"
would match (case-insensitively) with "suffix" or
"prefix" if those words happened to appear in a
comment. (See #234).

Issue #230 was also preventing issue labels from
being applied properly if the identifier case
did not match exactly the canonical version. i.e.
"todo" would generate an issue, but the associated
labels wouldn't be applied because the exact
identifier used wasn't "TODO".

This commit resolves all of these issues.
---
 Issue.py      |   4 +-
 TodoParser.py | 269 ++++++++++++++++++++++++++++++--------------------
 2 files changed, 165 insertions(+), 108 deletions(-)

diff --git a/Issue.py b/Issue.py
index b7c8e6e..36b50f6 100644
--- a/Issue.py
+++ b/Issue.py
@@ -2,7 +2,7 @@ class Issue(object):
     """Basic Issue model for collecting the necessary info to send to GitHub."""
 
     def __init__(self, title, labels, assignees, milestone, body, hunk, file_name,
-                 start_line, num_lines, markdown_language, status, identifier, ref, issue_url, issue_number):
+                 start_line, num_lines, markdown_language, status, identifier, identifier_actual, ref, issue_url, issue_number, start_line_within_hunk=1):
         self.title = title
         self.labels = labels
         self.assignees = assignees
@@ -15,9 +15,11 @@ class Issue(object):
         self.markdown_language = markdown_language
         self.status = status
         self.identifier = identifier
+        self.identifier_actual = identifier_actual
         self.ref = ref
         self.issue_url = issue_url
         self.issue_number = issue_number
+        self.start_line_within_hunk = start_line_within_hunk
 
     def __str__(self):
         selflist = []
diff --git a/TodoParser.py b/TodoParser.py
index af4dc05..801702e 100644
--- a/TodoParser.py
+++ b/TodoParser.py
@@ -6,6 +6,7 @@ from Issue import Issue
 import requests
 import json
 from urllib.parse import urlparse
+import itertools
 
 class TodoParser(object):
     """Parser for extracting information from a given diff file."""
@@ -208,7 +209,13 @@ class TodoParser(object):
 
         # Now for each code block, check for comments, then those comments for TODOs.
         for block in code_blocks:
+            # convert hunk string into newline-separated list (excluding first element which is always null and not actually first line of hunk)
+            bl=block['hunk'].split('\n')[1:]
+
             for marker in block['markers']:
+                # initialize list
+                contiguous_comments_and_positions = []
+
                 # Check if there are line or block comments.
                 if marker['type'] == 'line':
                     # Add a negative lookup to include the second character from alternative comment patterns.
@@ -240,32 +247,95 @@ class TodoParser(object):
                                        + (r'(?!(' + '|'.join(suff_escape_list) + r'))' if len(suff_escape_list) > 0
                                           else '')
                                        + r'\s*.+$)')
-                    comments = re.finditer(comment_pattern, block['hunk'], re.MULTILINE)
-                    extracted_comments = []
-                    prev_comment = None
-                    for i, comment in enumerate(comments):
-                        if prev_comment and comment.start() == prev_comment.end() + 1:
-                            extracted_comments[len(extracted_comments) - 1].append(comment)
-                        else:
-                            extracted_comments.append([comment])
-                        prev_comment = comment
-                    for comment in extracted_comments:
-                        extracted_issues = self._extract_issue_if_exists(comment, marker, block)
-                        if extracted_issues:
-                            issues.extend(extracted_issues)
-                else:
-                    comment_pattern = (r'(?:[+\-\s]\s*' + marker['pattern']['start'] + r'.*?'
-                                       + marker['pattern']['end'] + ')')
-                    comments = re.finditer(comment_pattern, block['hunk'], re.DOTALL)
-                    extracted_comments = []
-                    for i, comment in enumerate(comments):
-                        if re.search('|'.join(self.identifiers), comment.group(0)):
-                            extracted_comments.append([comment])
 
-                    for comment in extracted_comments:
-                        extracted_issues = self._extract_issue_if_exists(comment, marker, block)
-                        if extracted_issues:
-                            issues.extend(extracted_issues)
+                    # create regex object to search for comments
+                    compiled_pattern=re.compile(comment_pattern)
+                    # for each element of list, enumerate it and if value is a regex match, include it in list that is returned,
+                    # where each element of the list is a dictionary that is the start end end lines of the match (relative to
+                    # start of the hunk) and the matching string itself
+                    comments_and_positions = [{'start': i, 'end': i, 'comment': x} for i, x in enumerate(bl) if compiled_pattern.search(x)]
+                    if len(comments_and_positions) > 0:
+                        # create filtered list which consolidates contiguous lines
+                        contiguous_comments_and_positions=[comments_and_positions[0]]
+                        for j, x in enumerate(comments_and_positions[1:]):
+                            if x['start'] == (comments_and_positions[j]['end'] + 1):
+                                contiguous_comments_and_positions[-1]['end']+=1
+                                contiguous_comments_and_positions[-1]['comment'] += '\n' + x['comment']
+                            else:
+                                contiguous_comments_and_positions.append(x)
+                else:
+                    # pattern consists of one group, the first comment block encountered
+                    pattern=(r'([+\-\s]\s*' + marker['pattern']['start'] + r'.*?'
+                                       + marker['pattern']['end'] + ')')
+                    # compile above pattern
+                    compiled_pattern = re.compile(pattern, re.DOTALL)
+
+                    # search for the pattern within the hunk and
+                    # return a list of iterators to all of the matches
+                    match_iters = compiled_pattern.finditer(block['hunk'])
+
+                    # split off into overlapping pairs. i.e. ['A', 'B', C'] => [('A', 'B'), ('B', 'C')]
+                    pairs = itertools.pairwise(match_iters)
+
+                    for i, pair in enumerate(pairs):
+                        # get start/end index (within hunk) of previous section
+                        prev_span = pair[0].span()
+
+                        # if first iteration, special handling
+                        if i == 0:
+                            # set start line and comment string of first section
+                            contiguous_comments_and_positions.append({
+                                        # -1 to ignore first newline, which isn't actually part of the hunk
+                                        'start': block['hunk'].count('\n', 0, prev_span[0]) - 1,
+                                        'end': 0,
+                                        'comment': pair[0].group(0)
+                                    })
+                            # get number of lines in first section
+                            num_lines_in_first_section = block['hunk'].count('\n', prev_span[0], prev_span[1])
+                            # set end line of first section relative to its start
+                            contiguous_comments_and_positions[-1]['end'] = contiguous_comments_and_positions[-1]['start'] + num_lines_in_first_section
+
+                        # get start/end index (within hunk) of current section
+                        curr_span = pair[1].span()
+                        # determine number of lines between previous end and current start
+                        num_lines_from_prev_section_end_line = block['hunk'].count('\n', prev_span[1], curr_span[0])
+                        # set start line of current section based on previous end
+                        contiguous_comments_and_positions.append({
+                                    'start': contiguous_comments_and_positions[-1]['end'] + num_lines_from_prev_section_end_line,
+                                    'end': 0,
+                                    'comment': pair[1].group(0)
+                                })
+                        # get number of lines in current section
+                        num_lines_in_curr_section = block['hunk'].count('\n', curr_span[0], curr_span[1])
+                        # set end line of current section relative to its start
+                        contiguous_comments_and_positions[-1]['end'] = contiguous_comments_and_positions[-1]['start'] + num_lines_in_curr_section
+
+                    # handle potential corner case where there was only one match
+                    # and therefore it couldn't be paired off
+                    if len(contiguous_comments_and_positions) == 0:
+                        # redo the search, this time returning the
+                        # result directly rather than an iterator
+                        match = compiled_pattern.search(block['hunk'])
+
+                        if match:
+                            # get start/end index (within hunk) of this section
+                            span = match.span()
+                            # set start line and comment string of first section
+                            contiguous_comments_and_positions.append({
+                                        # -1 to ignore first newline, which isn't actually part of the hunk
+                                        'start': block['hunk'].count('\n', 0, span[0]) - 1,
+                                        'end': 0,
+                                        'comment': match.group(0)
+                                    })
+                            # get number of lines in first section
+                            num_lines_in_first_section = block['hunk'].count('\n', span[0], span[1])
+                            # set end line of first section relative to its start
+                            contiguous_comments_and_positions[-1]['end'] = contiguous_comments_and_positions[-1]['start'] + num_lines_in_first_section
+
+                for comment_and_position in contiguous_comments_and_positions:
+                    extracted_issues = self._extract_issue_if_exists(comment_and_position, marker, block)
+                    if extracted_issues:
+                        issues.extend(extracted_issues)
 
         for i, issue in enumerate(issues):
             # Strip some of the diff symbols so it can be included as a code snippet in the issue body.
@@ -304,80 +374,72 @@ class TodoParser(object):
                     return syntax_details, ace_mode
         return None, None
 
-    def _extract_issue_if_exists(self, comment, marker, code_block):
+    def _extract_issue_if_exists(self, comment_block, marker, hunk_info):
         """Check this comment for TODOs, and if found, build an Issue object."""
         curr_issue = None
         found_issues = []
         line_statuses = []
         prev_line_title = False
-        for match in comment:
-            comment_lines = match.group().split('\n')
-            for line in comment_lines:
-                line_status, committed_line = self._get_line_status(line)
-                line_statuses.append(line_status)
-                cleaned_line = self._clean_line(committed_line, marker)
-                line_title, ref, identifier = self._get_title(cleaned_line)
-                if line_title:
-                    if prev_line_title and line_status == line_statuses[-2]:
-                        # This means that there is a separate one-line TODO directly above this one.
-                        # We need to store the previous one.
-                        curr_issue.status = line_status
-                        found_issues.append(curr_issue)
-                    curr_issue = Issue(
-                        title=line_title,
-                        labels=[],
-                        assignees=[],
-                        milestone=None,
-                        body=[],
-                        hunk=code_block['hunk'],
-                        file_name=code_block['file'],
-                        start_line=code_block['start_line'],
-                        num_lines=1,
-                        markdown_language=code_block['markdown_language'],
-                        status=line_status,
-                        identifier=identifier,
-                        ref=ref,
-                        issue_url=None,
-                        issue_number=None
-                    )
-                    prev_line_title = True
+        comment_lines = comment_block['comment'].split('\n')
+        for line_number_within_comment_block, line in enumerate(comment_lines):
+            line_status, committed_line = self._get_line_status(line)
+            line_statuses.append(line_status)
+            cleaned_line = self._clean_line(committed_line, marker)
+            line_title, ref, identifier, identifier_actual = self._get_title(cleaned_line)
+            if line_title:
+                if prev_line_title and line_status == line_statuses[-2]:
+                    # This means that there is a separate one-line TODO directly above this one.
+                    # We need to store the previous one.
+                    curr_issue.status = line_status
+                    found_issues.append(curr_issue)
+                curr_issue = Issue(
+                    title=line_title,
+                    labels=[],
+                    assignees=[],
+                    milestone=None,
+                    body=[],
+                    hunk=hunk_info['hunk'],
+                    file_name=hunk_info['file'],
+                    start_line=hunk_info['start_line'] + comment_block['start'] + line_number_within_comment_block,
+                    start_line_within_hunk=comment_block['start'] + line_number_within_comment_block + 1,
+                    num_lines=1,
+                    markdown_language=hunk_info['markdown_language'],
+                    status=line_status,
+                    identifier=identifier,
+                    identifier_actual=identifier_actual,
+                    ref=ref,
+                    issue_url=None,
+                    issue_number=None
+                )
+                prev_line_title = True
 
-                    # Calculate the file line number that this issue references.
-                    hunk_lines = re.finditer(self.LINE_PATTERN, code_block['hunk'], re.MULTILINE)
-                    start_line = code_block['start_line']
-                    for i, hunk_line in enumerate(hunk_lines):
-                        if hunk_line.group(0) == line:
-                            curr_issue.start_line = start_line
-                            break
-                        if i != 0 and (hunk_line.group(0).startswith('+') or not hunk_line.group(0).startswith('-')):
-                            start_line += 1
+            elif curr_issue:
+                # Extract other issue information that may exist below the title.
+                line_labels = self._get_labels(cleaned_line)
+                line_assignees = self._get_assignees(cleaned_line)
+                line_milestone = self._get_milestone(cleaned_line)
+                line_url = self._get_issue_url(cleaned_line)
+                if line_labels:
+                    curr_issue.labels.extend(line_labels)
+                elif line_assignees:
+                    curr_issue.assignees.extend(line_assignees)
+                elif line_milestone:
+                    curr_issue.milestone = line_milestone
+                elif line_url:
+                    curr_issue.issue_url = line_url
+                    issue_number_search = self.ISSUE_NUMBER_PATTERN.search(line_url)
+                    if issue_number_search:
+                        curr_issue.issue_number = issue_number_search.group(1)
+                elif len(cleaned_line) and line_status != LineStatus.DELETED:
+                    if self.should_escape:
+                        curr_issue.body.append(self._escape_markdown(cleaned_line))
+                    else:
+                        curr_issue.body.append(cleaned_line)
+                if not line.startswith('-'):
+                    curr_issue.num_lines += 1
+            if not line_title:
+                prev_line_title = False
 
-                elif curr_issue:
-                    # Extract other issue information that may exist below the title.
-                    line_labels = self._get_labels(cleaned_line)
-                    line_assignees = self._get_assignees(cleaned_line)
-                    line_milestone = self._get_milestone(cleaned_line)
-                    line_url = self._get_issue_url(cleaned_line)
-                    if line_labels:
-                        curr_issue.labels.extend(line_labels)
-                    elif line_assignees:
-                        curr_issue.assignees.extend(line_assignees)
-                    elif line_milestone:
-                        curr_issue.milestone = line_milestone
-                    elif line_url:
-                        curr_issue.issue_url = line_url
-                        issue_number_search = self.ISSUE_NUMBER_PATTERN.search(line_url)
-                        if issue_number_search:
-                            curr_issue.issue_number = issue_number_search.group(1)
-                    elif len(cleaned_line) and line_status != LineStatus.DELETED:
-                        if self.should_escape:
-                            curr_issue.body.append(self._escape_markdown(cleaned_line))
-                        else:
-                            curr_issue.body.append(cleaned_line)
-                    if not line.startswith('-'):
-                        curr_issue.num_lines += 1
-                if not line_title:
-                    prev_line_title = False
         if curr_issue is not None and curr_issue.identifier is not None and self.identifiers_dict is not None:
             for identifier_dict in self.identifiers_dict:
                 if identifier_dict['name'] == curr_issue.identifier:
@@ -468,25 +530,18 @@ class TodoParser(object):
         """Check the passed comment for a new issue title (and reference, if specified)."""
         title = None
         ref = None
+        title_identifier_actual = None
         title_identifier = None
         for identifier in self.identifiers:
-            title_identifier = identifier
-            title_pattern = re.compile(fr'(?<={identifier}[\s:]).+', re.IGNORECASE)
-            title_search = title_pattern.search(comment, re.IGNORECASE)
+            title_pattern = re.compile(fr'(\b{re.escape(identifier)}\b)(\(([^)]+)\))?\s*:?\s*(.+)', re.IGNORECASE)
+            title_search = title_pattern.search(comment)
             if title_search:
-                title = title_search.group(0).strip(': ')
+                title_identifier_actual = title_search.group(1)
+                title_identifier = identifier
+                ref = title_search.group(3) # may be empty, which is OK
+                title = title_search.group(4)
                 break
-            else:
-                title_ref_pattern = re.compile(fr'(?<={identifier}\().+', re.IGNORECASE)
-                title_ref_search = title_ref_pattern.search(comment, re.IGNORECASE)
-                if title_ref_search:
-                    title = title_ref_search.group(0).strip()
-                    ref_search = self.REF_PATTERN.search(title)
-                    if ref_search:
-                        ref = ref_search.group(0)
-                        title = title.replace(ref, '', 1).lstrip(':) ')
-                    break
-        return title, ref, title_identifier
+        return title, ref, title_identifier, title_identifier_actual
 
     def _get_issue_url(self, comment):
         """Check the passed comment for a GitHub issue URL."""

From 7ce78256b65d0507ae9e4dcc195ff0b526c85246 Mon Sep 17 00:00:00 2001
From: Robert Alonso <17463757+rgalonso@users.noreply.github.com>
Date: Mon, 11 Nov 2024 23:36:20 +0000
Subject: [PATCH 8/9] test: allow test to be an expected success now that bug
 is fixed

Earlier commit resolved issues #216, #224, #225
---
 tests/test_process_diff.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/test_process_diff.py b/tests/test_process_diff.py
index 4a71955..689d47a 100644
--- a/tests/test_process_diff.py
+++ b/tests/test_process_diff.py
@@ -108,10 +108,6 @@ class IssueUrlInsertionTest(unittest.TestCase):
                 ''])+
                 self.output_log)
 
-    # There is a known bug related to this issue, so until it's resolved
-    # this is an expected failure.
-    # See #225 and #224
-    @unittest.expectedFailure
     def test_same_title_in_same_file(self):
         self._setUp(['test_same_title_in_same_file.diff'])
         self._standardTest(5)

From 8c28d096ee5f5a297284100395a19ad9b41f5f97 Mon Sep 17 00:00:00 2001
From: Robert Alonso <17463757+rgalonso@users.noreply.github.com>
Date: Mon, 11 Nov 2024 23:41:14 +0000
Subject: [PATCH 9/9] test: allow test to be an expected success now that bug
 is fixed

Earlier commit resolved issue #234
---
 tests/test_todo_parser.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/test_todo_parser.py b/tests/test_todo_parser.py
index ad14e96..c8fd393 100644
--- a/tests/test_todo_parser.py
+++ b/tests/test_todo_parser.py
@@ -140,8 +140,6 @@ class CustomOptionsTest(unittest.TestCase):
         with open('tests/test_new.diff', 'r') as diff_file:
             self.raw_issues.extend(parser.parse(diff_file))
 
-    # See GitHub issue #234
-    @unittest.expectedFailure
     def test_exact_identifier_match(self):
         """
         Verify that issues are only created when there's an exact identifier match