Improve handling of similar/moved TODOs

Closes #70 Closes #53
2026-01-23 06:16:43 +00:00 · 2021-08-07 09:17:00 +01:00 · 2021-08-07 09:17:00 +01:00 · 6687008177
commit 6687008177
parent 3fd212fda1
2 changed files with 29 additions and 7 deletions
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@ Create a `workflow.yml` file in your `.github/workflows` directory like:
        steps:
          - uses: "actions/checkout@master"
          - name: "TODO to Issue"
-            uses: "alstr/todo-to-issue-action@v4.0.9"
+            uses: "alstr/todo-to-issue-action@v4.1"
            id: "todo"
 ```
@@ -154,6 +154,8 @@ You can also specify default projects in the same way by defining `USER_PROJECTS
 Make sure your file language is in `syntax.json`. Also, the action will not recognise TODOs in the first commit to a new repo, or existing TODOs that have already been committed.
 If a similar TODO appears in the diff as both an addition and deletion, it is assumed to have been moved, so is ignored.
 ### Multiple issues have been created
 Issues are created whenever the action runs and finds a newly added TODO in the diff. Rebasing may cause a TODO to show up in a diff multiple times. This is an acknowledged issue, but you may have some luck by adjusting your workflow file.
@@ -169,3 +171,5 @@ The action was developed for the GitHub Hackathon. Whilst every effort is made t
 Thanks to Jacob Tomlinson for [his handy overview of GitHub Actions](https://www.jacobtomlinson.co.uk/posts/2019/creating-github-actions-in-python/).
 Thanks to GitHub's [linguist repo](https://github.com/github/linguist/) for the [`languages.yml`](https://raw.githubusercontent.com/github/linguist/master/lib/linguist/languages.yml) file used by the app to look up file extensions and determine the correct highlighting to apply to code snippets.
 Thanks to all those who have [contributed](https://github.com/alstr/todo-to-issue-action/graphs/contributors) to the further development of this action.
--- a/main.py
+++ b/main.py
@@ -10,6 +10,8 @@ from io import StringIO
 from ruamel.yaml import YAML
 import hashlib
 from enum import Enum
 import itertools
 import operator
 class LineStatus(Enum):
@@ -107,18 +109,19 @@ class GitHubClient(object):
        for existing_issue in self.existing_issues:
            if issue_id in existing_issue['body']:
                # The issue_id matching means the issues are identical.
-                print(f'Skipping issue (already exists)')
+                print(f'Skipping issue (already exists).')
                return
            else:
                # There may be cases (rebasing) where a different SHA means the above comparison is False but the
                # issue is otherwise identical.
-                # Long term we should improve how the action handles rebasing.
+                # For now, if an issue already exists with the same title and file name, we will ignore it.
                # This should cover most use cases. Long term we should improve how the action handles rebasing.
                existing_issue_body = existing_issue['body']
                issue_exists = (formatted_issue_body in existing_issue_body
                                and issue.file_name in existing_issue_body
-                                and issue.markdown_language in existing_issue_body
+                                and issue.markdown_language in existing_issue_body)
                                and issue.hunk in existing_issue_body)
                if issue_exists:
                    print(f'Skipping issue (already exists).')
                    return
        new_issue_body = {'title': title, 'body': body, 'labels': issue.labels}
@@ -596,9 +599,24 @@ if __name__ == "__main__":
        last_diff = StringIO(client.get_last_diff())
        # Parse the diff for TODOs and create an Issue object for each.
        raw_issues = TodoParser().parse(last_diff)
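        # This is a simple, imperfect check to filter out any TODOs that have just been moved.
        # It looks for items that appear in the diff as both an addition and a deletion.
        # It is based on the assumption that TODOs will not have identical titles in identical files.
        # NOTE(review): itertools.groupby only groups *consecutive* items with equal keys, so a moved TODO is
        # only detected when its added and deleted entries are adjacent in raw_issues. If that ordering is not
        # guaranteed by the parser, sort raw_issues by the same key first - confirm.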
        issues_to_process = []
        for values, similar_issues in itertools.groupby(raw_issues, key=operator.attrgetter('title', 'file_name',
                                                                                            'markdown_language')):
            similar_issues = list(similar_issues)
            if (len(similar_issues) == 2 and ((similar_issues[0].status == LineStatus.ADDED and
                                               similar_issues[1].status == LineStatus.DELETED) or
                                              (similar_issues[1].status == LineStatus.ADDED and
                                               similar_issues[0].status == LineStatus.DELETED))):
                print(f'Issue "{values[0]}" appears as both addition and deletion. '
                      f'Assuming this issue has been moved so skipping.')
                continue
            issues_to_process.extend(similar_issues)
        # Cycle through the Issue objects and create or close a corresponding GitHub issue for each.
-        for j, raw_issue in enumerate(raw_issues):
+        for j, raw_issue in enumerate(issues_to_process):
-            print(f'Processing issue {j + 1} of {len(raw_issues)}')
+            print(f'Processing issue {j + 1} of {len(issues_to_process)}')
            if raw_issue.status == LineStatus.ADDED:
                status_code = client.create_issue(raw_issue)
                if status_code == 201: