Improve handling of similar/moved TODOs

Closes #70
Closes #53
This commit is contained in:
alstr 2021-08-07 09:17:00 +01:00
parent 3fd212fda1
commit 6687008177
2 changed files with 29 additions and 7 deletions

View File

@ -37,7 +37,7 @@ Create a `workflow.yml` file in your `.github/workflows` directory like:
steps: steps:
- uses: "actions/checkout@master" - uses: "actions/checkout@master"
- name: "TODO to Issue" - name: "TODO to Issue"
uses: "alstr/todo-to-issue-action@v4.0.9" uses: "alstr/todo-to-issue-action@v4.1"
id: "todo" id: "todo"
``` ```
@ -154,6 +154,8 @@ You can also specify default projects in the same way by defining `USER_PROJECTS
Make sure your file language is in `syntax.json`. Also, the action will not recognise TODOs in the first commit to a new repo, or existing TODOs that have already been committed. Make sure your file language is in `syntax.json`. Also, the action will not recognise TODOs in the first commit to a new repo, or existing TODOs that have already been committed.
If a similar TODO appears in the diff as both an addition and a deletion, it is assumed to have been moved and is therefore ignored.
### Multiple issues have been created ### Multiple issues have been created
Issues are created whenever the action runs and finds a newly added TODO in the diff. Rebasing may cause a TODO to show up in a diff multiple times. This is an acknowledged issue, but you may have some luck by adjusting your workflow file. Issues are created whenever the action runs and finds a newly added TODO in the diff. Rebasing may cause a TODO to show up in a diff multiple times. This is an acknowledged issue, but you may have some luck by adjusting your workflow file.
@ -169,3 +171,5 @@ The action was developed for the GitHub Hackathon. Whilst every effort is made t
Thanks to Jacob Tomlinson for [his handy overview of GitHub Actions](https://www.jacobtomlinson.co.uk/posts/2019/creating-github-actions-in-python/). Thanks to Jacob Tomlinson for [his handy overview of GitHub Actions](https://www.jacobtomlinson.co.uk/posts/2019/creating-github-actions-in-python/).
Thanks to GitHub's [linguist repo](https://github.com/github/linguist/) for the [`languages.yml`](https://raw.githubusercontent.com/github/linguist/master/lib/linguist/languages.yml) file used by the app to look up file extensions and determine the correct highlighting to apply to code snippets. Thanks to GitHub's [linguist repo](https://github.com/github/linguist/) for the [`languages.yml`](https://raw.githubusercontent.com/github/linguist/master/lib/linguist/languages.yml) file used by the app to look up file extensions and determine the correct highlighting to apply to code snippets.
Thanks to all those who have [contributed](https://github.com/alstr/todo-to-issue-action/graphs/contributors) to the further development of this action.

30
main.py
View File

@ -10,6 +10,8 @@ from io import StringIO
from ruamel.yaml import YAML from ruamel.yaml import YAML
import hashlib import hashlib
from enum import Enum from enum import Enum
import itertools
import operator
class LineStatus(Enum): class LineStatus(Enum):
@ -107,18 +109,19 @@ class GitHubClient(object):
for existing_issue in self.existing_issues: for existing_issue in self.existing_issues:
if issue_id in existing_issue['body']: if issue_id in existing_issue['body']:
# The issue_id matching means the issues are identical. # The issue_id matching means the issues are identical.
print(f'Skipping issue (already exists)') print(f'Skipping issue (already exists).')
return return
else: else:
# There may be cases (rebasing) where a different SHA means the above comparison is False but the # There may be cases (rebasing) where a different SHA means the above comparison is False but the
# issue is otherwise identical. # issue is otherwise identical.
# Long term we should improve how the action handles rebasing. # For now, if an issue already exists with the same title and file name, we will ignore it.
# This should cover most use cases. Long term we should improve how the action handles rebasing.
existing_issue_body = existing_issue['body'] existing_issue_body = existing_issue['body']
issue_exists = (formatted_issue_body in existing_issue_body issue_exists = (formatted_issue_body in existing_issue_body
and issue.file_name in existing_issue_body and issue.file_name in existing_issue_body
and issue.markdown_language in existing_issue_body and issue.markdown_language in existing_issue_body)
and issue.hunk in existing_issue_body)
if issue_exists: if issue_exists:
print(f'Skipping issue (already exists).')
return return
new_issue_body = {'title': title, 'body': body, 'labels': issue.labels} new_issue_body = {'title': title, 'body': body, 'labels': issue.labels}
@ -596,9 +599,24 @@ if __name__ == "__main__":
last_diff = StringIO(client.get_last_diff()) last_diff = StringIO(client.get_last_diff())
# Parse the diff for TODOs and create an Issue object for each. # Parse the diff for TODOs and create an Issue object for each.
raw_issues = TodoParser().parse(last_diff) raw_issues = TodoParser().parse(last_diff)
# This is a simple, imperfect check to filter out any TODOs that have just been moved.
# It looks for items that appear in the diff as both an addition and deletion.
# It is based on the assumption that TODOs will not have identical titles in identical files.
issues_to_process = []
for values, similar_issues in itertools.groupby(raw_issues, key=operator.attrgetter('title', 'file_name',
'markdown_language')):
similar_issues = list(similar_issues)
if (len(similar_issues) == 2 and ((similar_issues[0].status == LineStatus.ADDED and
similar_issues[1].status == LineStatus.DELETED) or
(similar_issues[1].status == LineStatus.ADDED and
similar_issues[0].status == LineStatus.DELETED))):
print(f'Issue "{values[0]}" appears as both addition and deletion. '
f'Assuming this issue has been moved so skipping.')
continue
issues_to_process.extend(similar_issues)
# Cycle through the Issue objects and create or close a corresponding GitHub issue for each. # Cycle through the Issue objects and create or close a corresponding GitHub issue for each.
for j, raw_issue in enumerate(raw_issues): for j, raw_issue in enumerate(issues_to_process):
print(f'Processing issue {j + 1} of {len(raw_issues)}') print(f'Processing issue {j + 1} of {len(issues_to_process)}')
if raw_issue.status == LineStatus.ADDED: if raw_issue.status == LineStatus.ADDED:
status_code = client.create_issue(raw_issue) status_code = client.create_issue(raw_issue)
if status_code == 201: if status_code == 201: