From 2f1ea87bd0138fcc19f37a1cc665fc0332388ef0 Mon Sep 17 00:00:00 2001 From: alstr Date: Fri, 20 Mar 2020 13:50:01 +0000 Subject: [PATCH] Improve diff parsing Create link to each TODO start line Allow multiline TODOs --- main.py | 99 ++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 74 insertions(+), 25 deletions(-) diff --git a/main.py b/main.py index f3bc141..321c005 100644 --- a/main.py +++ b/main.py @@ -15,6 +15,7 @@ def main(): repo = os.getenv('INPUT_REPO') before = os.getenv('INPUT_BEFORE') sha = os.getenv('INPUT_SHA') + comment_marker = os.getenv('COMMENT_MARKER') label = os.getenv('INPUT_LABEL') params = { 'access_token': os.getenv('INPUT_TOKEN') @@ -30,17 +31,74 @@ def main(): if diff_request.status_code == 200: diff = diff_request.text - # Check for additions in the diff. - addition_pattern = re.compile(r'(?<=^\+).*', re.MULTILINE) - additions = addition_pattern.findall(diff) - new_issues = [] + header_pattern = re.compile(r'(?<=diff\s--git\s).+') + hunk_start_pattern = re.compile(r'((?<=^@@\s).+(?=\s@@))') + line_num_pattern = re.compile(r'(?<=\+).+') + addition_pattern = re.compile(r'(?<=^\+\s).+') + deletion_pattern = re.compile(r'(?<=^-\s).+') + todo_pattern = re.compile(r'(?<=' + label + r'\s).+') + comment_pattern = re.compile(r'(?<=' + comment_marker + r'\s).+') - # Filter the additions down to newly added TODOs. - for addition in additions: - todo_pattern = re.compile(r'(?<=' + label + r'\s).*') - todos = todo_pattern.search(addition) - if todos: - new_issues.append(todos.group(0)) + new_issues = [] + closed_issues = [] + + # Read the diff file one line at a time, checking for additions/deletions in each hunk. + with open(diff, 'r') as diff_file: + curr_file = None + hunk_start = None + hunk_index = None + recording = False + + for n, line in enumerate(diff_file): + # First look for a diff header so we can determine the file the changes relate to. + header_search = header_pattern.search(line) + if header_search: + files = header_search.group(0).split(' ') + curr_file = files[1][2:] + else: + # Look for hunks so we can get the line numbers for the changes. + hunk_search = hunk_start_pattern.search(line) + if hunk_search: + hunk = hunk_search.group(0) + line_nums = line_num_pattern.search(hunk).group(0).split(',') + hunk_start = int(line_nums[0]) + hunk_index = n + else: + # Look for additions and deletions (specifically TODOs) within each hunk. + addition_search = addition_pattern.search(line) + if addition_search: + addition = addition_search.group(0) + todo_search = todo_pattern.search(addition) + if todo_search: + # Start recording so we can capture multiline TODOs. + recording = True + todo = todo_search.group(0) + new_issue = { + 'labels': ['todo'], + 'todo': todo, + 'body': todo, + 'file': curr_file, + 'line_num': hunk_start + (n - hunk_index - 1) + } + new_issues.append(new_issue) + continue + elif recording: + # If we are recording, check if the current line continues the last. + comment_search = comment_pattern.search(addition) + if comment_search: + comment = comment_search.group(0).lstrip() + last_issue = new_issues[len(new_issues) - 1] + last_issue['body'] += '\n' + comment + continue + else: + deletion_search = deletion_pattern.search(line) + if deletion_search: + deletion = deletion_search.group(0) + todo_search = todo_pattern.search(deletion) + if todo_search: + closed_issues.append(todo_search.group(0)) + if recording: + recording = False # Create new issues for any newly added TODOs. issues_url = f'{base_url}{repo}/issues' @@ -48,27 +106,18 @@ def main(): 'Content-Type': 'application/json', } for issue in new_issues: - title = issue + title = issue['todo'] # Truncate the title if it's longer than 50 chars. if len(title) > 50: - title = issue[:50] + '...' - new_issue_body = {'title': title, 'body': issue, 'labels': ['todo']} + title = title[:50] + '...' + file = issue['file'] + line = issue['line_num'] + body = issue['body'] + '\n' + f'https://github.com/{ repo }/blob/{ sha }/{ file }#L{ line }' + new_issue_body = {'title': title, 'body': body, 'labels': ['todo']} requests.post(url=issues_url, headers=issue_headers, params=params, data=json.dumps(new_issue_body)) # Don't add too many issues too quickly. sleep(1) - # Check for deletions in the diff. - deletion_pattern = re.compile(r'(?<=^-).*', re.MULTILINE) - deletions = deletion_pattern.findall(diff) - closed_issues = [] - - # Filter the deletions down to removed TODOs. - for deletion in deletions: - todo_pattern = re.compile(r'(?<=' + label + r'\s).*') - todos = todo_pattern.search(deletion) - if todos: - closed_issues.append(todos.group(0)) - if len(closed_issues) > 0: # Get the list of current issues. list_issues_request = requests.get(issues_url, headers=issue_headers, params=params)