Update line parsing process

Closes #90
alstr 2024-09-19 16:53:59 +01:00
parent da852bef15
commit 28b4a92a27

main.py (108 lines changed)

@@ -26,7 +26,7 @@ class Issue(object):
     """Basic Issue model for collecting the necessary info to send to GitHub."""

     def __init__(self, title, labels, assignees, milestone, body, hunk, file_name,
-                 start_line, markdown_language, status, identifier, ref, issue_url, issue_number):
+                 start_line, num_lines, markdown_language, status, identifier, ref, issue_url, issue_number):
         self.title = title
         self.labels = labels
         self.assignees = assignees
@@ -35,6 +35,7 @@ class Issue(object):
         self.hunk = hunk
         self.file_name = file_name
         self.start_line = start_line
+        self.num_lines = num_lines
         self.markdown_language = markdown_language
         self.status = status
         self.identifier = identifier
@@ -161,7 +162,10 @@ class GitHubClient(object):
             # Title is too long.
             title = title[:80] + '...'
         formatted_issue_body = self.line_break.join(issue.body)
-        url_to_line = f'{self.line_base_url}{self.repo}/blob/{self.sha}/{issue.file_name}#L{issue.start_line}'
+        line_num_anchor = f'#L{issue.start_line}'
+        if issue.num_lines > 1:
+            line_num_anchor += f'-L{issue.start_line + issue.num_lines - 1}'
+        url_to_line = f'{self.line_base_url}{self.repo}/blob/{self.sha}/{issue.file_name}{line_num_anchor}'
         snippet = '```' + issue.markdown_language + '\n' + issue.hunk + '\n' + '```'

         issue_template = os.getenv('INPUT_ISSUE_TEMPLATE', None)
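For illustration, a minimal standalone sketch of the new anchor logic (the start_line and num_lines values here are made up, not taken from the commit):

    start_line = 10
    num_lines = 3
    line_num_anchor = f'#L{start_line}'
    if num_lines > 1:
        # Multi-line TODOs now link to a range rather than a single line.
        line_num_anchor += f'-L{start_line + num_lines - 1}'
    print(line_num_anchor)  # prints '#L10-L12'

A single-line TODO (num_lines == 1) keeps the old single-line anchor, so existing links are unaffected.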
@@ -225,10 +229,14 @@ class GitHubClient(object):
     def close_issue(self, issue):
         """Check to see if this issue can be found on GitHub and if so close it."""
-        matched = 0
         issue_number = None
+        if issue.issue_number:
+            # If URL insertion is enabled.
+            issue_number = issue.issue_number
+        else:
+            # Try simple matching.
+            matched = 0
             for existing_issue in self.existing_issues:
+                # This is admittedly a simple check that may not work in complex scenarios, but we can't deal with them yet.
                 if existing_issue['title'] == issue.title:
                     matched += 1
                     # If there are multiple issues with similar titles, don't try and close any.
@@ -236,13 +244,12 @@
                         print(f'Skipping issue (multiple matches)')
                         break
                     issue_number = existing_issue['number']
-        else:
-            # The titles match, so we will try and close the issue.
-            update_issue_url = f'{self.repos_url}{self.repo}/issues/{issue_number}'
+        if issue_number:
+            update_issue_url = f'{self.issues_url}/{issue_number}'
             body = {'state': 'closed'}
             requests.patch(update_issue_url, headers=self.issue_headers, data=json.dumps(body))
-            issue_comment_url = f'{self.repos_url}{self.repo}/issues/{issue_number}/comments'
+            issue_comment_url = f'{self.issues_url}/{issue_number}/comments'
             body = {'body': f'Closed in {self.sha}'}
             update_issue_request = requests.post(issue_comment_url, headers=self.issue_headers,
                                                  data=json.dumps(body))
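Read together with the previous hunk, close_issue now resolves the issue number in two stages before closing anything. A rough sketch of that decision order as a hypothetical standalone helper (resolve_issue_number is not part of the commit):

    def resolve_issue_number(issue, existing_issues):
        # Prefer the number parsed from an inserted issue URL, if present.
        if issue.issue_number:
            return issue.issue_number
        # Otherwise fall back to simple title matching, refusing ambiguity.
        matches = [e['number'] for e in existing_issues if e['title'] == issue.title]
        return matches[0] if len(matches) == 1 else None

Only when a single unambiguous number is found does the method PATCH the issue closed and POST the 'Closed in {sha}' comment.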
@@ -480,17 +487,15 @@ class TodoParser(object):
             extracted_comments = []
             prev_comment = None
             for i, comment in enumerate(comments):
-                if i == 0 or re.search(fr'{marker["pattern"]}\s?' + '|'.join(self.identifiers), comment.group(0),
-                                       re.IGNORECASE):
-                    extracted_comments.append([comment])
-                else:
-                    if comment.start() == prev_comment.end() + 1:
-                        extracted_comments[len(extracted_comments) - 1].append(comment)
+                if prev_comment and comment.start() == prev_comment.end() + 1:
+                    extracted_comments[len(extracted_comments) - 1].append(comment)
+                else:
+                    extracted_comments.append([comment])
                 prev_comment = comment
             for comment in extracted_comments:
-                issue = self._extract_issue_if_exists(comment, marker, block)
-                if issue:
-                    issues.append(issue)
+                extracted_issues = self._extract_issue_if_exists(comment, marker, block)
+                if extracted_issues:
+                    issues.extend(extracted_issues)
                 else:
                     comment_pattern = (r'(?:[+\-\s]\s*' + marker['pattern']['start'] + r'.*?'
                                        + marker['pattern']['end'] + ')')
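The rewritten loop groups comments purely by adjacency: a match that starts one character after the previous match ends joins the previous group, and anything else starts a new group. A small self-contained illustration using (start, end) offsets in place of re match objects (the offsets are invented):

    comments = [(0, 10), (11, 25), (40, 52)]  # (start, end) of each match
    extracted_comments = []
    prev_comment = None
    for comment in comments:
        if prev_comment and comment[0] == prev_comment[1] + 1:
            extracted_comments[-1].append(comment)  # contiguous: same block
        else:
            extracted_comments.append([comment])    # gap: new block
        prev_comment = comment
    print(extracted_comments)  # [[(0, 10), (11, 25)], [(40, 52)]]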
@@ -501,9 +506,9 @@ class TodoParser(object):
                         extracted_comments.append([comment])
                     for comment in extracted_comments:
-                        issue = self._extract_issue_if_exists(comment, marker, block)
-                        if issue:
-                            issues.append(issue)
+                        extracted_issues = self._extract_issue_if_exists(comment, marker, block)
+                        if extracted_issues:
+                            issues.extend(extracted_issues)

         for i, issue in enumerate(issues):
             # Strip some of the diff symbols so it can be included as a code snippet in the issue body.
@@ -515,20 +520,6 @@ class TodoParser(object):
             cleaned_hunk = re.sub(r'\n\sNo newline at end of file', '', cleaned_hunk, 0, re.MULTILINE)
             issue.hunk = cleaned_hunk
-            # The parser creates a new issue object every time it detects the relevant keyword.
-            # If a TODO is amended, there will be a deletion and an addition issue object created.
-            # The deletion won't have the issue URL because the parser immediately notices the addition.
-            # Therefore, check if the issue prior to this one should have a URL set.
-            if i == 0:
-                continue
-            previous_issue = issues[i - 1]
-            if (issue.start_line == previous_issue.start_line and
-                    issue.file_name == previous_issue.file_name and
-                    issue.status != previous_issue.status and
-                    issue.issue_url and not previous_issue.issue_url):
-                # Update the previous issue with the current issue's URL.
-                previous_issue.issue_url = issue.issue_url

         return issues

     def _get_language_details(self, language_name, attribute, value):
@@ -557,8 +548,10 @@ class TodoParser(object):

     def _extract_issue_if_exists(self, comment, marker, code_block):
         """Check this comment for TODOs, and if found, build an Issue object."""
-        issue = None
+        curr_issue = None
+        found_issues = []
         line_statuses = []
+        prev_line_title = False
         for match in comment:
             comment_lines = match.group().split('\n')
             for line in comment_lines:
@@ -567,7 +560,11 @@ class TodoParser(object):
                 cleaned_line = self._clean_line(committed_line, marker)
                 line_title, ref, identifier = self._get_title(cleaned_line)
                 if line_title:
-                    issue = Issue(
+                    if line_status == line_statuses[-1] and prev_line_title:
+                        # This means that there is a separate one-line TODO directly above this one.
+                        # We need to store the previous one.
+                        found_issues.append(curr_issue)
+                    curr_issue = Issue(
                         title=line_title,
                         labels=['todo'],
                         assignees=[],
@@ -576,6 +573,7 @@ class TodoParser(object):
                         hunk=code_block['hunk'],
                         file_name=code_block['file'],
                         start_line=code_block['start_line'],
+                        num_lines=1,
                         markdown_language=code_block['markdown_language'],
                         status=None,
                         identifier=identifier,
@@ -583,56 +581,62 @@
                         issue_url=None,
                         issue_number=None
                     )
+                    prev_line_title = True
                     # Calculate the file line number that this issue references.
                     hunk_lines = re.finditer(self.LINE_PATTERN, code_block['hunk'], re.MULTILINE)
                     start_line = code_block['start_line']
                     for i, hunk_line in enumerate(hunk_lines):
                         if hunk_line.group(0) == line:
-                            issue.start_line = start_line
+                            curr_issue.start_line = start_line
                             break
                         if i != 0 and (hunk_line.group(0).startswith('+') or not hunk_line.group(0).startswith('-')):
                             start_line += 1
-                elif issue:
+                elif curr_issue:
                     # Extract other issue information that may exist.
                     line_labels = self._get_labels(cleaned_line)
                     line_assignees = self._get_assignees(cleaned_line)
                     line_milestone = self._get_milestone(cleaned_line)
                     line_url = self._get_issue_url(cleaned_line)
                     if line_labels:
-                        issue.labels.extend(line_labels)
+                        curr_issue.labels.extend(line_labels)
                     elif line_assignees:
-                        issue.assignees.extend(line_assignees)
+                        curr_issue.assignees.extend(line_assignees)
                     elif line_milestone:
-                        issue.milestone = line_milestone
+                        curr_issue.milestone = line_milestone
                     elif line_url:
-                        issue.issue_url = line_url
+                        curr_issue.issue_url = line_url
                         issue_number_search = self.ISSUE_NUMBER_PATTERN.search(line_url)
                         if issue_number_search:
-                            issue.issue_number = issue_number_search.group(1)
+                            curr_issue.issue_number = issue_number_search.group(1)
                     elif len(cleaned_line):
                         if self.should_escape:
-                            issue.body.append(self._escape_markdown(cleaned_line))
+                            curr_issue.body.append(self._escape_markdown(cleaned_line))
                         else:
-                            issue.body.append(cleaned_line)
+                            curr_issue.body.append(cleaned_line)
+                    if not line.startswith('-'):
+                        curr_issue.num_lines += 1
+                    if not line_title:
+                        prev_line_title = False
-        if issue is not None and issue.identifier is not None and self.identifiers_dict is not None:
+        if curr_issue is not None and curr_issue.identifier is not None and self.identifiers_dict is not None:
             for identifier_dict in self.identifiers_dict:
-                if identifier_dict['name'] == issue.identifier:
+                if identifier_dict['name'] == curr_issue.identifier:
                     for label in identifier_dict['labels']:
-                        if label not in issue.labels:
-                            issue.labels.append(label)
+                        if label not in curr_issue.labels:
+                            curr_issue.labels.append(label)
-        if issue is not None:
+        if curr_issue is not None:
             # If all the lines are unchanged, don't do anything.
             if all(s == LineStatus.UNCHANGED for s in line_statuses):
                 return None
             # LineStatus.ADDED also covers modifications.
-            issue.status = LineStatus.DELETED if all(s == LineStatus.DELETED for s in line_statuses) \
+            curr_issue.status = LineStatus.DELETED if all(s == LineStatus.DELETED for s in line_statuses) \
                 else LineStatus.ADDED
-        return issue
+            found_issues.append(curr_issue)
+        return found_issues

     @staticmethod
     def _escape_markdown(comment):
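Taken together, these parser changes let one comment block yield several issues: each new TODO title flushes the issue being built into found_issues, and the whole list is returned to the caller. A simplified, hypothetical sketch of that flush pattern (dicts stand in for Issue objects; split_todos is not part of the action):

    def split_todos(lines):
        found, curr = [], None
        for line in lines:
            if line.startswith('TODO:'):
                if curr is not None:
                    found.append(curr)  # a one-line TODO sat directly above
                curr = {'title': line[5:].strip(), 'num_lines': 1}
            elif curr is not None:
                curr['num_lines'] += 1  # continuation line of the same TODO
        if curr is not None:
            found.append(curr)
        return found

    todos = split_todos(['TODO: Fix the validation bug',
                         'TODO: Add a regression test'])
    print(len(todos))  # 2 (previously only the last TODO survived)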