mirror of
https://github.com/ditkrg/todo-to-issue-action.git
synced 2026-01-22 22:06:43 +00:00
Merge pull request #240 from rgalonso/fix/line-numbering-with-additions-and-deletions
fix: get correct line numbering of old and new lines
This commit is contained in:
commit
ff0c64d51e
@ -11,9 +11,7 @@ import itertools
|
|||||||
class TodoParser(object):
|
class TodoParser(object):
|
||||||
"""Parser for extracting information from a given diff file."""
|
"""Parser for extracting information from a given diff file."""
|
||||||
FILE_HUNK_PATTERN = r'(?<=diff)(.*?)(?=diff\s--git\s)'
|
FILE_HUNK_PATTERN = r'(?<=diff)(.*?)(?=diff\s--git\s)'
|
||||||
HEADER_PATTERN = r'(?<=--git).*?(?=$\n(index|new|deleted))'
|
HEADERS_PATTERN = re.compile(r'(?<=--git) a/(.*?) b/(.*?)$\n(?=((new|deleted).*?$\n)?index ([0-9a-f]+)\.\.([0-9a-f]+))', re.MULTILINE)
|
||||||
LINE_PATTERN = r'^.*$'
|
|
||||||
FILENAME_PATTERN = re.compile(r'(?<=a/).+?(?=\sb/)')
|
|
||||||
LINE_NUMBERS_PATTERN = re.compile(r'@@[\d\s,\-+]*\s@@.*')
|
LINE_NUMBERS_PATTERN = re.compile(r'@@[\d\s,\-+]*\s@@.*')
|
||||||
LINE_NUMBERS_INNER_PATTERN = re.compile(r'@@[\d\s,\-+]*\s@@')
|
LINE_NUMBERS_INNER_PATTERN = re.compile(r'@@[\d\s,\-+]*\s@@')
|
||||||
ADDITION_PATTERN = re.compile(r'(?<=^\+).*')
|
ADDITION_PATTERN = re.compile(r'(?<=^\+).*')
|
||||||
@ -154,15 +152,10 @@ class TodoParser(object):
|
|||||||
# Iterate through each section extracted above.
|
# Iterate through each section extracted above.
|
||||||
for hunk in extracted_file_hunks:
|
for hunk in extracted_file_hunks:
|
||||||
# Extract the file information so we can figure out the Markdown language and comment syntax.
|
# Extract the file information so we can figure out the Markdown language and comment syntax.
|
||||||
header_search = re.search(self.HEADER_PATTERN, hunk, re.MULTILINE)
|
headers = self.HEADERS_PATTERN.search(hunk)
|
||||||
if not header_search:
|
if not headers:
|
||||||
continue
|
continue
|
||||||
files = header_search.group(0)
|
curr_file = headers.group(2)
|
||||||
|
|
||||||
filename_search = re.search(self.FILENAME_PATTERN, files)
|
|
||||||
if not filename_search:
|
|
||||||
continue
|
|
||||||
curr_file = filename_search.group(0)
|
|
||||||
if self._should_ignore(curr_file):
|
if self._should_ignore(curr_file):
|
||||||
continue
|
continue
|
||||||
curr_markers, curr_markdown_language = self._get_file_details(curr_file)
|
curr_markers, curr_markdown_language = self._get_file_details(curr_file)
|
||||||
@ -175,15 +168,18 @@ class TodoParser(object):
|
|||||||
for i, line_numbers in enumerate(line_numbers_iterator):
|
for i, line_numbers in enumerate(line_numbers_iterator):
|
||||||
line_numbers_inner_search = re.search(self.LINE_NUMBERS_INNER_PATTERN, line_numbers.group(0))
|
line_numbers_inner_search = re.search(self.LINE_NUMBERS_INNER_PATTERN, line_numbers.group(0))
|
||||||
line_numbers_str = line_numbers_inner_search.group(0).strip('@@ -')
|
line_numbers_str = line_numbers_inner_search.group(0).strip('@@ -')
|
||||||
start_line = line_numbers_str.split(' ')[1].strip('+')
|
deleted_start_line = line_numbers_str.split(' ')[0]
|
||||||
start_line = int(start_line.split(',')[0])
|
deleted_start_line = int(deleted_start_line.split(',')[0])
|
||||||
|
added_start_line = line_numbers_str.split(' ')[1].strip('+')
|
||||||
|
added_start_line = int(added_start_line.split(',')[0])
|
||||||
|
|
||||||
# Put this information into a temporary dict for simplicity.
|
# Put this information into a temporary dict for simplicity.
|
||||||
block = {
|
block = {
|
||||||
'file': curr_file,
|
'file': curr_file,
|
||||||
'markers': curr_markers,
|
'markers': curr_markers,
|
||||||
'markdown_language': curr_markdown_language,
|
'markdown_language': curr_markdown_language,
|
||||||
'start_line': start_line,
|
'deleted_start_line': deleted_start_line,
|
||||||
|
'added_start_line': added_start_line,
|
||||||
'hunk': hunk,
|
'hunk': hunk,
|
||||||
'hunk_start': line_numbers.end(),
|
'hunk_start': line_numbers.end(),
|
||||||
'hunk_end': None
|
'hunk_end': None
|
||||||
@ -209,8 +205,25 @@ class TodoParser(object):
|
|||||||
|
|
||||||
# Now for each code block, check for comments, then those comments for TODOs.
|
# Now for each code block, check for comments, then those comments for TODOs.
|
||||||
for block in code_blocks:
|
for block in code_blocks:
|
||||||
# convert hunk string into newline-separated list (excluding first element which is always null and not actually first line of hunk)
|
# for both the set of deleted lines and set of new lines, convert hunk string into
|
||||||
bl=block['hunk'].split('\n')[1:]
|
# newline-separated list (excluding first element which is always null and not
|
||||||
|
# actually first line of hunk)
|
||||||
|
old=[]
|
||||||
|
new=[]
|
||||||
|
for line in block['hunk'].split('\n')[1:]:
|
||||||
|
if line: # if not empty
|
||||||
|
match line[0]:
|
||||||
|
case '-':
|
||||||
|
old.append(line)
|
||||||
|
case '+':
|
||||||
|
new.append(line)
|
||||||
|
case _:
|
||||||
|
if line != '\\ No newline at end of file':
|
||||||
|
old.append(line)
|
||||||
|
new.append(line)
|
||||||
|
elif line != '\\ No newline at end of file':
|
||||||
|
old.append(line)
|
||||||
|
new.append(line)
|
||||||
|
|
||||||
for marker in block['markers']:
|
for marker in block['markers']:
|
||||||
# initialize list
|
# initialize list
|
||||||
@ -250,13 +263,16 @@ class TodoParser(object):
|
|||||||
|
|
||||||
# create regex object to search for comments
|
# create regex object to search for comments
|
||||||
compiled_pattern=re.compile(comment_pattern)
|
compiled_pattern=re.compile(comment_pattern)
|
||||||
|
# analyze the set of old lines and new lines separately, so that we don't, for example,
|
||||||
|
# accidentally treat deleted lines as if they were being added in this diff
|
||||||
|
for block_lines in [old, new]:
|
||||||
# for each element of list, enumerate it and if value is a regex match, include it in list that is returned,
|
# for each element of list, enumerate it and if value is a regex match, include it in list that is returned,
|
||||||
# where each element of the list is a dictionary that is the start end end lines of the match (relative to
|
# where each element of the list is a dictionary that is the start and end lines of the match (relative to
|
||||||
# start of the hunk) and the matching string itself
|
# start of the hunk) and the matching string itself
|
||||||
comments_and_positions = [{'start': i, 'end': i, 'comment': x} for i, x in enumerate(bl) if compiled_pattern.search(x)]
|
comments_and_positions = [{'start': i, 'end': i, 'comment': x} for i, x in enumerate(block_lines) if compiled_pattern.search(x)]
|
||||||
if len(comments_and_positions) > 0:
|
if len(comments_and_positions) > 0:
|
||||||
# create filtered list which consolidates contiguous lines
|
# append filtered list which consolidates contiguous lines
|
||||||
contiguous_comments_and_positions=[comments_and_positions[0]]
|
contiguous_comments_and_positions.append(comments_and_positions[0])
|
||||||
for j, x in enumerate(comments_and_positions[1:]):
|
for j, x in enumerate(comments_and_positions[1:]):
|
||||||
if x['start'] == (comments_and_positions[j]['end'] + 1):
|
if x['start'] == (comments_and_positions[j]['end'] + 1):
|
||||||
contiguous_comments_and_positions[-1]['end']+=1
|
contiguous_comments_and_positions[-1]['end']+=1
|
||||||
@ -270,9 +286,14 @@ class TodoParser(object):
|
|||||||
# compile above pattern
|
# compile above pattern
|
||||||
compiled_pattern = re.compile(pattern, re.DOTALL)
|
compiled_pattern = re.compile(pattern, re.DOTALL)
|
||||||
|
|
||||||
|
# analyze the set of old lines and new lines separately, so that we don't, for example,
|
||||||
|
# accidentally treat deleted lines as if they were being added in this diff
|
||||||
|
for block_lines in [old, new]:
|
||||||
|
# convert list to string
|
||||||
|
block_lines_str = '\n'.join(block_lines)
|
||||||
# search for the pattern within the hunk and
|
# search for the pattern within the hunk and
|
||||||
# return a list of iterators to all of the matches
|
# return a list of iterators to all of the matches
|
||||||
match_iters = compiled_pattern.finditer(block['hunk'])
|
match_iters = compiled_pattern.finditer(block_lines_str)
|
||||||
|
|
||||||
# split off into overlapping pairs. i.e. ['A', 'B', C'] => [('A', 'B'), ('B', 'C')]
|
# split off into overlapping pairs. i.e. ['A', 'B', C'] => [('A', 'B'), ('B', 'C')]
|
||||||
pairs = itertools.pairwise(match_iters)
|
pairs = itertools.pairwise(match_iters)
|
||||||
@ -285,20 +306,19 @@ class TodoParser(object):
|
|||||||
if i == 0:
|
if i == 0:
|
||||||
# set start line and comment string of first section
|
# set start line and comment string of first section
|
||||||
contiguous_comments_and_positions.append({
|
contiguous_comments_and_positions.append({
|
||||||
# -1 to ignore first newline, which isn't actually part of the hunk
|
'start': block_lines_str.count('\n', 0, prev_span[0]),
|
||||||
'start': block['hunk'].count('\n', 0, prev_span[0]) - 1,
|
|
||||||
'end': 0,
|
'end': 0,
|
||||||
'comment': pair[0].group(0)
|
'comment': pair[0].group(0)
|
||||||
})
|
})
|
||||||
# get number of lines in first section
|
# get number of lines in first section
|
||||||
num_lines_in_first_section = block['hunk'].count('\n', prev_span[0], prev_span[1])
|
num_lines_in_first_section = block_lines_str.count('\n', prev_span[0], prev_span[1])
|
||||||
# set end line of first section relative to its start
|
# set end line of first section relative to its start
|
||||||
contiguous_comments_and_positions[-1]['end'] = contiguous_comments_and_positions[-1]['start'] + num_lines_in_first_section
|
contiguous_comments_and_positions[-1]['end'] = contiguous_comments_and_positions[-1]['start'] + num_lines_in_first_section
|
||||||
|
|
||||||
# get start/end index (within hunk) of current section
|
# get start/end index (within hunk) of current section
|
||||||
curr_span = pair[1].span()
|
curr_span = pair[1].span()
|
||||||
# determine number of lines between previous end and current start
|
# determine number of lines between previous end and current start
|
||||||
num_lines_from_prev_section_end_line = block['hunk'].count('\n', prev_span[1], curr_span[0])
|
num_lines_from_prev_section_end_line = block_lines_str.count('\n', prev_span[1], curr_span[0])
|
||||||
# set start line of current section based on previous end
|
# set start line of current section based on previous end
|
||||||
contiguous_comments_and_positions.append({
|
contiguous_comments_and_positions.append({
|
||||||
'start': contiguous_comments_and_positions[-1]['end'] + num_lines_from_prev_section_end_line,
|
'start': contiguous_comments_and_positions[-1]['end'] + num_lines_from_prev_section_end_line,
|
||||||
@ -306,7 +326,7 @@ class TodoParser(object):
|
|||||||
'comment': pair[1].group(0)
|
'comment': pair[1].group(0)
|
||||||
})
|
})
|
||||||
# get number of lines in current section
|
# get number of lines in current section
|
||||||
num_lines_in_curr_section = block['hunk'].count('\n', curr_span[0], curr_span[1])
|
num_lines_in_curr_section = block_lines_str.count('\n', curr_span[0], curr_span[1])
|
||||||
# set end line of current section relative to its start
|
# set end line of current section relative to its start
|
||||||
contiguous_comments_and_positions[-1]['end'] = contiguous_comments_and_positions[-1]['start'] + num_lines_in_curr_section
|
contiguous_comments_and_positions[-1]['end'] = contiguous_comments_and_positions[-1]['start'] + num_lines_in_curr_section
|
||||||
|
|
||||||
@ -315,20 +335,19 @@ class TodoParser(object):
|
|||||||
if len(contiguous_comments_and_positions) == 0:
|
if len(contiguous_comments_and_positions) == 0:
|
||||||
# redo the search, this time returning the
|
# redo the search, this time returning the
|
||||||
# result directly rather than an iterator
|
# result directly rather than an iterator
|
||||||
match = compiled_pattern.search(block['hunk'])
|
match = compiled_pattern.search(block_lines_str)
|
||||||
|
|
||||||
if match:
|
if match:
|
||||||
# get start/end index (within hunk) of this section
|
# get start/end index (within hunk) of this section
|
||||||
span = match.span()
|
span = match.span()
|
||||||
# set start line and comment string of first section
|
# set start line and comment string of first section
|
||||||
contiguous_comments_and_positions.append({
|
contiguous_comments_and_positions.append({
|
||||||
# -1 to ignore first newline, which isn't actually part of the hunk
|
'start': block_lines_str.count('\n', 0, span[0]),
|
||||||
'start': block['hunk'].count('\n', 0, span[0]) - 1,
|
|
||||||
'end': 0,
|
'end': 0,
|
||||||
'comment': match.group(0)
|
'comment': match.group(0)
|
||||||
})
|
})
|
||||||
# get number of lines in first section
|
# get number of lines in first section
|
||||||
num_lines_in_first_section = block['hunk'].count('\n', span[0], span[1])
|
num_lines_in_first_section = block_lines_str.count('\n', span[0], span[1])
|
||||||
# set end line of first section relative to its start
|
# set end line of first section relative to its start
|
||||||
contiguous_comments_and_positions[-1]['end'] = contiguous_comments_and_positions[-1]['start'] + num_lines_in_first_section
|
contiguous_comments_and_positions[-1]['end'] = contiguous_comments_and_positions[-1]['start'] + num_lines_in_first_section
|
||||||
|
|
||||||
@ -400,7 +419,8 @@ class TodoParser(object):
|
|||||||
body=[],
|
body=[],
|
||||||
hunk=hunk_info['hunk'],
|
hunk=hunk_info['hunk'],
|
||||||
file_name=hunk_info['file'],
|
file_name=hunk_info['file'],
|
||||||
start_line=hunk_info['start_line'] + comment_block['start'] + line_number_within_comment_block,
|
start_line=((hunk_info['deleted_start_line'] if line_status == LineStatus.DELETED else hunk_info['added_start_line'])
|
||||||
|
+ comment_block['start'] + line_number_within_comment_block),
|
||||||
start_line_within_hunk=comment_block['start'] + line_number_within_comment_block + 1,
|
start_line_within_hunk=comment_block['start'] + line_number_within_comment_block + 1,
|
||||||
num_lines=1,
|
num_lines=1,
|
||||||
markdown_language=hunk_info['markdown_language'],
|
markdown_language=hunk_info['markdown_language'],
|
||||||
|
|||||||
@ -80,8 +80,6 @@ class IssueUrlInsertionTest(unittest.TestCase):
|
|||||||
self._setUp(['test_new.diff'])
|
self._setUp(['test_new.diff'])
|
||||||
self._standardTest(80)
|
self._standardTest(80)
|
||||||
|
|
||||||
# See GitHub issue #236
|
|
||||||
@unittest.expectedFailure
|
|
||||||
def test_line_numbering_with_deletions(self):
|
def test_line_numbering_with_deletions(self):
|
||||||
self._setUp(['test_new_py.diff', 'test_edit_py.diff'])
|
self._setUp(['test_new_py.diff', 'test_edit_py.diff'])
|
||||||
with self.subTest("Issue URL insertion"):
|
with self.subTest("Issue URL insertion"):
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user