From b862c98a11eb989ea5fa2c1e9df9de638607a920 Mon Sep 17 00:00:00 2001
From: alstr <alstr@users.noreply.github.com>
Date: Sat, 19 Jun 2021 09:25:43 +0100
Subject: [PATCH] Reduce possibility for duplicate issues

Add enhanced check discussed in #24
---
 main.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/main.py b/main.py
index 250d6fa..33215bb 100644
--- a/main.py
+++ b/main.py
@@ -93,19 +93,33 @@ class GitHubClient(object):
             # Title is too long.
             title = title[:80] + '...'
         url_to_line = f'https://github.com/{self.repo}/blob/{self.sha}/{issue.file_name}#L{issue.start_line}'
-        body = (self.line_break.join(issue.body) + '\n\n'
+        formatted_issue_body = self.line_break.join(issue.body)
+        body = (formatted_issue_body + '\n\n'
                 + url_to_line + '\n\n'
                 + '```' + issue.markdown_language + '\n'
                 + issue.hunk + '\n'
                 + '```')
 
         # Check if the current issue already exists - if so, skip it.
+        # The below is a simple and imperfect check.
         issue_id = hashlib.sha1(body.encode('utf-8')).hexdigest()
         body += '\n\n' + issue_id
         for existing_issue in self.existing_issues:
             if issue_id in existing_issue['body']:
+                # A matching issue_id means the issues are identical.
                 print(f'Skipping issue (already exists)')
                 return
+            else:
+                # There may be cases (e.g. rebasing) where the commit SHA in the URL differs, so the issue_id
+                # comparison above fails even though the issue is otherwise identical.
+                # Long term we should improve how the action handles rebasing.
+                existing_issue_body = existing_issue['body']
+                issue_exists = (formatted_issue_body in existing_issue_body
+                                and issue.file_name in existing_issue_body
+                                and issue.markdown_language in existing_issue_body
+                                and issue.hunk in existing_issue_body)
+                if issue_exists:
+                    return
 
         new_issue_body = {'title': title, 'body': body, 'labels': issue.labels}