diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py
index 9ee3e2bf..a91b1476 100644
--- a/pr_agent/git_providers/github_provider.py
+++ b/pr_agent/git_providers/github_provider.py
@@ -174,6 +174,24 @@ class GithubProvider(GitProvider):
 
             diff_files = []
             invalid_files_names = []
+            is_close_to_rate_limit = False
+
+            # The base.sha will point to the current state of the base branch (including parallel merges), not the original base commit when the PR was created
+            # We can fix this by finding the merge base commit between the PR head and base branches
+            # Note that pr.head.sha is correct as is - it points to the latest commit in your PR branch.
+            # This SHA isn't affected by parallel merges to the base branch since it's specific to your PR's branch.
+            repo = self.repo_obj
+            pr = self.pr
+            try:
+                compare = repo.compare(pr.base.sha, pr.head.sha)  # communication with GitHub
+                merge_base_commit = compare.merge_base_commit
+            except Exception as e:
+                get_logger().error(f"Failed to get merge base commit: {e}")
+                merge_base_commit = pr.base
+            if merge_base_commit.sha != pr.base.sha:
+                get_logger().info(
+                    f"Using merge base commit {merge_base_commit.sha} instead of base commit {pr.base.sha}")
+
             counter_valid = 0
             for file in files:
                 if not is_valid_file(file.filename):
@@ -181,48 +199,36 @@ class GithubProvider(GitProvider):
                     continue
 
                 patch = file.patch
-
-                # allow only a limited number of files to be fully loaded. We can manage the rest with diffs only
-                counter_valid += 1
-                avoid_load = False
-                if counter_valid >= MAX_FILES_ALLOWED_FULL and patch and not self.incremental.is_incremental:
-                    avoid_load = True
-                    if counter_valid == MAX_FILES_ALLOWED_FULL:
-                        get_logger().info(f"Too many files in PR, will avoid loading full content for rest of files")
-
-                if avoid_load:
+                if is_close_to_rate_limit:
                     new_file_content_str = ""
+                    original_file_content_str = ""
                 else:
-                    new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha)  # communication with GitHub
+                    # allow only a limited number of files to be fully loaded. We can manage the rest with diffs only
+                    counter_valid += 1
+                    avoid_load = False
+                    if counter_valid >= MAX_FILES_ALLOWED_FULL and patch and not self.incremental.is_incremental:
+                        avoid_load = True
+                        if counter_valid == MAX_FILES_ALLOWED_FULL:
+                            get_logger().info(f"Too many files in PR, will avoid loading full content for rest of files")
 
-                if self.incremental.is_incremental and self.unreviewed_files_set:
-                    original_file_content_str = self._get_pr_file_content(file, self.incremental.last_seen_commit_sha)
-                    patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str)
-                    self.unreviewed_files_set[file.filename] = patch
-                else:
                     if avoid_load:
-                        original_file_content_str = ""
+                        new_file_content_str = ""
                     else:
-                        # The base.sha will point to the current state of the base branch (including parallel merges), not the original base commit when the PR was created
-                        # We can fix this by finding the merge base commit between the PR head and base branches
-                        # Note that The pr.head.sha is actually correct as is - it points to the latest commit in your PR branch.
-                        # This SHA isn't affected by parallel merges to the base branch since it's specific to your PR's branch.
-                        repo = self.repo_obj
-                        pr = self.pr
-                        try:
-                            compare = repo.compare(pr.base.sha, pr.head.sha)
-                            merge_base_commit = compare.merge_base_commit
-                        except Exception as e:
-                            get_logger().error(f"Failed to get merge base commit: {e}")
-                            merge_base_commit = pr.base
-                        if merge_base_commit.sha != pr.base.sha:
-                            get_logger().info(
-                                f"Using merge base commit {merge_base_commit.sha} instead of base commit "
-                                f"{pr.base.sha} for {file.filename}")
-                        original_file_content_str = self._get_pr_file_content(file, merge_base_commit.sha)
+                        new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha)  # communication with GitHub
 
-                    if not patch:
+                    if self.incremental.is_incremental and self.unreviewed_files_set:
+                        original_file_content_str = self._get_pr_file_content(file, self.incremental.last_seen_commit_sha)
                         patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str)
+                        self.unreviewed_files_set[file.filename] = patch
+                    else:
+                        if avoid_load:
+                            original_file_content_str = ""
+                        else:
+                            original_file_content_str = self._get_pr_file_content(file, merge_base_commit.sha)
+                            # original_file_content_str = self._get_pr_file_content(file, self.pr.base.sha)
+                        if not patch:
+                            patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str)
+
 
                 if file.status == 'added':
                     edit_type = EDIT_TYPE.ADDED
@@ -237,9 +243,14 @@ class GithubProvider(GitProvider):
                     edit_type = EDIT_TYPE.UNKNOWN
 
                 # count number of lines added and removed
-                patch_lines = patch.splitlines(keepends=True)
-                num_plus_lines = len([line for line in patch_lines if line.startswith('+')])
-                num_minus_lines = len([line for line in patch_lines if line.startswith('-')])
+                if hasattr(file, 'additions') and hasattr(file, 'deletions'):
+                    num_plus_lines = file.additions
+                    num_minus_lines = file.deletions
+                else:
+                    patch_lines = patch.splitlines(keepends=True)
+                    num_plus_lines = len([line for line in patch_lines if line.startswith('+')])
+                    num_minus_lines = len([line for line in patch_lines if line.startswith('-')])
+
                 file_patch_canonical_structure = FilePatchInfo(original_file_content_str, new_file_content_str, patch,
                                                                file.filename, edit_type=edit_type,
                                                                num_plus_lines=num_plus_lines,
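For reference, the merge-base resolution that this patch hoists out of the per-file loop can be reproduced with plain PyGithub. The sketch below is illustrative only and is not part of the patch: the token, repository name, and PR number are placeholders, while `repo.compare()`, `merge_base_commit`, and `pr.base.sha` / `pr.head.sha` are the same PyGithub calls the code above relies on.

```python
# Minimal, standalone sketch of the merge-base lookup.
# Placeholders (not from this PR): the token, "owner/repo", and PR number 123.
from github import Github

gh = Github("<personal-access-token>")   # placeholder credentials
repo = gh.get_repo("owner/repo")         # placeholder repository
pr = repo.get_pull(123)                  # placeholder PR number

try:
    # One extra API call: compare the PR base against the PR head.
    comparison = repo.compare(pr.base.sha, pr.head.sha)
    merge_base_sha = comparison.merge_base_commit.sha
except Exception:
    # Same fallback idea as the patch: use the base branch tip if the compare call fails.
    merge_base_sha = pr.base.sha

# merge_base_sha identifies the commit the PR actually diverged from, so reading the
# "original" file contents at this SHA is unaffected by later merges into the base branch.
print(merge_base_sha)
```

Reading the original file contents at the merge-base SHA (rather than at `pr.base.sha`) keeps the "before" side of each file stable even when the base branch advances while the PR is open, which is the behavior the hoisted block in the first hunk preserves while avoiding one compare call per file.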