mirror of
https://github.com/qodo-ai/pr-agent.git
synced 2025-07-02 11:50:37 +08:00
refactor: optimize file content loading and improve rate limit handling
This commit is contained in:
@ -174,6 +174,24 @@ class GithubProvider(GitProvider):
|
||||
|
||||
diff_files = []
|
||||
invalid_files_names = []
|
||||
is_close_to_rate_limit = False
|
||||
|
||||
# The base.sha will point to the current state of the base branch (including parallel merges), not the original base commit when the PR was created
|
||||
# We can fix this by finding the merge base commit between the PR head and base branches
|
||||
# Note that The pr.head.sha is actually correct as is - it points to the latest commit in your PR branch.
|
||||
# This SHA isn't affected by parallel merges to the base branch since it's specific to your PR's branch.
|
||||
repo = self.repo_obj
|
||||
pr = self.pr
|
||||
try:
|
||||
compare = repo.compare(pr.base.sha, pr.head.sha) # communication with GitHub
|
||||
merge_base_commit = compare.merge_base_commit
|
||||
except Exception as e:
|
||||
get_logger().error(f"Failed to get merge base commit: {e}")
|
||||
merge_base_commit = pr.base
|
||||
if merge_base_commit.sha != pr.base.sha:
|
||||
get_logger().info(
|
||||
f"Using merge base commit {merge_base_commit.sha} instead of base commit ")
|
||||
|
||||
counter_valid = 0
|
||||
for file in files:
|
||||
if not is_valid_file(file.filename):
|
||||
@ -181,48 +199,36 @@ class GithubProvider(GitProvider):
|
||||
continue
|
||||
|
||||
patch = file.patch
|
||||
|
||||
# allow only a limited number of files to be fully loaded. We can manage the rest with diffs only
|
||||
counter_valid += 1
|
||||
avoid_load = False
|
||||
if counter_valid >= MAX_FILES_ALLOWED_FULL and patch and not self.incremental.is_incremental:
|
||||
avoid_load = True
|
||||
if counter_valid == MAX_FILES_ALLOWED_FULL:
|
||||
get_logger().info(f"Too many files in PR, will avoid loading full content for rest of files")
|
||||
|
||||
if avoid_load:
|
||||
if is_close_to_rate_limit:
|
||||
new_file_content_str = ""
|
||||
original_file_content_str = ""
|
||||
else:
|
||||
new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha) # communication with GitHub
|
||||
# allow only a limited number of files to be fully loaded. We can manage the rest with diffs only
|
||||
counter_valid += 1
|
||||
avoid_load = False
|
||||
if counter_valid >= MAX_FILES_ALLOWED_FULL and patch and not self.incremental.is_incremental:
|
||||
avoid_load = True
|
||||
if counter_valid == MAX_FILES_ALLOWED_FULL:
|
||||
get_logger().info(f"Too many files in PR, will avoid loading full content for rest of files")
|
||||
|
||||
if self.incremental.is_incremental and self.unreviewed_files_set:
|
||||
original_file_content_str = self._get_pr_file_content(file, self.incremental.last_seen_commit_sha)
|
||||
patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str)
|
||||
self.unreviewed_files_set[file.filename] = patch
|
||||
else:
|
||||
if avoid_load:
|
||||
original_file_content_str = ""
|
||||
new_file_content_str = ""
|
||||
else:
|
||||
# The base.sha will point to the current state of the base branch (including parallel merges), not the original base commit when the PR was created
|
||||
# We can fix this by finding the merge base commit between the PR head and base branches
|
||||
# Note that The pr.head.sha is actually correct as is - it points to the latest commit in your PR branch.
|
||||
# This SHA isn't affected by parallel merges to the base branch since it's specific to your PR's branch.
|
||||
repo = self.repo_obj
|
||||
pr = self.pr
|
||||
try:
|
||||
compare = repo.compare(pr.base.sha, pr.head.sha)
|
||||
merge_base_commit = compare.merge_base_commit
|
||||
except Exception as e:
|
||||
get_logger().error(f"Failed to get merge base commit: {e}")
|
||||
merge_base_commit = pr.base
|
||||
if merge_base_commit.sha != pr.base.sha:
|
||||
get_logger().info(
|
||||
f"Using merge base commit {merge_base_commit.sha} instead of base commit "
|
||||
f"{pr.base.sha} for {file.filename}")
|
||||
original_file_content_str = self._get_pr_file_content(file, merge_base_commit.sha)
|
||||
new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha) # communication with GitHub
|
||||
|
||||
if not patch:
|
||||
if self.incremental.is_incremental and self.unreviewed_files_set:
|
||||
original_file_content_str = self._get_pr_file_content(file, self.incremental.last_seen_commit_sha)
|
||||
patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str)
|
||||
self.unreviewed_files_set[file.filename] = patch
|
||||
else:
|
||||
if avoid_load:
|
||||
original_file_content_str = ""
|
||||
else:
|
||||
original_file_content_str = self._get_pr_file_content(file, merge_base_commit.sha)
|
||||
# original_file_content_str = self._get_pr_file_content(file, self.pr.base.sha)
|
||||
if not patch:
|
||||
patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str)
|
||||
|
||||
|
||||
if file.status == 'added':
|
||||
edit_type = EDIT_TYPE.ADDED
|
||||
@ -237,9 +243,14 @@ class GithubProvider(GitProvider):
|
||||
edit_type = EDIT_TYPE.UNKNOWN
|
||||
|
||||
# count number of lines added and removed
|
||||
patch_lines = patch.splitlines(keepends=True)
|
||||
num_plus_lines = len([line for line in patch_lines if line.startswith('+')])
|
||||
num_minus_lines = len([line for line in patch_lines if line.startswith('-')])
|
||||
if hasattr(file, 'additions') and hasattr(file, 'deletions'):
|
||||
num_plus_lines = file.additions
|
||||
num_minus_lines = file.deletions
|
||||
else:
|
||||
patch_lines = patch.splitlines(keepends=True)
|
||||
num_plus_lines = len([line for line in patch_lines if line.startswith('+')])
|
||||
num_minus_lines = len([line for line in patch_lines if line.startswith('-')])
|
||||
|
||||
file_patch_canonical_structure = FilePatchInfo(original_file_content_str, new_file_content_str, patch,
|
||||
file.filename, edit_type=edit_type,
|
||||
num_plus_lines=num_plus_lines,
|
||||
|
Reference in New Issue
Block a user