mirror of
https://github.com/qodo-ai/pr-agent.git
synced 2025-07-03 04:10:49 +08:00
Merge pull request #439 from Codium-ai/tr/fixes_added_files
Enhancement of Patch Handling and PR Processing
This commit is contained in:
@ -23,7 +23,7 @@ def filter_ignored(files):
|
||||
|
||||
# keep filenames that _don't_ match the ignore regex
|
||||
for r in compiled_patterns:
|
||||
files = [f for f in files if not r.match(f.filename)]
|
||||
files = [f for f in files if (f.filename and not r.match(f.filename))]
|
||||
|
||||
except Exception as e:
|
||||
print(f"Could not filter file list: {e}")
|
||||
|
@ -3,6 +3,7 @@ from __future__ import annotations
|
||||
import re
|
||||
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.git_providers.git_provider import EDIT_TYPE
|
||||
from pr_agent.log import get_logger
|
||||
|
||||
|
||||
@ -115,7 +116,7 @@ def omit_deletion_hunks(patch_lines) -> str:
|
||||
|
||||
|
||||
def handle_patch_deletions(patch: str, original_file_content_str: str,
|
||||
new_file_content_str: str, file_name: str) -> str:
|
||||
new_file_content_str: str, file_name: str, edit_type: EDIT_TYPE = EDIT_TYPE.UNKNOWN) -> str:
|
||||
"""
|
||||
Handle entire file or deletion patches.
|
||||
|
||||
@ -132,7 +133,7 @@ def handle_patch_deletions(patch: str, original_file_content_str: str,
|
||||
str: The modified patch with deletion hunks omitted.
|
||||
|
||||
"""
|
||||
if not new_file_content_str:
|
||||
if not new_file_content_str and edit_type != EDIT_TYPE.ADDED:
|
||||
# logic for handling deleted files - don't show patch, just show that the file was deleted
|
||||
if get_settings().config.verbosity_level > 0:
|
||||
get_logger().info(f"Processing file: {file_name}, minimizing deletion file")
|
||||
|
@ -13,12 +13,14 @@ from pr_agent.algo.file_filter import filter_ignored
|
||||
from pr_agent.algo.token_handler import TokenHandler, get_token_encoder
|
||||
from pr_agent.algo.utils import get_max_tokens
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.git_providers.git_provider import FilePatchInfo, GitProvider
|
||||
from pr_agent.git_providers.git_provider import FilePatchInfo, GitProvider, EDIT_TYPE
|
||||
from pr_agent.log import get_logger
|
||||
|
||||
DELETED_FILES_ = "Deleted files:\n"
|
||||
|
||||
MORE_MODIFIED_FILES_ = "More modified files:\n"
|
||||
MORE_MODIFIED_FILES_ = "Additional modified files (insufficient token budget to process):\n"
|
||||
|
||||
ADDED_FILES_ = "Additional added files (insufficient token budget to process):\n"
|
||||
|
||||
OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD = 1000
|
||||
OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600
|
||||
@ -68,10 +70,13 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s
|
||||
return "\n".join(patches_extended)
|
||||
|
||||
# if we are over the limit, start pruning
|
||||
patches_compressed, modified_file_names, deleted_file_names = \
|
||||
patches_compressed, modified_file_names, deleted_file_names, added_file_names = \
|
||||
pr_generate_compressed_diff(pr_languages, token_handler, model, add_line_numbers_to_hunks)
|
||||
|
||||
final_diff = "\n".join(patches_compressed)
|
||||
if added_file_names:
|
||||
added_list_str = ADDED_FILES_ + "\n".join(added_file_names)
|
||||
final_diff = final_diff + "\n\n" + added_list_str
|
||||
if modified_file_names:
|
||||
modified_list_str = MORE_MODIFIED_FILES_ + "\n".join(modified_file_names)
|
||||
final_diff = final_diff + "\n\n" + modified_list_str
|
||||
@ -122,7 +127,7 @@ def pr_generate_extended_diff(pr_languages: list,
|
||||
|
||||
|
||||
def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, model: str,
|
||||
convert_hunks_to_line_numbers: bool) -> Tuple[list, list, list]:
|
||||
convert_hunks_to_line_numbers: bool) -> Tuple[list, list, list, list]:
|
||||
"""
|
||||
Generate a compressed diff string for a pull request, using diff minimization techniques to reduce the number of
|
||||
tokens used.
|
||||
@ -148,6 +153,7 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
|
||||
"""
|
||||
|
||||
patches = []
|
||||
added_files_list = []
|
||||
modified_files_list = []
|
||||
deleted_files_list = []
|
||||
# sort each one of the languages in top_langs by the number of tokens in the diff
|
||||
@ -165,7 +171,7 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
|
||||
|
||||
# removing delete-only hunks
|
||||
patch = handle_patch_deletions(patch, original_file_content_str,
|
||||
new_file_content_str, file.filename)
|
||||
new_file_content_str, file.filename, file.edit_type)
|
||||
if patch is None:
|
||||
if not deleted_files_list:
|
||||
total_tokens += token_handler.count_tokens(DELETED_FILES_)
|
||||
@ -190,10 +196,15 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
|
||||
# until we meet the requirements
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
get_logger().warning(f"Patch too large, minimizing it, {file.filename}")
|
||||
if not modified_files_list:
|
||||
total_tokens += token_handler.count_tokens(MORE_MODIFIED_FILES_)
|
||||
modified_files_list.append(file.filename)
|
||||
total_tokens += token_handler.count_tokens(file.filename) + 1
|
||||
if file.edit_type == EDIT_TYPE.ADDED:
|
||||
if not added_files_list:
|
||||
total_tokens += token_handler.count_tokens(ADDED_FILES_)
|
||||
added_files_list.append(file.filename)
|
||||
else:
|
||||
if not modified_files_list:
|
||||
total_tokens += token_handler.count_tokens(MORE_MODIFIED_FILES_)
|
||||
modified_files_list.append(file.filename)
|
||||
total_tokens += token_handler.count_tokens(file.filename) + 1
|
||||
continue
|
||||
|
||||
if patch:
|
||||
@ -206,7 +217,7 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
get_logger().info(f"Tokens: {total_tokens}, last filename: {file.filename}")
|
||||
|
||||
return patches, modified_files_list, deleted_files_list
|
||||
return patches, modified_files_list, deleted_files_list, added_files_list
|
||||
|
||||
|
||||
async def retry_with_fallback_models(f: Callable):
|
||||
@ -397,7 +408,7 @@ def get_pr_multi_diffs(git_provider: GitProvider,
|
||||
continue
|
||||
|
||||
# Remove delete-only hunks
|
||||
patch = handle_patch_deletions(patch, original_file_content_str, new_file_content_str, file.filename)
|
||||
patch = handle_patch_deletions(patch, original_file_content_str, new_file_content_str, file.filename, file.edit_type)
|
||||
if patch is None:
|
||||
continue
|
||||
|
||||
|
@ -328,20 +328,26 @@ def set_custom_labels(variables):
|
||||
variables["custom_labels_examples"] = f" - {list(labels.keys())[0]}"
|
||||
|
||||
|
||||
def get_user_labels(current_labels):
|
||||
## Only keep labels that has been added by the user
|
||||
if current_labels is None:
|
||||
current_labels = []
|
||||
user_labels = []
|
||||
for label in current_labels:
|
||||
if label in ['Bug fix', 'Tests', 'Refactoring', 'Enhancement', 'Documentation', 'Other']:
|
||||
continue
|
||||
if get_settings().config.enable_custom_labels:
|
||||
if label in get_settings().custom_labels:
|
||||
def get_user_labels(current_labels: List[str] = None):
|
||||
"""
|
||||
Only keep labels that has been added by the user
|
||||
"""
|
||||
try:
|
||||
if current_labels is None:
|
||||
current_labels = []
|
||||
user_labels = []
|
||||
for label in current_labels:
|
||||
if label.lower() in ['bug fix', 'tests', 'refactoring', 'enhancement', 'documentation', 'other']:
|
||||
continue
|
||||
user_labels.append(label)
|
||||
if user_labels:
|
||||
get_logger().info(f"Keeping user labels: {user_labels}")
|
||||
if get_settings().config.enable_custom_labels:
|
||||
if label in get_settings().custom_labels:
|
||||
continue
|
||||
user_labels.append(label)
|
||||
if user_labels:
|
||||
get_logger().info(f"Keeping user labels: {user_labels}")
|
||||
except Exception as e:
|
||||
get_logger().exception(f"Failed to get user labels: {e}")
|
||||
return current_labels
|
||||
return user_labels
|
||||
|
||||
|
||||
|
@ -9,7 +9,7 @@ from starlette_context import context
|
||||
from ..algo.pr_processing import find_line_number_of_relevant_line_in_file
|
||||
from ..config_loader import get_settings
|
||||
from ..log import get_logger
|
||||
from .git_provider import FilePatchInfo, GitProvider
|
||||
from .git_provider import FilePatchInfo, GitProvider, EDIT_TYPE
|
||||
|
||||
|
||||
class BitbucketProvider(GitProvider):
|
||||
@ -132,14 +132,24 @@ class BitbucketProvider(GitProvider):
|
||||
diff.old.get_data("links")
|
||||
)
|
||||
new_file_content_str = self._get_pr_file_content(diff.new.get_data("links"))
|
||||
diff_files.append(
|
||||
FilePatchInfo(
|
||||
original_file_content_str,
|
||||
new_file_content_str,
|
||||
diff_split[index],
|
||||
diff.new.path,
|
||||
)
|
||||
file_patch_canonic_structure = FilePatchInfo(
|
||||
original_file_content_str,
|
||||
new_file_content_str,
|
||||
diff_split[index],
|
||||
diff.new.path,
|
||||
)
|
||||
|
||||
if diff.data['status'] == 'added':
|
||||
file_patch_canonic_structure.edit_type = EDIT_TYPE.ADDED
|
||||
elif diff.data['status'] == 'removed':
|
||||
file_patch_canonic_structure.edit_type = EDIT_TYPE.DELETED
|
||||
elif diff.data['status'] == 'modified':
|
||||
file_patch_canonic_structure.edit_type = EDIT_TYPE.MODIFIED
|
||||
elif diff.data['status'] == 'renamed':
|
||||
file_patch_canonic_structure.edit_type = EDIT_TYPE.RENAMED
|
||||
diff_files.append(file_patch_canonic_structure)
|
||||
|
||||
|
||||
self.diff_files = diff_files
|
||||
return diff_files
|
||||
|
||||
@ -288,6 +298,11 @@ class BitbucketProvider(GitProvider):
|
||||
})
|
||||
|
||||
response = requests.request("PUT", self.bitbucket_pull_request_api_url, headers=self.headers, data=payload)
|
||||
try:
|
||||
if response.status_code != 200:
|
||||
get_logger().info(f"Failed to update description, error code: {response.status_code}")
|
||||
except:
|
||||
pass
|
||||
return response
|
||||
|
||||
# bitbucket does not support labels
|
||||
|
@ -13,6 +13,7 @@ class EDIT_TYPE(Enum):
|
||||
DELETED = 2
|
||||
MODIFIED = 3
|
||||
RENAMED = 4
|
||||
UNKNOWN = 5
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -22,7 +23,7 @@ class FilePatchInfo:
|
||||
patch: str
|
||||
filename: str
|
||||
tokens: int = -1
|
||||
edit_type: EDIT_TYPE = EDIT_TYPE.MODIFIED
|
||||
edit_type: EDIT_TYPE = EDIT_TYPE.UNKNOWN
|
||||
old_filename: str = None
|
||||
|
||||
|
||||
@ -94,10 +95,10 @@ class GitProvider(ABC):
|
||||
def get_pr_description(self, *, full: bool = True) -> str:
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.algo.pr_processing import clip_tokens
|
||||
max_tokens = get_settings().get("CONFIG.MAX_DESCRIPTION_TOKENS", None)
|
||||
max_tokens_description = get_settings().get("CONFIG.MAX_DESCRIPTION_TOKENS", None)
|
||||
description = self.get_pr_description_full() if full else self.get_user_description()
|
||||
if max_tokens:
|
||||
return clip_tokens(description, max_tokens)
|
||||
if max_tokens_description:
|
||||
return clip_tokens(description, max_tokens_description)
|
||||
return description
|
||||
|
||||
def get_user_description(self) -> str:
|
||||
@ -153,6 +154,8 @@ def get_main_pr_language(languages, files) -> str:
|
||||
# validate that the specific commit uses the main language
|
||||
extension_list = []
|
||||
for file in files:
|
||||
if not file:
|
||||
continue
|
||||
if isinstance(file, str):
|
||||
file = FilePatchInfo(base_file=None, head_file=None, patch=None, filename=file)
|
||||
extension_list.append(file.filename.rsplit('.')[-1])
|
||||
|
@ -13,7 +13,7 @@ from ..algo.utils import load_large_diff
|
||||
from ..config_loader import get_settings
|
||||
from ..log import get_logger
|
||||
from ..servers.utils import RateLimitExceeded
|
||||
from .git_provider import FilePatchInfo, GitProvider, IncrementalPR
|
||||
from .git_provider import FilePatchInfo, GitProvider, IncrementalPR, EDIT_TYPE
|
||||
|
||||
|
||||
class GithubProvider(GitProvider):
|
||||
@ -129,7 +129,20 @@ class GithubProvider(GitProvider):
|
||||
if not patch:
|
||||
patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str)
|
||||
|
||||
diff_files.append(FilePatchInfo(original_file_content_str, new_file_content_str, patch, file.filename))
|
||||
if file.status == 'added':
|
||||
edit_type = EDIT_TYPE.ADDED
|
||||
elif file.status == 'removed':
|
||||
edit_type = EDIT_TYPE.DELETED
|
||||
elif file.status == 'renamed':
|
||||
edit_type = EDIT_TYPE.RENAMED
|
||||
elif file.status == 'modified':
|
||||
edit_type = EDIT_TYPE.MODIFIED
|
||||
else:
|
||||
get_logger().error(f"Unknown edit type: {file.status}")
|
||||
edit_type = EDIT_TYPE.UNKNOWN
|
||||
file_patch_canonical_structure = FilePatchInfo(original_file_content_str, new_file_content_str, patch,
|
||||
file.filename, edit_type=edit_type)
|
||||
diff_files.append(file_patch_canonical_structure)
|
||||
|
||||
self.diff_files = diff_files
|
||||
return diff_files
|
||||
|
@ -152,7 +152,8 @@ PR Analysis:
|
||||
Focused PR: no, because ...
|
||||
{%- endif %}
|
||||
{%- if require_estimate_effort_to_review %}
|
||||
Estimated effort to review [1-5]: 3, because ...
|
||||
Estimated effort to review [1-5]: |-
|
||||
3, because ...
|
||||
{%- endif %}
|
||||
PR Feedback:
|
||||
General PR suggestions: |-
|
||||
|
Reference in New Issue
Block a user