feat: improve patch extension with new file content comparison

This commit is contained in:
mrT23
2025-02-24 11:46:12 +02:00
parent 2a647709c4
commit 56250f5ea8
4 changed files with 103 additions and 55 deletions

View File

@ -195,13 +195,15 @@ def pr_generate_extended_diff(pr_languages: list,
for lang in pr_languages:
for file in lang['files']:
original_file_content_str = file.base_file
new_file_content_str = file.head_file
patch = file.patch
if not patch:
continue
# extend each patch with extra lines of context
extended_patch = extend_patch(original_file_content_str, patch,
patch_extra_lines_before, patch_extra_lines_after, file.filename)
patch_extra_lines_before, patch_extra_lines_after, file.filename,
new_file_str=new_file_content_str)
if not extended_patch:
get_logger().warning(f"Failed to extend patch for file: {file.filename}")
continue
@ -212,7 +214,7 @@ def pr_generate_extended_diff(pr_languages: list,
full_extended_patch = f"\n\n## File: '{file.filename.strip()}'\n{extended_patch.rstrip()}\n"
# add AI-summary metadata to the patch
if file.ai_file_summary and get_settings().get("config.enable_ai_metadata", False):
if file.ai_file_summary and get_settings().get("config.enable_ai_metadata", False):
full_extended_patch = add_ai_summary_top_patch(file, full_extended_patch)
patch_tokens = token_handler.count_tokens(full_extended_patch)
@ -384,7 +386,8 @@ def _get_all_deployments(all_models: List[str]) -> List[str]:
def get_pr_multi_diffs(git_provider: GitProvider,
token_handler: TokenHandler,
model: str,
max_calls: int = 5) -> List[str]:
max_calls: int = 5,
add_line_numbers: bool = True) -> List[str]:
"""
Retrieves the diff files from a Git provider, sorts them by main language, and generates patches for each file.
The patches are split into multiple groups based on the maximum number of tokens allowed for the given model.
@ -425,7 +428,8 @@ def get_pr_multi_diffs(git_provider: GitProvider,
# try first a single run with standard diff string, with patch extension, and no deletions
patches_extended, total_tokens, patches_extended_tokens = pr_generate_extended_diff(
pr_languages, token_handler, add_line_numbers_to_hunks=True,
pr_languages, token_handler,
add_line_numbers_to_hunks=add_line_numbers,
patch_extra_lines_before=PATCH_EXTRA_LINES_BEFORE,
patch_extra_lines_after=PATCH_EXTRA_LINES_AFTER)
@ -454,7 +458,12 @@ def get_pr_multi_diffs(git_provider: GitProvider,
if patch is None:
continue
patch = convert_to_hunks_with_lines_numbers(patch, file)
# Add line numbers and metadata to the patch
if add_line_numbers:
patch = convert_to_hunks_with_lines_numbers(patch, file)
else:
patch = f"\n\n## File: '{file.filename.strip()}'\n\n{patch.strip()}\n"
# add AI-summary metadata to the patch
if file.ai_file_summary and get_settings().get("config.enable_ai_metadata", False):
patch = add_ai_summary_top_patch(file, patch)