Merge pull request #3 from Codium-ai/algo/combine_modified_files_one_list

Combine all modified and deleted files that have been compressed to the prompt into one list
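This merge changes prompt compression so that files whose patches are pruned are no longer dropped silently: their names are collected and appended to the prompt as two trailing lists. A minimal sketch of the resulting layout, assuming hypothetical file names (the two header constants are the ones added in this commit):

    DELETED_FILES_ = "Deleted files:\n"
    MORE_MODIFIED_FILES_ = "More modified files:\n"

    patches_compressed = ["## src/app.py\n\n@@ -1 +1 @@\n-old\n+new\n"]
    modified_file_names = ["src/generated.py"]  # patch too large, kept by name only
    deleted_file_names = ["src/legacy.py"]      # delete-only patch, kept by name only

    final_diff = "\n".join(patches_compressed)
    if modified_file_names:
        final_diff += "\n\n" + MORE_MODIFIED_FILES_ + "\n".join(modified_file_names)
    if deleted_file_names:
        final_diff += "\n\n" + DELETED_FILES_ + "\n".join(deleted_file_names)
    print(final_diff)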
--- a/pr_agent/algo/git_patch_processing.py
+++ b/pr_agent/algo/git_patch_processing.py
@@ -96,7 +96,7 @@ def handle_patch_deletions(patch: str, original_file_content_str: str,
         # logic for handling deleted files - don't show patch, just show that the file was deleted
         if settings.config.verbosity_level > 0:
             logging.info(f"Processing file: {file_name}, minimizing deletion file")
-        patch = "File was deleted\n"
+        patch = None  # file was deleted
     else:
         patch_lines = patch.splitlines()
         patch_new = omit_deletion_hunks(patch_lines)
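handle_patch_deletions now signals a deleted file by returning None instead of the sentinel string "File was deleted\n", so callers must branch on the return value rather than compare strings. A sketch of the caller-side contract, with a stub standing in for the real function:

    from typing import Optional

    def handle_patch_deletions_stub(patch: str, new_file_content_str: str) -> Optional[str]:
        # Stub mirroring the new contract: None means "the file was deleted".
        return None if not new_file_content_str else patch

    patch = handle_patch_deletions_stub("@@ -1 +0,0 @@\n-gone\n", "")
    if patch is None:
        print("file deleted - record its name instead of emitting a patch")
    else:
        print(patch)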
--- a/pr_agent/algo/pr_processing.py
+++ b/pr_agent/algo/pr_processing.py
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 import difflib
 import logging
-from typing import Any, Dict, Tuple
+from typing import Any, Dict, Tuple, Union
 
 from pr_agent.algo.git_patch_processing import extend_patch, handle_patch_deletions
 from pr_agent.algo.language_handler import sort_files_by_main_languages
@@ -10,11 +10,15 @@ from pr_agent.algo.token_handler import TokenHandler
 from pr_agent.config_loader import settings
 from pr_agent.git_providers import GithubProvider
 
+DELETED_FILES_ = "Deleted files:\n"
+
+MORE_MODIFIED_FILES_ = "More modified files:\n"
+
 OUTPUT_BUFFER_TOKENS = 800
 PATCH_EXTRA_LINES = 3
 
 
-def get_pr_diff(git_provider: [GithubProvider, Any], token_handler: TokenHandler) -> str:
+def get_pr_diff(git_provider: Union[GithubProvider, Any], token_handler: TokenHandler) -> str:
     """
     Returns a string with the diff of the PR.
     If needed, apply diff minimization techniques to reduce the number of tokens
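As a side fix, the old parameter annotation [GithubProvider, Any] was a list literal, not a valid type hint; Union[GithubProvider, Any] is the idiomatic spelling for "either type". A quick illustration with a hypothetical function:

    from typing import Any, Union

    def describe(provider: Union[str, Any]) -> str:
        # Union[X, Any] effectively accepts any value; the hint documents
        # that a concrete provider type is the expected case.
        return type(provider).__name__

    print(describe("github"))  # prints: str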
@@ -32,8 +36,15 @@ def get_pr_diff(git_provider: [GithubProvider, Any], token_handler: TokenHandler
         return "\n".join(patches_extended)
 
     # if we are over the limit, start pruning
-    patches_compressed = pr_generate_compressed_diff(pr_languages, token_handler)
-    return "\n".join(patches_compressed)
+    patches_compressed, modified_file_names, deleted_file_names = pr_generate_compressed_diff(pr_languages, token_handler)
+    final_diff = "\n".join(patches_compressed)
+    if modified_file_names:
+        modified_list_str = MORE_MODIFIED_FILES_ + "\n".join(modified_file_names)
+        final_diff = final_diff + "\n\n" + modified_list_str
+    if deleted_file_names:
+        deleted_list_str = DELETED_FILES_ + "\n".join(deleted_file_names)
+        final_diff = final_diff + "\n\n" + deleted_list_str
+    return final_diff
 
 
 def pr_generate_extended_diff(pr_languages: list, token_handler: TokenHandler) -> \
@@ -67,7 +78,7 @@ def pr_generate_extended_diff(pr_languages: list, token_handler: TokenHandler) -
     return patches_extended, total_tokens
 
 
-def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler) -> list:
+def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler) -> Tuple[list, list, list]:
     # Apply Diff Minimization techniques to reduce the number of tokens:
     # 0. Start from the largest diff patch to smaller ones
     # 1. Don't use extend context lines around diff
@@ -76,7 +87,8 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler) ->
     # 4. Minimize all remaining files when you reach token limit
 
     patches = []
+    modified_files = []
+    deleted_files = []
     # sort each one of the languages in top_langs by the number of tokens in the diff
     sorted_files = []
     for lang in top_langs:
@@ -94,6 +106,12 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler) ->
         # removing delete-only hunks
         patch = handle_patch_deletions(patch, original_file_content_str,
                                        new_file_content_str, file.filename)
+        if patch is None:
+            if not deleted_files:
+                total_tokens += token_handler.count_tokens(DELETED_FILES_)
+            deleted_files.append(file.filename)
+            total_tokens += token_handler.count_tokens(file.filename) + 1
+            continue
         new_patch_tokens = token_handler.count_tokens(patch)
 
         if total_tokens > token_handler.limit - OUTPUT_BUFFER_TOKENS // 2:
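Note the accounting pattern in the added block: the "Deleted files:" header is charged against the token budget only once, when the first deleted file is recorded, and each file name then costs its own tokens plus one for the joining newline. A sketch with a stand-in tokenizer (the real TokenHandler wraps a model tokenizer):

    DELETED_FILES_ = "Deleted files:\n"

    def count_tokens(text: str) -> int:
        return len(text.split())  # stand-in; the real count uses a model tokenizer

    total_tokens, deleted_files = 100, []
    for filename in ["legacy.py", "old_api.py"]:
        if not deleted_files:                       # header charged exactly once
            total_tokens += count_tokens(DELETED_FILES_)
        deleted_files.append(filename)
        total_tokens += count_tokens(filename) + 1  # +1 for the joining newline
    print(total_tokens, deleted_files)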
@@ -105,14 +123,19 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler) ->
             # until we meet the requirements
             if settings.config.verbosity_level >= 2:
                 logging.warning(f"Patch too large, minimizing it, {file.filename}")
-            patch = "File was modified"
+            patch = None
+            if not modified_files:
+                total_tokens += token_handler.count_tokens(MORE_MODIFIED_FILES_)
+            modified_files.append(file.filename)
+            total_tokens += token_handler.count_tokens(file.filename) + 1
         if patch:
             patch_final = f"## {file.filename}\n\n{patch}\n"
             patches.append(patch_final)
             total_tokens += token_handler.count_tokens(patch_final)
             if settings.config.verbosity_level >= 2:
                 logging.info(f"Tokens: {total_tokens}, last filename: {file.filename}")
-    return patches
+
+    return patches, modified_files, deleted_files
 
 
 def load_large_diff(file, new_file_content_str: str, original_file_content_str: str, patch: str) -> str:
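Since deleted and oversized files now both leave patch as None, the existing "if patch:" guard is what keeps name-only files out of the patch section; they surface solely through the returned modified_files and deleted_files lists. A small sketch of that filtering, with hypothetical file names:

    patches = []
    files = [("kept.py", "@@ -1 +1 @@\n-a\n+b\n"), ("huge.py", None)]
    for filename, patch in files:
        if patch:  # None entries (deleted or minimized) are listed by name elsewhere
            patches.append(f"## {filename}\n\n{patch}\n")
    assert patches == ["## kept.py\n\n@@ -1 +1 @@\n-a\n+b\n\n"]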
--- a/tests/unittest/test_handle_patch_deletions.py
+++ b/tests/unittest/test_handle_patch_deletions.py
@@ -62,7 +62,7 @@ class TestHandlePatchDeletions:
         new_file_content_str = ''
         file_name = 'file.py'
         assert handle_patch_deletions(patch, original_file_content_str, new_file_content_str,
-                                      file_name) == 'File was deleted\n'
+                                      file_name) is None
 
     # Tests that handle_patch_deletions returns the original patch when patch and patch_new are equal
     def test_handle_patch_deletions_edge_case_patch_and_patch_new_are_equal(self):