Add large_patch_policy configuration and implement patch clipping logic

This commit is contained in:
mrT23
2024-05-29 13:42:44 +03:00
parent 806ba3f9d8
commit 17f46bb53b
3 changed files with 23 additions and 4 deletions

View File

@ -377,9 +377,25 @@ def get_pr_multi_diffs(git_provider: GitProvider,
patch = convert_to_hunks_with_lines_numbers(patch, file)
new_patch_tokens = token_handler.count_tokens(patch)
if patch and (token_handler.prompt_tokens + new_patch_tokens) > get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
get_logger().warning(f"Patch too large, skipping: {file.filename}")
continue
if patch and (token_handler.prompt_tokens + new_patch_tokens) > get_max_tokens(
model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
if get_settings().config.get('large_patch_policy', 'skip') == 'skip':
get_logger().warning(f"Patch too large, skipping: {file.filename}")
continue
elif get_settings().config.get('large_patch_policy') == 'clip':
delta_tokens = int(0.9*(get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD - token_handler.prompt_tokens))
patch_clipped = clip_tokens(patch,delta_tokens, delete_last_line=True)
new_patch_tokens = token_handler.count_tokens(patch_clipped)
if patch_clipped and (token_handler.prompt_tokens + new_patch_tokens) > get_max_tokens(
model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
get_logger().warning(f"Patch too large, skipping: {file.filename}")
continue
else:
get_logger().info(f"Clipped large patch for file: {file.filename}")
patch = patch_clipped
else:
get_logger().warning(f"Patch too large, skipping: {file.filename}")
continue
if patch and (total_tokens + new_patch_tokens > get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD):
final_diff = "\n".join(patches)