From e9535ea1649e21a46de28d9b4f82aa48964ca7e2 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 18 Aug 2024 17:45:18 +0300 Subject: [PATCH 1/3] Add dynamic context handling in git patch processing - Introduce `allow_dynamic_context` and `max_extra_lines_before_dynamic_context` settings. - Adjust context limits dynamically based on section headers. - Add logging for dynamic context adjustments and section header findings. --- pr_agent/algo/git_patch_processing.py | 52 +++++++++++++++++++++------ pr_agent/settings/configuration.toml | 2 ++ 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py index 00f16d7b..dc254fea 100644 --- a/pr_agent/algo/git_patch_processing.py +++ b/pr_agent/algo/git_patch_processing.py @@ -17,6 +17,16 @@ def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, patch except UnicodeDecodeError: return "" + allow_dynamic_context = get_settings().config.allow_dynamic_context + max_extra_lines_before_dynamic_context = get_settings().config.max_extra_lines_before_dynamic_context + patch_extra_lines_before_original = patch_extra_lines_before + patch_extra_lines_before_new = patch_extra_lines_before + if allow_dynamic_context: + if max_extra_lines_before_dynamic_context > patch_extra_lines_before: + patch_extra_lines_before_new = max_extra_lines_before_dynamic_context + else: + get_logger().warning(f"'max_extra_lines_before_dynamic_context' should be greater than 'patch_extra_lines_before'") + original_lines = original_file_str.splitlines() len_original_lines = len(original_lines) patch_lines = patch_str.splitlines() @@ -48,18 +58,40 @@ def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, patch section_header = res[4] if patch_extra_lines_before > 0 or patch_extra_lines_after > 0: - extended_start1 = max(1, start1 - patch_extra_lines_before) - extended_size1 = size1 + (start1 - extended_start1) + patch_extra_lines_after - extended_start2 = max(1, start2 - patch_extra_lines_before) - extended_size2 = size2 + (start2 - extended_start2) + patch_extra_lines_after - if extended_start1 - 1 + extended_size1 > len_original_lines: - # we cannot extend beyond the original file - delta_cap = extended_start1 - 1 + extended_size1 - len_original_lines - extended_size1 = max(extended_size1 - delta_cap, size1) - extended_size2 = max(extended_size2 - delta_cap, size2) + def _calc_context_limits(patch_lines_before): + extended_start1 = max(1, start1 - patch_lines_before) + extended_size1 = size1 + (start1 - extended_start1) + patch_extra_lines_after + extended_start2 = max(1, start2 - patch_lines_before) + extended_size2 = size2 + (start2 - extended_start2) + patch_extra_lines_after + if extended_start1 - 1 + extended_size1 > len_original_lines: + # we cannot extend beyond the original file + delta_cap = extended_start1 - 1 + extended_size1 - len_original_lines + extended_size1 = max(extended_size1 - delta_cap, size1) + extended_size2 = max(extended_size2 - delta_cap, size2) + return extended_start1, extended_size1, extended_start2, extended_size2 + + extended_start1, extended_size1, extended_start2, extended_size2 =\ + _calc_context_limits(patch_extra_lines_before_new) + + if allow_dynamic_context: + lines_before = original_lines[extended_start1 - 1:start1 - 1] + found_header = False + for i,line, in enumerate(lines_before): + if section_header in line: + found_header = True + extended_start1 = extended_start1 + i + get_logger().debug(f"Found section header in line {i} before the hunk") + section_header = '' + break + if not found_header: + get_logger().debug(f"Section header not found in the extra lines before the hunk") + extended_start1, extended_size1, extended_start2, extended_size2 = \ + _calc_context_limits(patch_extra_lines_before_original) + + delta_lines = original_lines[extended_start1 - 1:start1 - 1] delta_lines = [f' {line}' for line in delta_lines] - if section_header: + if section_header and not allow_dynamic_context: for line in delta_lines: if section_header in line: section_header = '' # remove section header if it is in the extra delta lines diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 67273c46..c6aa1e38 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -20,6 +20,8 @@ max_commits_tokens = 500 max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities. custom_model_max_tokens=-1 # for models not in the default list # +allow_dynamic_context=true +max_extra_lines_before_dynamic_context = 10 # will try to include up to 10 extra lines before the hunk in the patch, until we reach an enclosing function or class patch_extra_lines_before = 3 # Number of extra lines (+3 default ones) to include before each hunk in the patch patch_extra_lines_after = 1 # Number of extra lines (+3 default ones) to include after each hunk in the patch secret_provider="" From fc40ca9196bca1fe0adcb8f5c8af55096228af76 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Mon, 19 Aug 2024 08:38:26 +0300 Subject: [PATCH 2/3] Refactor dynamic context handling in git patch processing and update configuration default --- pr_agent/algo/git_patch_processing.py | 16 ++++++++-------- pr_agent/settings/configuration.toml | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py index dc254fea..de216f6a 100644 --- a/pr_agent/algo/git_patch_processing.py +++ b/pr_agent/algo/git_patch_processing.py @@ -19,11 +19,10 @@ def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, patch allow_dynamic_context = get_settings().config.allow_dynamic_context max_extra_lines_before_dynamic_context = get_settings().config.max_extra_lines_before_dynamic_context - patch_extra_lines_before_original = patch_extra_lines_before - patch_extra_lines_before_new = patch_extra_lines_before + patch_extra_lines_before_dynamic = patch_extra_lines_before if allow_dynamic_context: if max_extra_lines_before_dynamic_context > patch_extra_lines_before: - patch_extra_lines_before_new = max_extra_lines_before_dynamic_context + patch_extra_lines_before_dynamic = max_extra_lines_before_dynamic_context else: get_logger().warning(f"'max_extra_lines_before_dynamic_context' should be greater than 'patch_extra_lines_before'") @@ -70,10 +69,9 @@ def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, patch extended_size2 = max(extended_size2 - delta_cap, size2) return extended_start1, extended_size1, extended_start2, extended_size2 - extended_start1, extended_size1, extended_start2, extended_size2 =\ - _calc_context_limits(patch_extra_lines_before_new) - if allow_dynamic_context: + extended_start1, extended_size1, extended_start2, extended_size2 = \ + _calc_context_limits(patch_extra_lines_before_dynamic) lines_before = original_lines[extended_start1 - 1:start1 - 1] found_header = False for i,line, in enumerate(lines_before): @@ -86,8 +84,10 @@ def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, patch if not found_header: get_logger().debug(f"Section header not found in the extra lines before the hunk") extended_start1, extended_size1, extended_start2, extended_size2 = \ - _calc_context_limits(patch_extra_lines_before_original) - + _calc_context_limits(patch_extra_lines_before) + else: + extended_start1, extended_size1, extended_start2, extended_size2 = \ + _calc_context_limits(patch_extra_lines_before) delta_lines = original_lines[extended_start1 - 1:start1 - 1] delta_lines = [f' {line}' for line in delta_lines] diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index c6aa1e38..80638e2c 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -20,7 +20,7 @@ max_commits_tokens = 500 max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities. custom_model_max_tokens=-1 # for models not in the default list # -allow_dynamic_context=true +allow_dynamic_context=false max_extra_lines_before_dynamic_context = 10 # will try to include up to 10 extra lines before the hunk in the patch, until we reach an enclosing function or class patch_extra_lines_before = 3 # Number of extra lines (+3 default ones) to include before each hunk in the patch patch_extra_lines_after = 1 # Number of extra lines (+3 default ones) to include after each hunk in the patch From 8fd8d298e7bee3fd4cc3ef47c5536a30a6315bfe Mon Sep 17 00:00:00 2001 From: mrT23 Date: Mon, 19 Aug 2024 10:33:47 +0300 Subject: [PATCH 3/3] Add unit tests for dynamic context handling in extend_patch function --- tests/unittest/test_extend_patch.py | 30 +++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/unittest/test_extend_patch.py b/tests/unittest/test_extend_patch.py index 493f28e7..e76fcaae 100644 --- a/tests/unittest/test_extend_patch.py +++ b/tests/unittest/test_extend_patch.py @@ -2,9 +2,13 @@ import pytest from pr_agent.algo.git_patch_processing import extend_patch from pr_agent.algo.pr_processing import pr_generate_extended_diff from pr_agent.algo.token_handler import TokenHandler +from pr_agent.config_loader import get_settings class TestExtendPatch: + def setUp(self): + get_settings().config.allow_dynamic_context = False + # Tests that the function works correctly with valid input def test_happy_path(self): original_file_str = 'line1\nline2\nline3\nline4\nline5' @@ -61,8 +65,34 @@ class TestExtendPatch: patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines) assert actual_output == expected_output + def test_dynamic_context(self): + get_settings().config.max_extra_lines_before_dynamic_context = 10 + original_file_str = "def foo():" + for i in range(9): + original_file_str += f"\n line({i})" + patch_str ="@@ -11,1 +11,1 @@ def foo():\n- line(9)\n+ new_line(9)" + num_lines=1 + + get_settings().config.allow_dynamic_context = True + actual_output = extend_patch(original_file_str, patch_str, + patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines) + expected_output='\n@@ -1,10 +1,10 @@ \n def foo():\n line(0)\n line(1)\n line(2)\n line(3)\n line(4)\n line(5)\n line(6)\n line(7)\n line(8)\n- line(9)\n+ new_line(9)' + assert actual_output == expected_output + + get_settings().config.allow_dynamic_context = False + actual_output2 = extend_patch(original_file_str, patch_str, + patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines) + expected_output_no_dynamic_context = '\n@@ -10,1 +10,1 @@ def foo():\n line(8)\n- line(9)\n+ new_line(9)' + assert actual_output2 == expected_output_no_dynamic_context + + + + class TestExtendedPatchMoreLines: + def setUp(self): + get_settings().config.allow_dynamic_context = False + class File: def __init__(self, base_file, patch, filename): self.base_file = base_file