diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py
index 8b574976..b2b80f24 100644
--- a/pr_agent/algo/git_patch_processing.py
+++ b/pr_agent/algo/git_patch_processing.py
@@ -285,7 +285,7 @@ def handle_patch_deletions(patch: str, original_file_content_str: str,
return patch
-def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
+def decouple_and_convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
"""
Convert a given patch string into a string with line numbers for each hunk, indicating the new and old content of
the file.
@@ -317,11 +317,17 @@ __old hunk__
line6
...
"""
- # if the file was deleted, return a message indicating that the file was deleted
- if hasattr(file, 'edit_type') and file.edit_type == EDIT_TYPE.DELETED:
- return f"\n\n## File '{file.filename.strip()}' was deleted\n"
- patch_with_lines_str = f"\n\n## File: '{file.filename.strip()}'\n"
+ # Add a header for the file
+ if file:
+ # if the file was deleted, return a message indicating that the file was deleted
+ if hasattr(file, 'edit_type') and file.edit_type == EDIT_TYPE.DELETED:
+ return f"\n\n## File '{file.filename.strip()}' was deleted\n"
+
+ patch_with_lines_str = f"\n\n## File: '{file.filename.strip()}'\n"
+ else:
+ patch_with_lines_str = ""
+
patch_lines = patch.splitlines()
RE_HUNK_HEADER = re.compile(
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
diff --git a/pr_agent/algo/language_handler.py b/pr_agent/algo/language_handler.py
index dea77260..1271cad8 100644
--- a/pr_agent/algo/language_handler.py
+++ b/pr_agent/algo/language_handler.py
@@ -19,6 +19,12 @@ def is_valid_file(filename:str, bad_extensions=None) -> bool:
bad_extensions = get_settings().bad_extensions.default
if get_settings().config.use_extra_bad_extensions:
bad_extensions += get_settings().bad_extensions.extra
+
+ auto_generated_files = ['package-lock.json', 'yarn.lock', 'composer.lock', 'Gemfile.lock', 'poetry.lock']
+ for forbidden_file in auto_generated_files:
+ if filename.endswith(forbidden_file):
+ return False
+
return filename.split('.')[-1] not in bad_extensions
@@ -41,6 +47,7 @@ def sort_files_by_main_languages(languages: Dict, files: list):
# filter out files bad extensions
files_filtered = filter_bad_extensions(files)
+
# sort files by their extension, put the files that are in the main extension first
# and the rest files after, map languages_sorted to their respective files
files_sorted = []
diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py
index 21096a86..17a973dc 100644
--- a/pr_agent/algo/pr_processing.py
+++ b/pr_agent/algo/pr_processing.py
@@ -7,7 +7,8 @@ from github import RateLimitExceededException
from pr_agent.algo.file_filter import filter_ignored
from pr_agent.algo.git_patch_processing import (
- convert_to_hunks_with_lines_numbers, extend_patch, handle_patch_deletions)
+ extend_patch, handle_patch_deletions,
+ decouple_and_convert_to_hunks_with_lines_numbers)
from pr_agent.algo.language_handler import sort_files_by_main_languages
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo
@@ -188,9 +189,10 @@ def pr_generate_extended_diff(pr_languages: list,
continue
if add_line_numbers_to_hunks:
- full_extended_patch = convert_to_hunks_with_lines_numbers(extended_patch, file)
+ full_extended_patch = decouple_and_convert_to_hunks_with_lines_numbers(extended_patch, file)
else:
- full_extended_patch = f"\n\n## File: '{file.filename.strip()}'\n{extended_patch.rstrip()}\n"
+ extended_patch = extended_patch.replace('\n@@ ', '\n\n@@ ') # add extra line before each hunk
+ full_extended_patch = f"\n\n## File: '{file.filename.strip()}'\n\n{extended_patch.strip()}\n"
# add AI-summary metadata to the patch
if file.ai_file_summary and get_settings().get("config.enable_ai_metadata", False):
@@ -233,7 +235,7 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
continue
if convert_hunks_to_line_numbers:
- patch = convert_to_hunks_with_lines_numbers(patch, file)
+ patch = decouple_and_convert_to_hunks_with_lines_numbers(patch, file)
## add AI-summary metadata to the patch (disabled, since we are in the compressed diff)
# if file.ai_file_summary and get_settings().config.get('config.is_auto_command', False):
@@ -437,7 +439,7 @@ def get_pr_multi_diffs(git_provider: GitProvider,
# Add line numbers and metadata to the patch
if add_line_numbers:
- patch = convert_to_hunks_with_lines_numbers(patch, file)
+ patch = decouple_and_convert_to_hunks_with_lines_numbers(patch, file)
else:
patch = f"\n\n## File: '{file.filename.strip()}'\n\n{patch.strip()}\n"
@@ -488,7 +490,7 @@ def get_pr_multi_diffs(git_provider: GitProvider,
# Add the last chunk
if patches:
final_diff = "\n".join(patches)
- final_diff_list.append(final_diff)
+ final_diff_list.append(final_diff.strip())
return final_diff_list
diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py
index d883d4ef..4e42332f 100644
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@@ -704,12 +704,14 @@ def _fix_key_value(key: str, value: str):
def load_yaml(response_text: str, keys_fix_yaml: List[str] = [], first_key="", last_key="") -> dict:
+ response_text_original = copy.deepcopy(response_text)
response_text = response_text.strip('\n').removeprefix('```yaml').rstrip().removesuffix('```')
try:
data = yaml.safe_load(response_text)
except Exception as e:
get_logger().warning(f"Initial failure to parse AI prediction: {e}")
- data = try_fix_yaml(response_text, keys_fix_yaml=keys_fix_yaml, first_key=first_key, last_key=last_key)
+ data = try_fix_yaml(response_text, keys_fix_yaml=keys_fix_yaml, first_key=first_key, last_key=last_key,
+ response_text_original=response_text_original)
if not data:
get_logger().error(f"Failed to parse AI prediction after fallbacks",
artifact={'response_text': response_text})
@@ -723,7 +725,8 @@ def load_yaml(response_text: str, keys_fix_yaml: List[str] = [], first_key="", l
def try_fix_yaml(response_text: str,
keys_fix_yaml: List[str] = [],
first_key="",
- last_key="",) -> dict:
+ last_key="",
+ response_text_original="") -> dict:
response_text_lines = response_text.split('\n')
keys_yaml = ['relevant line:', 'suggestion content:', 'relevant file:', 'existing code:', 'improved code:']
@@ -745,6 +748,8 @@ def try_fix_yaml(response_text: str,
# second fallback - try to extract only range from first ```yaml to ````
snippet_pattern = r'```(yaml)?[\s\S]*?```'
snippet = re.search(snippet_pattern, '\n'.join(response_text_lines_copy))
+ if not snippet:
+ snippet = re.search(snippet_pattern, response_text_original) # before we removed the "```"
if snippet:
snippet_text = snippet.group()
try:
diff --git a/pr_agent/config_loader.py b/pr_agent/config_loader.py
index 55c80239..5dba0f39 100644
--- a/pr_agent/config_loader.py
+++ b/pr_agent/config_loader.py
@@ -19,9 +19,9 @@ global_settings = Dynaconf(
"settings/pr_questions_prompts.toml",
"settings/pr_line_questions_prompts.toml",
"settings/pr_description_prompts.toml",
- "settings/pr_code_suggestions_prompts.toml",
- "settings/pr_code_suggestions_reflect_prompts.toml",
- "settings/pr_sort_code_suggestions_prompts.toml",
+ "settings/code_suggestions/pr_code_suggestions_prompts.toml",
+ "settings/code_suggestions/pr_code_suggestions_prompts_not_decoupled.toml",
+ "settings/code_suggestions/pr_code_suggestions_reflect_prompts.toml",
"settings/pr_information_from_user_prompts.toml",
"settings/pr_update_changelog_prompts.toml",
"settings/pr_custom_labels.toml",
diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/code_suggestions/pr_code_suggestions_prompts.toml
similarity index 100%
rename from pr_agent/settings/pr_code_suggestions_prompts.toml
rename to pr_agent/settings/code_suggestions/pr_code_suggestions_prompts.toml
index a81558c1..a6763023 100644
--- a/pr_agent/settings/pr_code_suggestions_prompts.toml
+++ b/pr_agent/settings/code_suggestions/pr_code_suggestions_prompts.toml
@@ -145,10 +145,10 @@ code_suggestions:
src/file1.py
language: |
python
- suggestion_content: |
- ...
existing_code: |
...
+ suggestion_content: |
+ ...
improved_code: |
...
one_sentence_summary: |
diff --git a/pr_agent/settings/code_suggestions/pr_code_suggestions_prompts_not_decoupled.toml b/pr_agent/settings/code_suggestions/pr_code_suggestions_prompts_not_decoupled.toml
new file mode 100644
index 00000000..3661b71d
--- /dev/null
+++ b/pr_agent/settings/code_suggestions/pr_code_suggestions_prompts_not_decoupled.toml
@@ -0,0 +1,158 @@
+[pr_code_suggestions_prompt_not_decoupled]
+system="""You are PR-Reviewer, an AI specializing in Pull Request (PR) code analysis and suggestions.
+{%- if not focus_only_on_problems %}
+Your task is to examine the provided code diff, focusing on new code (lines prefixed with '+'), and offer concise, actionable suggestions to fix possible bugs and problems, and enhance code quality and performance.
+{%- else %}
+Your task is to examine the provided code diff, focusing on new code (lines prefixed with '+'), and offer concise, actionable suggestions to fix critical bugs and problems.
+{%- endif %}
+
+
+The PR code diff will be in the following structured format:
+======
+## File: 'src/file1.py'
+{%- if is_ai_metadata %}
+### AI-generated changes summary:
+* ...
+* ...
+{%- endif %}
+
+@@ ... @@ def func1():
+ unchanged code line0
+ unchanged code line1
++new code line2
+-removed code line2
+ unchanged code line3
+
+@@ ... @@ def func2():
+...
+
+
+## File: 'src/file2.py'
+...
+======
+The diff structure above uses line prefixes to show changes:
+'+' → new line code added
+'-' → line code removed
+' ' → unchanged context lines
+{%- if is_ai_metadata %}
+
+When available, an AI-generated summary will precede each file's diff, with a high-level overview of the changes. Note that this summary may not be fully accurate or complete.
+{%- endif %}
+
+
+Specific guidelines for generating code suggestions:
+{%- if not focus_only_on_problems %}
+- Provide up to {{ num_code_suggestions }} distinct and insightful code suggestions.
+{%- else %}
+- Provide up to {{ num_code_suggestions }} distinct and insightful code suggestions. Return fewer suggestions if no pertinent ones are applicable.
+{%- endif %}
+- Focus your suggestions ONLY on improving the new code introduced in the PR (lines starting with '+' in the diff). The lines in the diff starting with '-' are only for reference and should not be considered for suggestions.
+{%- if not focus_only_on_problems %}
+- Prioritize suggestions that address potential issues, critical problems, and bugs in the PR code. Avoid repeating changes already implemented in the PR. If no pertinent suggestions are applicable, return an empty list.
+- Don't suggest adding docstrings, type hints, or comments, removing unused imports, or using more specific exception types.
+{%- else %}
+- Only give suggestions that address critical problems and bugs in the PR code. If no relevant suggestions are applicable, return an empty list.
+- DO NOT suggest the following:
+ - change packages version
+ - add missing import statement
+ - declare undefined variable
+ - use more specific exception types
+{%- endif %}
+- When mentioning code elements (variables, names, or files) in your response, surround them with backticks (`). For example: "verify that `user_id` is..."
+- Note that you only see changed code segments (diff hunks in a PR), not the entire codebase. Avoid suggestions that might duplicate existing functionality, or that question code elements (like variable declarations or import statements) that may be defined elsewhere in the codebase.
+
+{%- if extra_instructions %}
+
+
+Extra user-provided instructions (should be addressed with high priority):
+======
+{{ extra_instructions }}
+======
+{%- endif %}
+
+
+The output must be a YAML object equivalent to type $PRCodeSuggestions, according to the following Pydantic definitions:
+=====
+class CodeSuggestion(BaseModel):
+ relevant_file: str = Field(description="Full path of the relevant file")
+ language: str = Field(description="Programming language used by the relevant file")
+ existing_code: str = Field(description="A short code snippet from the final state of the PR diff, that the suggestion aims to enhance or fix. Include only complete code lines, preserving all indentation, newlines, and original formatting. Use ellipsis (...) for brevity if needed. This snippet should represent the specific PR code targeted for improvement.")
+ suggestion_content: str = Field(description="An actionable suggestion to enhance, improve or fix the new code introduced in the PR. Don't present here actual code snippets, just the suggestion. Be short and concise")
+ improved_code: str = Field(description="A refined code snippet that replaces the 'existing_code' snippet after implementing the suggestion.")
+ one_sentence_summary: str = Field(description="A concise, single-sentence overview (up to 6 words) of the suggested improvement. Focus on the 'what'. Be general, and avoid method or variable names.")
+{%- if not focus_only_on_problems %}
+ label: str = Field(description="A single, descriptive label that best characterizes the suggestion type. Possible labels include 'security', 'possible bug', 'possible issue', 'performance', 'enhancement', 'best practice', 'maintainability', 'typo'. Other relevant labels are also acceptable.")
+{%- else %}
+ label: str = Field(description="A single, descriptive label that best characterizes the suggestion type. Possible labels include 'security', 'critical bug', 'general'. The 'general' section should be used for suggestions that address a major issue, but are not necessarily on a critical level.")
+{%- endif %}
+
+
+class PRCodeSuggestions(BaseModel):
+ code_suggestions: List[CodeSuggestion]
+=====
+
+
+Example output:
+```yaml
+code_suggestions:
+- relevant_file: |
+ src/file1.py
+ language: |
+ python
+ existing_code: |
+ ...
+ suggestion_content: |
+ ...
+ improved_code: |
+ ...
+ one_sentence_summary: |
+ ...
+ label: |
+ ...
+```
+
+Each YAML output MUST be after a newline, indented, with block scalar indicator ('|').
+"""
+
+user="""--PR Info--
+
+Title: '{{title}}'
+
+{%- if date %}
+
+Today's Date: {{date}}
+{%- endif %}
+
+The PR Diff:
+======
+{{ diff_no_line_numbers|trim }}
+======
+
+{%- if duplicate_prompt_examples %}
+
+
+Example output:
+```yaml
+code_suggestions:
+- relevant_file: |
+ src/file1.py
+ language: |
+ python
+ existing_code: |
+ ...
+ suggestion_content: |
+ ...
+ improved_code: |
+ ...
+ one_sentence_summary: |
+ ...
+ label: |
+ ...
+```
+(replace '...' with actual content)
+{%- endif %}
+
+
+Response (should be a valid YAML, and nothing else):
+```yaml
+"""
diff --git a/pr_agent/settings/pr_code_suggestions_reflect_prompts.toml b/pr_agent/settings/code_suggestions/pr_code_suggestions_reflect_prompts.toml
similarity index 92%
rename from pr_agent/settings/pr_code_suggestions_reflect_prompts.toml
rename to pr_agent/settings/code_suggestions/pr_code_suggestions_reflect_prompts.toml
index c8e4b5ac..f930eb29 100644
--- a/pr_agent/settings/pr_code_suggestions_reflect_prompts.toml
+++ b/pr_agent/settings/code_suggestions/pr_code_suggestions_reflect_prompts.toml
@@ -2,7 +2,7 @@
system="""You are an AI language model specialized in reviewing and evaluating code suggestions for a Pull Request (PR).
Your task is to analyze a PR code diff and evaluate a set of AI-generated code suggestions. These suggestions aim to address potential bugs and problems, and enhance the new code introduced in the PR.
-Examine each suggestion meticulously, assessing its quality, relevance, and accuracy within the context of PR. Keep in mind that the suggestions may vary in their correctness and accuracy. Your evaluation should be based on a thorough comparison between each suggestion and the actual PR code diff.
+Examine each suggestion meticulously, assessing its quality, relevance, and accuracy within the context of PR. Keep in mind that the suggestions may vary in their correctness, accuracy and impact.
Consider the following components of each suggestion:
1. 'one_sentence_summary' - A brief summary of the suggestion's purpose
2. 'suggestion_content' - The detailed suggestion content, explaining the proposed modification
@@ -31,9 +31,11 @@ Key guidelines for evaluation:
Additional scoring considerations:
- If the suggestion is not actionable, and only asks the user to verify or ensure a change, reduce its score by 1-2 points.
+- Error handling or type checking suggestions should not receive a score above 8 (and may be lower).
- Assign a score of 0 to suggestions aiming at:
- Adding docstring, type hints, or comments
- Remove unused imports or variables
+ - Add missing import statements
- Using more specific exception types.
@@ -82,8 +84,8 @@ The output must be a YAML object equivalent to type $PRCodeSuggestionsFeedback,
class CodeSuggestionFeedback(BaseModel):
suggestion_summary: str = Field(description="Repeated from the input")
relevant_file: str = Field(description="Repeated from the input")
- relevant_lines_start: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion starts (inclusive). Should be derived from the hunk line numbers, and correspond to the beginning of the relevant 'existing code' snippet")
- relevant_lines_end: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion ends (inclusive). Should be derived from the hunk line numbers, and correspond to the end of the relevant 'existing code' snippet")
+ relevant_lines_start: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion starts (inclusive). Should be derived from the added '__new hunk__' line numbers, and correspond to the first line of the relevant 'existing code' snippet.")
+ relevant_lines_end: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion ends (inclusive). Should be derived from the added '__new hunk__' line numbers, and correspond to the end of the relevant 'existing code' snippet")
suggestion_score: int = Field(description="Evaluate the suggestion and assign a score from 0 to 10. Give 0 if the suggestion is wrong. For valid suggestions, score from 1 (lowest impact/importance) to 10 (highest impact/importance).")
why: str = Field(description="Briefly explain the score given in 1-2 sentences, focusing on the suggestion's impact, relevance, and accuracy.")
diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
index 8ea65e87..8bffeb87 100644
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@@ -104,7 +104,7 @@ publish_description_as_comment_persistent=true
## changes walkthrough section
enable_semantic_files_types=true
collapsible_file_list='adaptive' # true, false, 'adaptive'
-collapsible_file_list_threshold=8
+collapsible_file_list_threshold=6
inline_file_summary=false # false, true, 'table'
# markers
use_description_markers=false
@@ -146,6 +146,7 @@ max_number_of_calls = 3
parallel_calls = true
final_clip_factor = 0.8
+decouple_hunks = false
# self-review checkbox
demand_code_suggestions_self_review=false # add a checkbox for the author to self-review the code suggestions
code_suggestions_self_review_text= "**Author self-review**: I have reviewed the PR code suggestions, and addressed the relevant ones."
diff --git a/pr_agent/settings/pr_sort_code_suggestions_prompts.toml b/pr_agent/settings/pr_sort_code_suggestions_prompts.toml
deleted file mode 100644
index 33599ba3..00000000
--- a/pr_agent/settings/pr_sort_code_suggestions_prompts.toml
+++ /dev/null
@@ -1,46 +0,0 @@
-[pr_sort_code_suggestions_prompt]
-system="""
-"""
-
-user="""You are given a list of code suggestions to improve a Git Pull Request (PR):
-======
-{{ suggestion_str|trim }}
-======
-
-Your task is to sort the code suggestions by their order of importance, and return a list with sorting order.
-The sorting order is a list of pairs, where each pair contains the index of the suggestion in the original list.
-Rank the suggestions based on their importance to improving the PR, with critical issues first and minor issues last.
-
-You must use the following YAML schema to format your answer:
-```yaml
-Sort Order:
- type: array
- maxItems: {{ suggestion_list|length }}
- uniqueItems: true
- items:
- suggestion number:
- type: integer
- minimum: 1
- maximum: {{ suggestion_list|length }}
- importance order:
- type: integer
- minimum: 1
- maximum: {{ suggestion_list|length }}
-```
-
-Example output:
-```yaml
-Sort Order:
- - suggestion number: 1
- importance order: 2
- - suggestion number: 2
- importance order: 3
- - suggestion number: 3
- importance order: 1
-```
-
-Make sure to output a valid YAML. Use multi-line block scalar ('|') if needed.
-Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields.
-Response (should be a valid YAML, and nothing else):
-```yaml
-"""
diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py
index cfd2c7a9..becee9a7 100644
--- a/pr_agent/tools/pr_code_suggestions.py
+++ b/pr_agent/tools/pr_code_suggestions.py
@@ -10,14 +10,16 @@ from typing import Dict, List
from jinja2 import Environment, StrictUndefined
+from pr_agent.algo import MAX_TOKENS
from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
+from pr_agent.algo.git_patch_processing import decouple_and_convert_to_hunks_with_lines_numbers
from pr_agent.algo.pr_processing import (add_ai_metadata_to_diff_files,
get_pr_diff, get_pr_multi_diffs,
retry_with_fallback_models)
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import (ModelType, load_yaml, replace_code_tags,
- show_relevant_configurations)
+ show_relevant_configurations, get_max_tokens, clip_tokens)
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import (AzureDevopsProvider, GithubProvider,
GitLabProvider, get_git_provider,
@@ -45,14 +47,8 @@ class PRCodeSuggestions:
get_settings().config.max_model_tokens_original = get_settings().config.max_model_tokens
get_settings().config.max_model_tokens = MAX_CONTEXT_TOKENS_IMPROVE
- # extended mode
- try:
- self.is_extended = self._get_is_extended(args or [])
- except:
- self.is_extended = False
num_code_suggestions = int(get_settings().pr_code_suggestions.num_code_suggestions_per_chunk)
-
self.ai_handler = ai_handler()
self.ai_handler.main_pr_language = self.main_language
self.patches_diff = None
@@ -85,12 +81,18 @@ class PRCodeSuggestions:
"date": datetime.now().strftime('%Y-%m-%d'),
'duplicate_prompt_examples': get_settings().config.get('duplicate_prompt_examples', False),
}
- self.pr_code_suggestions_prompt_system = get_settings().pr_code_suggestions_prompt.system
+
+ if get_settings().pr_code_suggestions.get("decouple_hunks", True):
+ self.pr_code_suggestions_prompt_system = get_settings().pr_code_suggestions_prompt.system
+ self.pr_code_suggestions_prompt_user = get_settings().pr_code_suggestions_prompt.user
+ else:
+ self.pr_code_suggestions_prompt_system = get_settings().pr_code_suggestions_prompt_not_decoupled.system
+ self.pr_code_suggestions_prompt_user = get_settings().pr_code_suggestions_prompt_not_decoupled.user
self.token_handler = TokenHandler(self.git_provider.pr,
self.vars,
self.pr_code_suggestions_prompt_system,
- get_settings().pr_code_suggestions_prompt.user)
+ self.pr_code_suggestions_prompt_user)
self.progress = f"## Generating PR code suggestions\n\n"
self.progress += f"""\nWork in progress ...
\n
"""
@@ -115,11 +117,11 @@ class PRCodeSuggestions:
else:
self.git_provider.publish_comment("Preparing suggestions...", is_temporary=True)
- # call the model to get the suggestions, and self-reflect on them
- if not self.is_extended:
- data = await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.REGULAR)
- else:
- data = await retry_with_fallback_models(self._prepare_prediction_extended, model_type=ModelType.REGULAR)
+            # call the model to get the suggestions, and self-reflect on them.
+            # The former branch on `self.is_extended` (which chose between
+            # `_prepare_prediction` and `_prepare_prediction_extended`) was removed:
+            # the extended flow below is now used unconditionally.
+ data = await retry_with_fallback_models(self._prepare_prediction_extended, model_type=ModelType.REGULAR)
if not data:
data = {"code_suggestions": []}
self.data = data
@@ -623,16 +625,6 @@ class PRCodeSuggestions:
return new_code_snippet
- def _get_is_extended(self, args: list[str]) -> bool:
- """Check if extended mode should be enabled by the `--extended` flag or automatically according to the configuration"""
- if any(["extended" in arg for arg in args]):
- get_logger().info("Extended mode is enabled by the `--extended` flag")
- return True
- if get_settings().pr_code_suggestions.auto_extended_mode:
- # get_logger().info("Extended mode is enabled automatically based on the configuration toggle")
- return True
- return False
-
def validate_one_liner_suggestion_not_repeating_code(self, suggestion):
try:
existing_code = suggestion.get('existing_code', '').strip()
@@ -683,11 +675,31 @@ class PRCodeSuggestions:
return patches_diff_list
async def _prepare_prediction_extended(self, model: str) -> dict:
- self.patches_diff_list = get_pr_multi_diffs(self.git_provider, self.token_handler, model,
- max_calls=get_settings().pr_code_suggestions.max_number_of_calls)
+ # get PR diff
+ if get_settings().pr_code_suggestions.decouple_hunks:
+ self.patches_diff_list = get_pr_multi_diffs(self.git_provider,
+ self.token_handler,
+ model,
+ max_calls=get_settings().pr_code_suggestions.max_number_of_calls,
+ add_line_numbers=True) # decouple hunk with line numbers
+ self.patches_diff_list_no_line_numbers = self.remove_line_numbers(self.patches_diff_list) # decouple hunk
- # create a copy of the patches_diff_list, without line numbers for '__new hunk__' sections
- self.patches_diff_list_no_line_numbers = self.remove_line_numbers(self.patches_diff_list)
+ else:
+ # non-decoupled hunks
+ self.patches_diff_list_no_line_numbers = get_pr_multi_diffs(self.git_provider,
+ self.token_handler,
+ model,
+ max_calls=get_settings().pr_code_suggestions.max_number_of_calls,
+ add_line_numbers=False)
+ self.patches_diff_list = await self.convert_to_decoupled_with_line_numbers(
+ self.patches_diff_list_no_line_numbers, model)
+ if not self.patches_diff_list:
+ # fallback to decoupled hunks
+ self.patches_diff_list = get_pr_multi_diffs(self.git_provider,
+ self.token_handler,
+ model,
+ max_calls=get_settings().pr_code_suggestions.max_number_of_calls,
+ add_line_numbers=True) # decouple hunk with line numbers
if self.patches_diff_list:
get_logger().info(f"Number of PR chunk calls: {len(self.patches_diff_list)}")
@@ -728,6 +740,42 @@ class PRCodeSuggestions:
self.data = data = None
return data
+    async def convert_to_decoupled_with_line_numbers(self, patches_diff_list_no_line_numbers, model) -> List[str]:
+        """Rebuild each plain diff chunk as decoupled hunks with line numbers; returns [] if any step raises."""
+        with get_logger().contextualize(sub_feature='convert_to_decoupled_with_line_numbers'):
+            try:
+                file_prefix = "## File: "
+                patches_diff_list = []
+                for patch_prompt in patches_diff_list_no_line_numbers:
+                    # one entry per file section; the split strips the "## File: " marker from all but the first
+                    patches_new = patch_prompt.strip().split(f"\n{file_prefix}")
+                    for i, patch in enumerate(patches_new):
+                        # header = text before the first hunk; kept whole so the quoted filename stays intact
+                        prefix = patch.split("\n@@")[0].strip()
+                        if i > 0:
+                            prefix = (file_prefix + prefix).strip()  # restore the marker removed by the split
+                        hunks = decouple_and_convert_to_hunks_with_lines_numbers(patch, file=None).strip()
+                        patches_new[i] = (prefix + '\n\n' + hunks).strip()
+                    patch_final = "\n\n\n".join(patches_new)
+                    # prefer the raw MAX_TOKENS entry for the model; `delta_output` is presumably output headroom
+                    if model in MAX_TOKENS:
+                        max_tokens_full = MAX_TOKENS[model]
+                    else:
+                        max_tokens_full = get_max_tokens(model)
+                    delta_output = 2000
+                    token_count = self.token_handler.count_tokens(patch_final)
+                    if token_count > max_tokens_full - delta_output:
+                        get_logger().warning(
+                            f"Token count {token_count} exceeds the limit {max_tokens_full - delta_output}. clipping the tokens")
+                        patch_final = clip_tokens(patch_final, max_tokens_full - delta_output)
+                    patches_diff_list.append(patch_final)
+                return patches_diff_list
+            except Exception:
+                # best-effort: log and return an empty list so the caller can detect the failure
+                get_logger().exception(f"Error converting to decoupled with line numbers",
+                                       artifact={'patches_diff_list_no_line_numbers': patches_diff_list_no_line_numbers})
+                return []
+
def generate_summarized_suggestions(self, data: Dict) -> str:
try:
pr_body = "## PR Code Suggestions ✨\n\n"
diff --git a/pr_agent/tools/pr_line_questions.py b/pr_agent/tools/pr_line_questions.py
index 760c81ff..a122c534 100644
--- a/pr_agent/tools/pr_line_questions.py
+++ b/pr_agent/tools/pr_line_questions.py
@@ -7,7 +7,7 @@ from jinja2 import Environment, StrictUndefined
from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
from pr_agent.algo.git_patch_processing import (
- convert_to_hunks_with_lines_numbers, extract_hunk_lines_from_patch)
+ decouple_and_convert_to_hunks_with_lines_numbers, extract_hunk_lines_from_patch)
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import ModelType
diff --git a/tests/unittest/test_extend_patch.py b/tests/unittest/test_extend_patch.py
index 91583f85..070b3d54 100644
--- a/tests/unittest/test_extend_patch.py
+++ b/tests/unittest/test_extend_patch.py
@@ -5,8 +5,8 @@ from pr_agent.algo.pr_processing import pr_generate_extended_diff
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import load_large_diff
from pr_agent.config_loader import get_settings
-get_settings().set("CONFIG.CLI_MODE", True)
-get_settings().config.allow_dynamic_context = False
+get_settings(use_context=False).set("CONFIG.CLI_MODE", True)
+get_settings(use_context=False).config.allow_dynamic_context = False
class TestExtendPatch:
@@ -61,15 +61,15 @@ class TestExtendPatch:
original_file_str = 'line1\nline2\nline3\nline4\nline5\nline6'
patch_str = '@@ -2,3 +2,3 @@ init()\n-line2\n+new_line2\n line3\n line4\n@@ -4,1 +4,1 @@ init2()\n-line4\n+new_line4' # noqa: E501
num_lines = 1
- original_allow_dynamic_context = get_settings().config.allow_dynamic_context
+ original_allow_dynamic_context = get_settings(use_context=False).config.allow_dynamic_context
- get_settings().config.allow_dynamic_context = False
+ get_settings(use_context=False).config.allow_dynamic_context = False
expected_output = '\n@@ -1,5 +1,5 @@ init()\n line1\n-line2\n+new_line2\n line3\n line4\n line5\n\n@@ -3,3 +3,3 @@ init2()\n line3\n-line4\n+new_line4\n line5' # noqa: E501
actual_output = extend_patch(original_file_str, patch_str,
patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines)
assert actual_output == expected_output
- get_settings().config.allow_dynamic_context = True
+ get_settings(use_context=False).config.allow_dynamic_context = True
expected_output = '\n@@ -1,5 +1,5 @@ init()\n line1\n-line2\n+new_line2\n line3\n line4\n line5\n\n@@ -3,3 +3,3 @@ init2()\n line3\n-line4\n+new_line4\n line5' # noqa: E501
actual_output = extend_patch(original_file_str, patch_str,
patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines)
@@ -152,8 +152,8 @@ class TestExtendedPatchMoreLines:
# Check that with no extra lines, the patches are the same as the original patches
p0 = patches_extended_no_extra_lines[0].strip()
p1 = patches_extended_no_extra_lines[1].strip()
- assert p0 == "## File: 'file1'\n" + pr_languages[0]['files'][0].patch.strip()
- assert p1 == "## File: 'file2'\n" + pr_languages[0]['files'][1].patch.strip()
+ assert p0 == "## File: 'file1'\n\n" + pr_languages[0]['files'][0].patch.strip()
+ assert p1 == "## File: 'file2'\n\n" + pr_languages[0]['files'][1].patch.strip()
patches_extended_with_extra_lines, total_tokens, patches_extended_tokens = pr_generate_extended_diff(
pr_languages, token_handler, add_line_numbers_to_hunks=False,