mirror of
https://github.com/qodo-ai/pr-agent.git
synced 2025-07-02 03:40:38 +08:00
Add decoupled and non-decoupled modes for code suggestions
This commit is contained in:
@ -285,7 +285,7 @@ def handle_patch_deletions(patch: str, original_file_content_str: str,
|
||||
return patch
|
||||
|
||||
|
||||
def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
|
||||
def decouple_and_convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
|
||||
"""
|
||||
Convert a given patch string into a string with line numbers for each hunk, indicating the new and old content of
|
||||
the file.
|
||||
@ -317,11 +317,17 @@ __old hunk__
|
||||
line6
|
||||
...
|
||||
"""
|
||||
# if the file was deleted, return a message indicating that the file was deleted
|
||||
if hasattr(file, 'edit_type') and file.edit_type == EDIT_TYPE.DELETED:
|
||||
return f"\n\n## File '{file.filename.strip()}' was deleted\n"
|
||||
|
||||
patch_with_lines_str = f"\n\n## File: '{file.filename.strip()}'\n"
|
||||
# Add a header for the file
|
||||
if file:
|
||||
# if the file was deleted, return a message indicating that the file was deleted
|
||||
if hasattr(file, 'edit_type') and file.edit_type == EDIT_TYPE.DELETED:
|
||||
return f"\n\n## File '{file.filename.strip()}' was deleted\n"
|
||||
|
||||
patch_with_lines_str = f"\n\n## File: '{file.filename.strip()}'\n"
|
||||
else:
|
||||
patch_with_lines_str = ""
|
||||
|
||||
patch_lines = patch.splitlines()
|
||||
RE_HUNK_HEADER = re.compile(
|
||||
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
|
||||
|
@ -7,7 +7,8 @@ from github import RateLimitExceededException
|
||||
|
||||
from pr_agent.algo.file_filter import filter_ignored
|
||||
from pr_agent.algo.git_patch_processing import (
|
||||
convert_to_hunks_with_lines_numbers, extend_patch, handle_patch_deletions)
|
||||
extend_patch, handle_patch_deletions,
|
||||
decouple_and_convert_to_hunks_with_lines_numbers)
|
||||
from pr_agent.algo.language_handler import sort_files_by_main_languages
|
||||
from pr_agent.algo.token_handler import TokenHandler
|
||||
from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo
|
||||
@ -50,7 +51,7 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler,
|
||||
PATCH_EXTRA_LINES_AFTER = cap_and_log_extra_lines(PATCH_EXTRA_LINES_AFTER, "after")
|
||||
|
||||
try:
|
||||
diff_files = git_provider.get_diff_files()
|
||||
diff_files_original = git_provider.get_diff_files()
|
||||
except RateLimitExceededException as e:
|
||||
get_logger().error(f"Rate limit exceeded for git provider API. original message {e}")
|
||||
raise
|
||||
@ -144,7 +145,7 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler,
|
||||
def get_pr_diff_multiple_patchs(git_provider: GitProvider, token_handler: TokenHandler, model: str,
|
||||
add_line_numbers_to_hunks: bool = False, disable_extra_lines: bool = False):
|
||||
try:
|
||||
diff_files = git_provider.get_diff_files()
|
||||
diff_files_original = git_provider.get_diff_files()
|
||||
except RateLimitExceededException as e:
|
||||
get_logger().error(f"Rate limit exceeded for git provider API. original message {e}")
|
||||
raise
|
||||
@ -188,9 +189,10 @@ def pr_generate_extended_diff(pr_languages: list,
|
||||
continue
|
||||
|
||||
if add_line_numbers_to_hunks:
|
||||
full_extended_patch = convert_to_hunks_with_lines_numbers(extended_patch, file)
|
||||
full_extended_patch = decouple_and_convert_to_hunks_with_lines_numbers(extended_patch, file)
|
||||
else:
|
||||
full_extended_patch = f"\n\n## File: '{file.filename.strip()}'\n{extended_patch.rstrip()}\n"
|
||||
extended_patch = extended_patch.replace('\n@@ ', '\n\n@@ ') # add extra line before each hunk
|
||||
full_extended_patch = f"\n\n## File: '{file.filename.strip()}'\n\n{extended_patch.strip()}\n"
|
||||
|
||||
# add AI-summary metadata to the patch
|
||||
if file.ai_file_summary and get_settings().get("config.enable_ai_metadata", False):
|
||||
@ -233,7 +235,7 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
|
||||
continue
|
||||
|
||||
if convert_hunks_to_line_numbers:
|
||||
patch = convert_to_hunks_with_lines_numbers(patch, file)
|
||||
patch = decouple_and_convert_to_hunks_with_lines_numbers(patch, file)
|
||||
|
||||
## add AI-summary metadata to the patch (disabled, since we are in the compressed diff)
|
||||
# if file.ai_file_summary and get_settings().config.get('config.is_auto_command', False):
|
||||
@ -437,7 +439,7 @@ def get_pr_multi_diffs(git_provider: GitProvider,
|
||||
|
||||
# Add line numbers and metadata to the patch
|
||||
if add_line_numbers:
|
||||
patch = convert_to_hunks_with_lines_numbers(patch, file)
|
||||
patch = decouple_and_convert_to_hunks_with_lines_numbers(patch, file)
|
||||
else:
|
||||
patch = f"\n\n## File: '{file.filename.strip()}'\n\n{patch.strip()}\n"
|
||||
|
||||
@ -488,7 +490,7 @@ def get_pr_multi_diffs(git_provider: GitProvider,
|
||||
# Add the last chunk
|
||||
if patches:
|
||||
final_diff = "\n".join(patches)
|
||||
final_diff_list.append(final_diff)
|
||||
final_diff_list.append(final_diff.strip())
|
||||
|
||||
return final_diff_list
|
||||
|
||||
|
@ -19,9 +19,9 @@ global_settings = Dynaconf(
|
||||
"settings/pr_questions_prompts.toml",
|
||||
"settings/pr_line_questions_prompts.toml",
|
||||
"settings/pr_description_prompts.toml",
|
||||
"settings/pr_code_suggestions_prompts.toml",
|
||||
"settings/pr_code_suggestions_reflect_prompts.toml",
|
||||
"settings/pr_sort_code_suggestions_prompts.toml",
|
||||
"settings/code_suggestions/pr_code_suggestions_prompts.toml",
|
||||
"settings/code_suggestions/pr_code_suggestions_prompts_not_decoupled.toml",
|
||||
"settings/code_suggestions/pr_code_suggestions_reflect_prompts.toml",
|
||||
"settings/pr_information_from_user_prompts.toml",
|
||||
"settings/pr_update_changelog_prompts.toml",
|
||||
"settings/pr_custom_labels.toml",
|
||||
|
@ -145,10 +145,10 @@ code_suggestions:
|
||||
src/file1.py
|
||||
language: |
|
||||
python
|
||||
suggestion_content: |
|
||||
...
|
||||
existing_code: |
|
||||
...
|
||||
suggestion_content: |
|
||||
...
|
||||
improved_code: |
|
||||
...
|
||||
one_sentence_summary: |
|
@ -0,0 +1,158 @@
|
||||
[pr_code_suggestions_prompt_not_decoupled]
|
||||
system="""You are PR-Reviewer, an AI specializing in Pull Request (PR) code analysis and suggestions.
|
||||
{%- if not focus_only_on_problems %}
|
||||
Your task is to examine the provided code diff, focusing on new code (lines prefixed with '+'), and offer concise, actionable suggestions to fix possible bugs and problems, and enhance code quality and performance.
|
||||
{%- else %}
|
||||
Your task is to examine the provided code diff, focusing on new code (lines prefixed with '+'), and offer concise, actionable suggestions to fix critical bugs and problems.
|
||||
{%- endif %}
|
||||
|
||||
|
||||
The PR code diff will be in the following structured format:
|
||||
======
|
||||
## File: 'src/file1.py'
|
||||
{%- if is_ai_metadata %}
|
||||
### AI-generated changes summary:
|
||||
* ...
|
||||
* ...
|
||||
{%- endif %}
|
||||
|
||||
@@ ... @@ def func1():
|
||||
unchanged code line0
|
||||
unchanged code line1
|
||||
+new code line2
|
||||
-removed code line2
|
||||
unchanged code line3
|
||||
|
||||
@@ ... @@ def func2():
|
||||
...
|
||||
|
||||
|
||||
## File: 'src/file2.py'
|
||||
...
|
||||
======
|
||||
The diff structure above uses line prefixes to show changes:
|
||||
'+' → new line code added
|
||||
'-' → line code removed
|
||||
' ' → unchanged context lines
|
||||
{%- if is_ai_metadata %}
|
||||
|
||||
When available, an AI-generated summary will precede each file's diff, with a high-level overview of the changes. Note that this summary may not be fully accurate or complete.
|
||||
{%- endif %}
|
||||
|
||||
|
||||
Specific guidelines for generating code suggestions:
|
||||
{%- if not focus_only_on_problems %}
|
||||
- Provide up to {{ num_code_suggestions }} distinct and insightful code suggestions.
|
||||
{%- else %}
|
||||
- Provide up to {{ num_code_suggestions }} distinct and insightful code suggestions. Return fewer suggestions if no pertinent ones are applicable.
|
||||
{%- endif %}
|
||||
- Focus your suggestions ONLY on improving the new code introduced in the PR (lines starting with '+' in the diff). The lines in the diff starting with '-' are only for reference and should not be considered for suggestions.
|
||||
{%- if not focus_only_on_problems %}
|
||||
- Prioritize suggestions that address potential issues, critical problems, and bugs in the PR code. Avoid repeating changes already implemented in the PR. If no pertinent suggestions are applicable, return an empty list.
|
||||
- Don't suggest to add docstring, type hints, or comments, to remove unused imports, or to use more specific exception types.
|
||||
{%- else %}
|
||||
- Only give suggestions that address critical problems and bugs in the PR code. If no relevant suggestions are applicable, return an empty list.
|
||||
- DO NOT suggest the following:
|
||||
- change packages version
|
||||
- add missing import statement
|
||||
- declare undefined variable
|
||||
- use more specific exception types
|
||||
{%- endif %}
|
||||
- When mentioning code elements (variables, names, or files) in your response, surround them with backticks (`). For example: "verify that `user_id` is..."
|
||||
- Note that you only see changed code segments (diff hunks in a PR), not the entire codebase. Avoid suggestions that might duplicate existing functionality, or that question code elements (like variable declarations or import statements) that may be defined elsewhere in the codebase.
|
||||
|
||||
{%- if extra_instructions %}
|
||||
|
||||
|
||||
Extra user-provided instructions (should be addressed with high priority):
|
||||
======
|
||||
{{ extra_instructions }}
|
||||
======
|
||||
{%- endif %}
|
||||
|
||||
|
||||
The output must be a YAML object equivalent to type $PRCodeSuggestions, according to the following Pydantic definitions:
|
||||
=====
|
||||
class CodeSuggestion(BaseModel):
|
||||
relevant_file: str = Field(description="Full path of the relevant file")
|
||||
language: str = Field(description="Programming language used by the relevant file")
|
||||
existing_code: str = Field(description="A short code snippet from the final state of the PR diff, that the suggestion aims to enhance or fix. Include only complete code lines, preserving all indentation, newlines, and original formatting. Use ellipsis (...) for brevity if needed. This snippet should represent the specific PR code targeted for improvement.")
|
||||
suggestion_content: str = Field(description="An actionable suggestion to enhance, improve or fix the new code introduced in the PR. Don't present here actual code snippets, just the suggestion. Be short and concise")
|
||||
improved_code: str = Field(description="A refined code snippet that replaces the 'existing_code' snippet after implementing the suggestion.")
|
||||
one_sentence_summary: str = Field(description="A concise, single-sentence overview (up to 6 words) of the suggested improvement. Focus on the 'what'. Be general, and avoid method or variable names.")
|
||||
{%- if not focus_only_on_problems %}
|
||||
label: str = Field(description="A single, descriptive label that best characterizes the suggestion type. Possible labels include 'security', 'possible bug', 'possible issue', 'performance', 'enhancement', 'best practice', 'maintainability', 'typo'. Other relevant labels are also acceptable.")
|
||||
{%- else %}
|
||||
label: str = Field(description="A single, descriptive label that best characterizes the suggestion type. Possible labels include 'security', 'critical bug', 'general'. The 'general' section should be used for suggestions that address a major issue, but are not necessarily on a critical level.")
|
||||
{%- endif %}
|
||||
|
||||
|
||||
class PRCodeSuggestions(BaseModel):
|
||||
code_suggestions: List[CodeSuggestion]
|
||||
=====
|
||||
|
||||
|
||||
Example output:
|
||||
```yaml
|
||||
code_suggestions:
|
||||
- relevant_file: |
|
||||
src/file1.py
|
||||
language: |
|
||||
python
|
||||
existing_code: |
|
||||
...
|
||||
suggestion_content: |
|
||||
...
|
||||
improved_code: |
|
||||
...
|
||||
one_sentence_summary: |
|
||||
...
|
||||
label: |
|
||||
...
|
||||
```
|
||||
|
||||
Each YAML output MUST be after a newline, indented, with block scalar indicator ('|').
|
||||
"""
|
||||
|
||||
user="""--PR Info--
|
||||
|
||||
Title: '{{title}}'
|
||||
|
||||
{%- if date %}
|
||||
|
||||
Today's Date: {{date}}
|
||||
{%- endif %}
|
||||
|
||||
The PR Diff:
|
||||
======
|
||||
{{ diff_no_line_numbers|trim }}
|
||||
======
|
||||
|
||||
{%- if duplicate_prompt_examples %}
|
||||
|
||||
|
||||
Example output:
|
||||
```yaml
|
||||
code_suggestions:
|
||||
- relevant_file: |
|
||||
src/file1.py
|
||||
language: |
|
||||
python
|
||||
existing_code: |
|
||||
...
|
||||
suggestion_content: |
|
||||
...
|
||||
improved_code: |
|
||||
...
|
||||
one_sentence_summary: |
|
||||
...
|
||||
label: |
|
||||
...
|
||||
```
|
||||
(replace '...' with actual content)
|
||||
{%- endif %}
|
||||
|
||||
|
||||
Response (should be a valid YAML, and nothing else):
|
||||
```yaml
|
||||
"""
|
@ -146,6 +146,7 @@ max_number_of_calls = 3
|
||||
parallel_calls = true
|
||||
|
||||
final_clip_factor = 0.8
|
||||
decouple_hunks = false
|
||||
# self-review checkbox
|
||||
demand_code_suggestions_self_review=false # add a checkbox for the author to self-review the code suggestions
|
||||
code_suggestions_self_review_text= "**Author self-review**: I have reviewed the PR code suggestions, and addressed the relevant ones."
|
||||
|
@ -1,46 +0,0 @@
|
||||
[pr_sort_code_suggestions_prompt]
|
||||
system="""
|
||||
"""
|
||||
|
||||
user="""You are given a list of code suggestions to improve a Git Pull Request (PR):
|
||||
======
|
||||
{{ suggestion_str|trim }}
|
||||
======
|
||||
|
||||
Your task is to sort the code suggestions by their order of importance, and return a list with sorting order.
|
||||
The sorting order is a list of pairs, where each pair contains the index of the suggestion in the original list.
|
||||
Rank the suggestions based on their importance to improving the PR, with critical issues first and minor issues last.
|
||||
|
||||
You must use the following YAML schema to format your answer:
|
||||
```yaml
|
||||
Sort Order:
|
||||
type: array
|
||||
maxItems: {{ suggestion_list|length }}
|
||||
uniqueItems: true
|
||||
items:
|
||||
suggestion number:
|
||||
type: integer
|
||||
minimum: 1
|
||||
maximum: {{ suggestion_list|length }}
|
||||
importance order:
|
||||
type: integer
|
||||
minimum: 1
|
||||
maximum: {{ suggestion_list|length }}
|
||||
```
|
||||
|
||||
Example output:
|
||||
```yaml
|
||||
Sort Order:
|
||||
- suggestion number: 1
|
||||
importance order: 2
|
||||
- suggestion number: 2
|
||||
importance order: 3
|
||||
- suggestion number: 3
|
||||
importance order: 1
|
||||
```
|
||||
|
||||
Make sure to output a valid YAML. Use multi-line block scalar ('|') if needed.
|
||||
Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields.
|
||||
Response (should be a valid YAML, and nothing else):
|
||||
```yaml
|
||||
"""
|
@ -10,14 +10,16 @@ from typing import Dict, List
|
||||
|
||||
from jinja2 import Environment, StrictUndefined
|
||||
|
||||
from pr_agent.algo import MAX_TOKENS
|
||||
from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
|
||||
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
|
||||
from pr_agent.algo.git_patch_processing import decouple_and_convert_to_hunks_with_lines_numbers
|
||||
from pr_agent.algo.pr_processing import (add_ai_metadata_to_diff_files,
|
||||
get_pr_diff, get_pr_multi_diffs,
|
||||
retry_with_fallback_models)
|
||||
from pr_agent.algo.token_handler import TokenHandler
|
||||
from pr_agent.algo.utils import (ModelType, load_yaml, replace_code_tags,
|
||||
show_relevant_configurations)
|
||||
show_relevant_configurations, get_max_tokens, clip_tokens)
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.git_providers import (AzureDevopsProvider, GithubProvider,
|
||||
GitLabProvider, get_git_provider,
|
||||
@ -45,14 +47,8 @@ class PRCodeSuggestions:
|
||||
get_settings().config.max_model_tokens_original = get_settings().config.max_model_tokens
|
||||
get_settings().config.max_model_tokens = MAX_CONTEXT_TOKENS_IMPROVE
|
||||
|
||||
# extended mode
|
||||
try:
|
||||
self.is_extended = self._get_is_extended(args or [])
|
||||
except:
|
||||
self.is_extended = False
|
||||
num_code_suggestions = int(get_settings().pr_code_suggestions.num_code_suggestions_per_chunk)
|
||||
|
||||
|
||||
self.ai_handler = ai_handler()
|
||||
self.ai_handler.main_pr_language = self.main_language
|
||||
self.patches_diff = None
|
||||
@ -85,12 +81,18 @@ class PRCodeSuggestions:
|
||||
"date": datetime.now().strftime('%Y-%m-%d'),
|
||||
'duplicate_prompt_examples': get_settings().config.get('duplicate_prompt_examples', False),
|
||||
}
|
||||
self.pr_code_suggestions_prompt_system = get_settings().pr_code_suggestions_prompt.system
|
||||
|
||||
if get_settings().pr_code_suggestions.get("decouple_hunks", True):
|
||||
self.pr_code_suggestions_prompt_system = get_settings().pr_code_suggestions_prompt.system
|
||||
self.pr_code_suggestions_prompt_user = get_settings().pr_code_suggestions_prompt.user
|
||||
else:
|
||||
self.pr_code_suggestions_prompt_system = get_settings().pr_code_suggestions_prompt_not_decoupled.system
|
||||
self.pr_code_suggestions_prompt_user = get_settings().pr_code_suggestions_prompt_not_decoupled.user
|
||||
|
||||
self.token_handler = TokenHandler(self.git_provider.pr,
|
||||
self.vars,
|
||||
self.pr_code_suggestions_prompt_system,
|
||||
get_settings().pr_code_suggestions_prompt.user)
|
||||
self.pr_code_suggestions_prompt_user)
|
||||
|
||||
self.progress = f"## Generating PR code suggestions\n\n"
|
||||
self.progress += f"""\nWork in progress ...<br>\n<img src="https://codium.ai/images/pr_agent/dual_ball_loading-crop.gif" width=48>"""
|
||||
@ -115,11 +117,11 @@ class PRCodeSuggestions:
|
||||
else:
|
||||
self.git_provider.publish_comment("Preparing suggestions...", is_temporary=True)
|
||||
|
||||
# call the model to get the suggestions, and self-reflect on them
|
||||
if not self.is_extended:
|
||||
data = await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.REGULAR)
|
||||
else:
|
||||
data = await retry_with_fallback_models(self._prepare_prediction_extended, model_type=ModelType.REGULAR)
|
||||
# # call the model to get the suggestions, and self-reflect on them
|
||||
# if not self.is_extended:
|
||||
# data = await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.REGULAR)
|
||||
# else:
|
||||
data = await retry_with_fallback_models(self._prepare_prediction_extended, model_type=ModelType.REGULAR)
|
||||
if not data:
|
||||
data = {"code_suggestions": []}
|
||||
self.data = data
|
||||
@ -623,16 +625,6 @@ class PRCodeSuggestions:
|
||||
|
||||
return new_code_snippet
|
||||
|
||||
def _get_is_extended(self, args: list[str]) -> bool:
    """
    Tell whether extended mode should be enabled.

    Extended mode is on when any of the given arguments contains the
    substring "extended" (e.g. the `--extended` flag), or when the
    `pr_code_suggestions.auto_extended_mode` setting is truthy.

    Args:
        args: the command arguments to scan for the flag.

    Returns:
        True if extended mode should be enabled, False otherwise.
    """
    requested_by_flag = any("extended" in cli_arg for cli_arg in args)
    if not requested_by_flag:
        # no explicit flag - fall back to the configuration toggle
        return bool(get_settings().pr_code_suggestions.auto_extended_mode)

    get_logger().info("Extended mode is enabled by the `--extended` flag")
    return True
|
||||
|
||||
def validate_one_liner_suggestion_not_repeating_code(self, suggestion):
|
||||
try:
|
||||
existing_code = suggestion.get('existing_code', '').strip()
|
||||
@ -683,11 +675,31 @@ class PRCodeSuggestions:
|
||||
return patches_diff_list
|
||||
|
||||
async def _prepare_prediction_extended(self, model: str) -> dict:
|
||||
self.patches_diff_list = get_pr_multi_diffs(self.git_provider, self.token_handler, model,
|
||||
max_calls=get_settings().pr_code_suggestions.max_number_of_calls)
|
||||
# get PR diff
|
||||
if get_settings().pr_code_suggestions.decouple_hunks:
|
||||
self.patches_diff_list = get_pr_multi_diffs(self.git_provider,
|
||||
self.token_handler,
|
||||
model,
|
||||
max_calls=get_settings().pr_code_suggestions.max_number_of_calls,
|
||||
add_line_numbers=True) # decouple hunk with line numbers
|
||||
self.patches_diff_list_no_line_numbers = self.remove_line_numbers(self.patches_diff_list) # decouple hunk
|
||||
|
||||
# create a copy of the patches_diff_list, without line numbers for '__new hunk__' sections
|
||||
self.patches_diff_list_no_line_numbers = self.remove_line_numbers(self.patches_diff_list)
|
||||
else:
|
||||
# non-decoupled hunks
|
||||
self.patches_diff_list_no_line_numbers = get_pr_multi_diffs(self.git_provider,
|
||||
self.token_handler,
|
||||
model,
|
||||
max_calls=get_settings().pr_code_suggestions.max_number_of_calls,
|
||||
add_line_numbers=False)
|
||||
self.patches_diff_list = await self.convert_to_decoupled_with_line_numbers(
|
||||
self.patches_diff_list_no_line_numbers, model)
|
||||
if not self.patches_diff_list:
|
||||
# fallback to decoupled hunks
|
||||
self.patches_diff_list = get_pr_multi_diffs(self.git_provider,
|
||||
self.token_handler,
|
||||
model,
|
||||
max_calls=get_settings().pr_code_suggestions.max_number_of_calls,
|
||||
add_line_numbers=True) # decouple hunk with line numbers
|
||||
|
||||
if self.patches_diff_list:
|
||||
get_logger().info(f"Number of PR chunk calls: {len(self.patches_diff_list)}")
|
||||
@ -728,6 +740,42 @@ class PRCodeSuggestions:
|
||||
self.data = data = None
|
||||
return data
|
||||
|
||||
async def convert_to_decoupled_with_line_numbers(self, patches_diff_list_no_line_numbers, model) -> List[str]:
    """
    Convert non-decoupled diff prompts into decoupled hunks with line numbers.

    Each prompt is split into per-file sections on the "## File: " marker. Every
    section is converted with `decouple_and_convert_to_hunks_with_lines_numbers`
    (called with `file=None`), and the section's header - the text that precedes
    its first "@@" hunk header - is put back in front of the converted hunks.
    The sections are then re-joined, and the result is clipped with `clip_tokens`
    if its token count exceeds the model's maximal tokens minus 2000.

    Args:
        patches_diff_list_no_line_numbers: list of diff prompt strings, one per chunk.
        model: the model name, used to look up the maximal number of tokens.

    Returns:
        A list with one converted prompt string per input prompt. If any error
        occurs, it is logged and an empty list is returned.
    """
    file_prefix = "## File: "
    # NOTE(review): presumably a token budget kept free for the model's output - confirm
    delta_output = 2000

    with get_logger().contextualize(sub_feature='convert_to_decoupled_with_line_numbers'):
        try:
            patches_diff_list = []
            for patch_prompt in patches_diff_list_no_line_numbers:
                sections = patch_prompt.strip().split(f"\n{file_prefix}")
                converted_sections = []
                for i, section in enumerate(sections):
                    # the header is whatever precedes the first hunk of the section
                    header = section.split("\n@@")[0]
                    if i > 0:
                        # split() consumed the "## File: " marker of every section but the
                        # first one. Restore it, keeping the file name (including its
                        # opening quote) intact, so all headers share the same format.
                        header = file_prefix + header
                    header = header.strip()

                    converted_hunks = decouple_and_convert_to_hunks_with_lines_numbers(section,
                                                                                       file=None).strip()
                    converted_sections.append((header + '\n\n' + converted_hunks).strip())
                patch_final = "\n\n\n".join(converted_sections)

                # note - here we take the model's actual max tokens, without any reductions
                if model in MAX_TOKENS:
                    max_tokens_full = MAX_TOKENS[model]
                else:
                    max_tokens_full = get_max_tokens(model)

                token_count = self.token_handler.count_tokens(patch_final)
                if token_count > max_tokens_full - delta_output:
                    get_logger().warning(
                        f"Token count {token_count} exceeds the limit {max_tokens_full - delta_output}. clipping the tokens")
                    patch_final = clip_tokens(patch_final, max_tokens_full - delta_output)
                patches_diff_list.append(patch_final)
            return patches_diff_list
        except Exception:
            # best effort: the exception (with traceback) is logged, and the caller gets an empty list
            get_logger().exception("Error converting to decoupled with line numbers",
                                   artifact={'patches_diff_list_no_line_numbers': patches_diff_list_no_line_numbers})
            return []
|
||||
|
||||
def generate_summarized_suggestions(self, data: Dict) -> str:
|
||||
try:
|
||||
pr_body = "## PR Code Suggestions ✨\n\n"
|
||||
|
@ -7,7 +7,7 @@ from jinja2 import Environment, StrictUndefined
|
||||
from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
|
||||
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
|
||||
from pr_agent.algo.git_patch_processing import (
|
||||
convert_to_hunks_with_lines_numbers, extract_hunk_lines_from_patch)
|
||||
decouple_and_convert_to_hunks_with_lines_numbers, extract_hunk_lines_from_patch)
|
||||
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
|
||||
from pr_agent.algo.token_handler import TokenHandler
|
||||
from pr_agent.algo.utils import ModelType
|
||||
|
@ -5,8 +5,8 @@ from pr_agent.algo.pr_processing import pr_generate_extended_diff
|
||||
from pr_agent.algo.token_handler import TokenHandler
|
||||
from pr_agent.algo.utils import load_large_diff
|
||||
from pr_agent.config_loader import get_settings
|
||||
get_settings().set("CONFIG.CLI_MODE", True)
|
||||
get_settings().config.allow_dynamic_context = False
|
||||
get_settings(use_context=False).set("CONFIG.CLI_MODE", True)
|
||||
get_settings(use_context=False).config.allow_dynamic_context = False
|
||||
|
||||
|
||||
class TestExtendPatch:
|
||||
@ -61,15 +61,15 @@ class TestExtendPatch:
|
||||
original_file_str = 'line1\nline2\nline3\nline4\nline5\nline6'
|
||||
patch_str = '@@ -2,3 +2,3 @@ init()\n-line2\n+new_line2\n line3\n line4\n@@ -4,1 +4,1 @@ init2()\n-line4\n+new_line4' # noqa: E501
|
||||
num_lines = 1
|
||||
original_allow_dynamic_context = get_settings().config.allow_dynamic_context
|
||||
original_allow_dynamic_context = get_settings(use_context=False).config.allow_dynamic_context
|
||||
|
||||
get_settings().config.allow_dynamic_context = False
|
||||
get_settings(use_context=False).config.allow_dynamic_context = False
|
||||
expected_output = '\n@@ -1,5 +1,5 @@ init()\n line1\n-line2\n+new_line2\n line3\n line4\n line5\n\n@@ -3,3 +3,3 @@ init2()\n line3\n-line4\n+new_line4\n line5' # noqa: E501
|
||||
actual_output = extend_patch(original_file_str, patch_str,
|
||||
patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines)
|
||||
assert actual_output == expected_output
|
||||
|
||||
get_settings().config.allow_dynamic_context = True
|
||||
get_settings(use_context=False).config.allow_dynamic_context = True
|
||||
expected_output = '\n@@ -1,5 +1,5 @@ init()\n line1\n-line2\n+new_line2\n line3\n line4\n line5\n\n@@ -3,3 +3,3 @@ init2()\n line3\n-line4\n+new_line4\n line5' # noqa: E501
|
||||
actual_output = extend_patch(original_file_str, patch_str,
|
||||
patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines)
|
||||
@ -152,8 +152,8 @@ class TestExtendedPatchMoreLines:
|
||||
# Check that with no extra lines, the patches are the same as the original patches
|
||||
p0 = patches_extended_no_extra_lines[0].strip()
|
||||
p1 = patches_extended_no_extra_lines[1].strip()
|
||||
assert p0 == "## File: 'file1'\n" + pr_languages[0]['files'][0].patch.strip()
|
||||
assert p1 == "## File: 'file2'\n" + pr_languages[0]['files'][1].patch.strip()
|
||||
assert p0 == "## File: 'file1'\n\n" + pr_languages[0]['files'][0].patch.strip()
|
||||
assert p1 == "## File: 'file2'\n\n" + pr_languages[0]['files'][1].patch.strip()
|
||||
|
||||
patches_extended_with_extra_lines, total_tokens, patches_extended_tokens = pr_generate_extended_diff(
|
||||
pr_languages, token_handler, add_line_numbers_to_hunks=False,
|
||||
|
Reference in New Issue
Block a user