Mirror of https://github.com/qodo-ai/pr-agent.git (synced 2025-07-07 06:10:39 +08:00)
Compare commits: hl/keep_on...hl/test_do (38 commits)
Commits (SHA1):
5fb8bc1927, e9315c7d28, e878bbbe36, 67b61d722d, 7d89b82967, c5f9bbbf92, a5e5a82952, ccbb62b50a, 1df36c6a44, 9e5e9afe92, 5e43c202dd, 37e6608e68, f64d5f1e2a, 8fdf174dec, 29d4f98b19, 737792d83c, 7e5889061c, 755e04cf65, 44d6c95714, 14610d5375, f9c832d6cb, c2bec614e5, 49725e92f2, a1e32d8331, 0293412a42, 10ec0a1812, 69b68b78f5, c5bc4b44ff, 39e5102a2e, 6c82bc9a3e, 54f41dd603, 094f641fb5, a35a75eb34, 5a7c118b56, cf9e0fbbc5, ef9af261ed, ff79776410, ec3f2fb485
@@ -1,5 +1,5 @@
-## Installation
+### Installation
 
 To get started with PR-Agent quickly, you first need to acquire two tokens:
@@ -1,3 +1,6 @@
+## Unreleased
+- review tool now posts persistent comments by default
+
 ## [Version 0.9] - 2023-10-29
 - codiumai/pr-agent:0.9
 - codiumai/pr-agent:0.9-github_app
Usage.md (2 changes)
@@ -173,7 +173,7 @@ push_commands = [
     "/auto_review -i --pr_reviewer.remove_previous_review_comment=true",
 ]
 ```
-The means that when new code is pused to the PR, the PR-Agent will run the `describe` and incremental `auto_review` tools.
+This means that when new code is pushed to the PR, the PR-Agent will run the `describe` and incremental `auto_review` tools.
 For the describe tool, the `add_original_user_description` and `keep_original_user_title` parameters will be set to true.
 For the `auto_review` tool, it will run in incremental mode, and the `remove_previous_review_comment` parameter will be set to true.
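The same flow can also be exercised programmatically. Below is a minimal sketch, assuming the `PRAgent` import path and a placeholder PR URL; the describe flags are reconstructed from the surrounding prose, so treat them as illustrative:

```python
import asyncio

from pr_agent.agent.pr_agent import PRAgent  # assumed import path

# Placeholder PR URL; the two commands mirror the push_commands above.
PR_URL = "https://github.com/org/repo/pull/1"
PUSH_COMMANDS = [
    "/describe --pr_description.add_original_user_description=true --pr_description.keep_original_user_title=true",
    "/auto_review -i --pr_reviewer.remove_previous_review_comment=true",
]

async def run_push_commands():
    # Dispatch each command against the PR, exactly as the push trigger would.
    agent = PRAgent()
    for command in PUSH_COMMANDS:
        await agent.handle_request(PR_URL, command)

asyncio.run(run_push_commands())
```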
@@ -27,6 +27,8 @@ Under the section 'pr_description', the [configuration file](./../pr_agent/setti
 
 - `extra_instructions`: Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ...".
+- To enable `custom labels`, apply the configuration changes described [here](./GENERATE_CUSTOM_LABELS.md#configuration-changes)
+- `enable_pr_type`: if set to false, it will not show the `PR type` as a text value in the description content. Default is true.
 
 ### Markers template
 
 Markers enable easy integration of the user's content with auto-generated content, using a template-like mechanism.
@@ -24,6 +24,8 @@ Under the section 'pr_reviewer', the [configuration file](./../pr_agent/settings
 - `num_code_suggestions`: number of code suggestions provided by the 'review' tool. Default is 4.
 - `inline_code_comments`: if set to true, the tool will publish the code suggestions as comments on the code diff. Default is false.
 - `automatic_review`: if set to false, no automatic reviews will be done. Default is true.
 - `remove_previous_review_comment`: if set to true, the tool will remove the previous review comment before adding a new one. Default is false.
+- `persistent_comment`: if set to true, the review comment will be persistent, i.e. updated in place on each run instead of posted anew. Default is true.
 - `extra_instructions`: Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ...".
+- To enable `custom labels`, apply the configuration changes described [here](./GENERATE_CUSTOM_LABELS.md#configuration-changes)
 
 #### Incremental Mode
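For a sense of how these keys are consumed at runtime, here is a minimal sketch using the `get_settings()` accessor that appears throughout this diff; the dotted-key casing follows the `CONFIG.CLI_MODE` example from cli.py below, so treat it as an assumption:

```python
from pr_agent.config_loader import get_settings

# Override reviewer settings programmatically instead of editing configuration.toml.
get_settings().set("PR_REVIEWER.PERSISTENT_COMMENT", True)
get_settings().set("PR_REVIEWER.REMOVE_PREVIOUS_REVIEW_COMMENT", False)

if get_settings().pr_reviewer.persistent_comment:
    print("review will be published as a single, continuously updated comment")
```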
@@ -46,10 +46,13 @@ class PRAgent:
         apply_repo_settings(pr_url)
 
         # Then, apply user specific settings if exists
-        request = request.replace("'", "\\'")
-        lexer = shlex.shlex(request, posix=True)
-        lexer.whitespace_split = True
-        action, *args = list(lexer)
+        if isinstance(request, str):
+            request = request.replace("'", "\\'")
+            lexer = shlex.shlex(request, posix=True)
+            lexer.whitespace_split = True
+            action, *args = list(lexer)
+        else:
+            action, *args = request
         args = update_settings_from_args(args)
 
         action = action.lstrip("/").lower()
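The change lets `handle_request` accept either a raw command string or a pre-split argument list. A standalone sketch of the same tokenization logic, for illustration only:

```python
import shlex

def parse_request(request):
    # A string is tokenized with shlex; a list is assumed to be pre-split.
    if isinstance(request, str):
        request = request.replace("'", "\\'")
        lexer = shlex.shlex(request, posix=True)
        lexer.whitespace_split = True
        action, *args = list(lexer)
    else:
        action, *args = request
    return action.lstrip("/").lower(), args

# Both forms yield ('review', ['--pr_reviewer.num_code_suggestions=6'])
print(parse_request("/review --pr_reviewer.num_code_suggestions=6"))
print(parse_request(["/review", "--pr_reviewer.num_code_suggestions=6"]))
```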
@@ -8,6 +8,7 @@ MAX_TOKENS = {
     'gpt-4': 8000,
     'gpt-4-0613': 8000,
     'gpt-4-32k': 32000,
+    'gpt-4-1106-preview': 128000,  # 128K, but may be limited by config.max_model_tokens
     'claude-instant-1': 100000,
     'claude-2': 100000,
     'command-nightly': 4096,
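The comment on the new entry points at a clamp applied elsewhere; here is a minimal sketch of that interplay, simplified from the `get_max_tokens` helper added to utils.py later in this diff (the dictionary is trimmed for brevity):

```python
MAX_TOKENS = {'gpt-4': 8000, 'gpt-4-1106-preview': 128000}

def get_max_tokens(model: str, max_model_tokens: int = 32000) -> int:
    # The configured cap (config.max_model_tokens) wins over the model's native limit.
    max_tokens_model = MAX_TOKENS[model]
    if max_model_tokens:
        max_tokens_model = min(max_model_tokens, max_tokens_model)
    return max_tokens_model

assert get_max_tokens('gpt-4') == 8000                # native limit is below the cap
assert get_max_tokens('gpt-4-1106-preview') == 32000  # 128K context clamped by the cap
```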
@@ -23,7 +23,7 @@ def filter_ignored(files):
 
         # keep filenames that _don't_ match the ignore regex
         for r in compiled_patterns:
-            files = [f for f in files if not r.match(f.filename)]
+            files = [f for f in files if (f.filename and not r.match(f.filename))]
 
     except Exception as e:
         print(f"Could not filter file list: {e}")
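The added `f.filename and` guard skips entries whose filename is missing. A self-contained sketch (the `FileInfo` stand-in and the ignore pattern are illustrative):

```python
import re
from types import SimpleNamespace as FileInfo

def filter_ignored(files, patterns=(r".*\.lock$",)):
    # Keep files whose names are present and do NOT match any ignore regex.
    compiled_patterns = [re.compile(p) for p in patterns]
    for r in compiled_patterns:
        files = [f for f in files if (f.filename and not r.match(f.filename))]
    return files

files = [FileInfo(filename="poetry.lock"), FileInfo(filename="app.py"), FileInfo(filename=None)]
print([f.filename for f in filter_ignored(files)])  # ['app.py']
```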
@@ -3,6 +3,7 @@ from __future__ import annotations
 import re
 
 from pr_agent.config_loader import get_settings
+from pr_agent.git_providers.git_provider import EDIT_TYPE
 from pr_agent.log import get_logger
 
 
@@ -115,7 +116,7 @@ def omit_deletion_hunks(patch_lines) -> str:
 
 
 def handle_patch_deletions(patch: str, original_file_content_str: str,
-                           new_file_content_str: str, file_name: str) -> str:
+                           new_file_content_str: str, file_name: str, edit_type: EDIT_TYPE = EDIT_TYPE.UNKNOWN) -> str:
     """
     Handle entire file or deletion patches.
 
@@ -132,7 +133,7 @@ def handle_patch_deletions(patch: str, original_file_content_str: str,
         str: The modified patch with deletion hunks omitted.
 
     """
-    if not new_file_content_str:
+    if not new_file_content_str and edit_type != EDIT_TYPE.ADDED:
         # logic for handling deleted files - don't show patch, just show that the file was deleted
         if get_settings().config.verbosity_level > 0:
             get_logger().info(f"Processing file: {file_name}, minimizing deletion file")
@@ -7,18 +7,20 @@ from typing import Any, Callable, List, Tuple
 
 from github import RateLimitExceededException
 
 from pr_agent.algo import MAX_TOKENS
 from pr_agent.algo.git_patch_processing import convert_to_hunks_with_lines_numbers, extend_patch, handle_patch_deletions
 from pr_agent.algo.language_handler import sort_files_by_main_languages
 from pr_agent.algo.file_filter import filter_ignored
 from pr_agent.algo.token_handler import TokenHandler, get_token_encoder
+from pr_agent.algo.utils import get_max_tokens
 from pr_agent.config_loader import get_settings
-from pr_agent.git_providers.git_provider import FilePatchInfo, GitProvider
+from pr_agent.git_providers.git_provider import FilePatchInfo, GitProvider, EDIT_TYPE
 from pr_agent.log import get_logger
 
 DELETED_FILES_ = "Deleted files:\n"
-MORE_MODIFIED_FILES_ = "More modified files:\n"
+MORE_MODIFIED_FILES_ = "Additional modified files (insufficient token budget to process):\n"
+ADDED_FILES_ = "Additional added files (insufficient token budget to process):\n"
 
 OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD = 1000
 OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600
@@ -64,14 +66,17 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s
         pr_languages, token_handler, add_line_numbers_to_hunks, patch_extra_lines=PATCH_EXTRA_LINES)
 
     # if we are under the limit, return the full diff
-    if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < MAX_TOKENS[model]:
+    if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < get_max_tokens(model):
        return "\n".join(patches_extended)
 
     # if we are over the limit, start pruning
-    patches_compressed, modified_file_names, deleted_file_names = \
+    patches_compressed, modified_file_names, deleted_file_names, added_file_names = \
        pr_generate_compressed_diff(pr_languages, token_handler, model, add_line_numbers_to_hunks)
 
     final_diff = "\n".join(patches_compressed)
+    if added_file_names:
+        added_list_str = ADDED_FILES_ + "\n".join(added_file_names)
+        final_diff = final_diff + "\n\n" + added_list_str
     if modified_file_names:
         modified_list_str = MORE_MODIFIED_FILES_ + "\n".join(modified_file_names)
         final_diff = final_diff + "\n\n" + modified_list_str
@@ -122,7 +127,7 @@ def pr_generate_extended_diff(pr_languages: list,
 
 
 def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, model: str,
-                                convert_hunks_to_line_numbers: bool) -> Tuple[list, list, list]:
+                                convert_hunks_to_line_numbers: bool) -> Tuple[list, list, list, list]:
     """
     Generate a compressed diff string for a pull request, using diff minimization techniques to reduce the number of
     tokens used.
@@ -148,6 +153,7 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
     """
 
     patches = []
+    added_files_list = []
     modified_files_list = []
     deleted_files_list = []
     # sort each one of the languages in top_langs by the number of tokens in the diff
@@ -165,7 +171,7 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
 
         # removing delete-only hunks
         patch = handle_patch_deletions(patch, original_file_content_str,
-                                       new_file_content_str, file.filename)
+                                       new_file_content_str, file.filename, file.edit_type)
         if patch is None:
             if not deleted_files_list:
                 total_tokens += token_handler.count_tokens(DELETED_FILES_)
@@ -179,21 +185,26 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
         new_patch_tokens = token_handler.count_tokens(patch)
 
         # Hard Stop, no more tokens
-        if total_tokens > MAX_TOKENS[model] - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD:
+        if total_tokens > get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD:
             get_logger().warning(f"File was fully skipped, no more tokens: {file.filename}.")
             continue
 
         # If the patch is too large, just show the file name
-        if total_tokens + new_patch_tokens > MAX_TOKENS[model] - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
+        if total_tokens + new_patch_tokens > get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
             # Current logic is to skip the patch if it's too large
             # TODO: Option for alternative logic to remove hunks from the patch to reduce the number of tokens
             # until we meet the requirements
             if get_settings().config.verbosity_level >= 2:
                 get_logger().warning(f"Patch too large, minimizing it, {file.filename}")
-            if not modified_files_list:
-                total_tokens += token_handler.count_tokens(MORE_MODIFIED_FILES_)
-            modified_files_list.append(file.filename)
-            total_tokens += token_handler.count_tokens(file.filename) + 1
+            if file.edit_type == EDIT_TYPE.ADDED:
+                if not added_files_list:
+                    total_tokens += token_handler.count_tokens(ADDED_FILES_)
+                added_files_list.append(file.filename)
+            else:
+                if not modified_files_list:
+                    total_tokens += token_handler.count_tokens(MORE_MODIFIED_FILES_)
+                modified_files_list.append(file.filename)
+            total_tokens += token_handler.count_tokens(file.filename) + 1
             continue
 
         if patch:
@@ -206,7 +217,7 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
         if get_settings().config.verbosity_level >= 2:
             get_logger().info(f"Tokens: {total_tokens}, last filename: {file.filename}")
 
-    return patches, modified_files_list, deleted_files_list
+    return patches, modified_files_list, deleted_files_list, added_files_list
 
 
 async def retry_with_fallback_models(f: Callable):
@@ -397,13 +408,13 @@ def get_pr_multi_diffs(git_provider: GitProvider,
             continue
 
         # Remove delete-only hunks
-        patch = handle_patch_deletions(patch, original_file_content_str, new_file_content_str, file.filename)
+        patch = handle_patch_deletions(patch, original_file_content_str, new_file_content_str, file.filename, file.edit_type)
         if patch is None:
             continue
 
         patch = convert_to_hunks_with_lines_numbers(patch, file)
         new_patch_tokens = token_handler.count_tokens(patch)
-        if patch and (total_tokens + new_patch_tokens > MAX_TOKENS[model] - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD):
+        if patch and (total_tokens + new_patch_tokens > get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD):
             final_diff = "\n".join(patches)
             final_diff_list.append(final_diff)
             patches = []
@@ -9,6 +9,8 @@ from typing import Any, List
 
 import yaml
 from starlette_context import context
 
+from pr_agent.algo import MAX_TOKENS
+
 from pr_agent.config_loader import get_settings, global_settings
 from pr_agent.log import get_logger
@@ -285,6 +287,7 @@ def _fix_key_value(key: str, value: str):
 
 
 def load_yaml(review_text: str) -> dict:
+    test = 1
     review_text = review_text.removeprefix('```yaml').rstrip('`')
     try:
         data = yaml.safe_load(review_text)
@@ -295,6 +298,21 @@ def load_yaml(review_text: str) -> dict:
 
 def try_fix_yaml(review_text: str) -> dict:
     review_text_lines = review_text.split('\n')
 
+    # first fallback - try to convert 'relevant line: ...' to 'relevant line: |-\n        ...'
+    review_text_lines_copy = review_text_lines.copy()
+    for i in range(0, len(review_text_lines_copy)):
+        if 'relevant line:' in review_text_lines_copy[i] and not '|-' in review_text_lines_copy[i]:
+            review_text_lines_copy[i] = review_text_lines_copy[i].replace('relevant line: ',
+                                                                          'relevant line: |-\n        ')
+    try:
+        data = yaml.load('\n'.join(review_text_lines_copy), Loader=yaml.SafeLoader)
+        get_logger().info(f"Successfully parsed AI prediction after adding |- to relevant line")
+        return data
+    except:
+        get_logger().debug(f"Failed to parse AI prediction after adding |- to relevant line")
 
     # second fallback - try to remove last lines
     data = {}
     for i in range(1, len(review_text_lines)):
         review_text_lines_tmp = '\n'.join(review_text_lines[:-i])
@@ -326,18 +344,33 @@ def set_custom_labels(variables):
         variables["custom_labels_examples"] = f" - {list(labels.keys())[0]}"
 
 
-def get_user_labels(current_labels):
-    ## Only keep labels that has been added by the user
-    if current_labels is None:
-        current_labels = []
-    user_labels = []
-    for label in current_labels:
-        if label in ['Bug fix', 'Tests', 'Refactoring', 'Enhancement', 'Documentation', 'Other']:
-            continue
-        if get_settings().config.enable_custom_labels:
-            if label in get_settings().custom_labels:
-                continue
-        user_labels.append(label)
-    if user_labels:
-        get_logger().info(f"Keeping user labels: {user_labels}")
+def get_user_labels(current_labels: List[str] = None):
+    """
+    Only keep labels that have been added by the user
+    """
+    try:
+        if current_labels is None:
+            current_labels = []
+        user_labels = []
+        for label in current_labels:
+            if label.lower() in ['bug fix', 'tests', 'refactoring', 'enhancement', 'documentation', 'other']:
+                continue
+            if get_settings().config.enable_custom_labels:
+                if label in get_settings().custom_labels:
+                    continue
+            user_labels.append(label)
+        if user_labels:
+            get_logger().info(f"Keeping user labels: {user_labels}")
+    except Exception as e:
+        get_logger().exception(f"Failed to get user labels: {e}")
+        return current_labels
     return user_labels
+
+
+def get_max_tokens(model):
+    settings = get_settings()
+    max_tokens_model = MAX_TOKENS[model]
+    if settings.config.max_model_tokens:
+        max_tokens_model = min(settings.config.max_model_tokens, max_tokens_model)
+        # get_logger().debug(f"limiting max tokens to {max_tokens_model}")
+    return max_tokens_model
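To illustrate the rewritten label filter: built-in type labels are now matched case-insensitively, so user-applied labels survive regardless of casing. A minimal sketch of the core comparison, assuming custom labels are disabled:

```python
labels = ['Bug fix', 'enhancement', 'needs-qa', 'good first issue']
builtin = ['bug fix', 'tests', 'refactoring', 'enhancement', 'documentation', 'other']

# Built-in type labels are dropped case-insensitively; everything else is kept.
user_labels = [label for label in labels if label.lower() not in builtin]
print(user_labels)  # ['needs-qa', 'good first issue']
```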
@@ -8,6 +8,8 @@ from pr_agent.log import setup_logger
 
 setup_logger()
 
+
+
 def run(inargs=None):
     parser = argparse.ArgumentParser(description='AI based pull request analyzer', usage=
 """\
@@ -51,9 +53,9 @@ For example: 'python cli.py --pr_url=... review --pr_reviewer.extra_instructions
     command = args.command.lower()
     get_settings().set("CONFIG.CLI_MODE", True)
     if args.issue_url:
-        result = asyncio.run(PRAgent().handle_request(args.issue_url, command + " " + " ".join(args.rest)))
+        result = asyncio.run(PRAgent().handle_request(args.issue_url, [command] + args.rest))
     else:
-        result = asyncio.run(PRAgent().handle_request(args.pr_url, command + " " + " ".join(args.rest)))
+        result = asyncio.run(PRAgent().handle_request(args.pr_url, [command] + args.rest))
     if not result:
         parser.print_help()
@@ -9,7 +9,7 @@ from starlette_context import context
 from ..algo.pr_processing import find_line_number_of_relevant_line_in_file
 from ..config_loader import get_settings
 from ..log import get_logger
-from .git_provider import FilePatchInfo, GitProvider
+from .git_provider import FilePatchInfo, GitProvider, EDIT_TYPE
 
 
 class BitbucketProvider(GitProvider):
@@ -132,17 +132,44 @@ class BitbucketProvider(GitProvider):
                 diff.old.get_data("links")
             )
             new_file_content_str = self._get_pr_file_content(diff.new.get_data("links"))
-            diff_files.append(
-                FilePatchInfo(
-                    original_file_content_str,
-                    new_file_content_str,
-                    diff_split[index],
-                    diff.new.path,
-                )
+            file_patch_canonic_structure = FilePatchInfo(
+                original_file_content_str,
+                new_file_content_str,
+                diff_split[index],
+                diff.new.path,
             )
+
+            if diff.data['status'] == 'added':
+                file_patch_canonic_structure.edit_type = EDIT_TYPE.ADDED
+            elif diff.data['status'] == 'removed':
+                file_patch_canonic_structure.edit_type = EDIT_TYPE.DELETED
+            elif diff.data['status'] == 'modified':
+                file_patch_canonic_structure.edit_type = EDIT_TYPE.MODIFIED
+            elif diff.data['status'] == 'renamed':
+                file_patch_canonic_structure.edit_type = EDIT_TYPE.RENAMED
+            diff_files.append(file_patch_canonic_structure)
 
         self.diff_files = diff_files
         return diff_files
 
+    def publish_persistent_comment(self, pr_comment: str, initial_text: str, updated_text: str):
+        try:
+            for comment in self.pr.comments():
+                body = comment.raw
+                if initial_text in body:
+                    if updated_text:
+                        pr_comment_updated = pr_comment.replace(initial_text, updated_text)
+                    else:
+                        pr_comment_updated = pr_comment
+                    d = {"content": {"raw": pr_comment_updated}}
+                    response = comment._update_data(comment.put(None, data=d))
+                    return
+        except Exception as e:
+            get_logger().exception(f"Failed to update persistent review, error: {e}")
+            pass
+        self.publish_comment(pr_comment)
+
     def publish_comment(self, pr_comment: str, is_temporary: bool = False):
         comment = self.pr.comment(pr_comment)
         if is_temporary:
@@ -288,6 +315,11 @@ class BitbucketProvider(GitProvider):
         })
 
         response = requests.request("PUT", self.bitbucket_pull_request_api_url, headers=self.headers, data=payload)
+        try:
+            if response.status_code != 200:
+                get_logger().info(f"Failed to update description, error code: {response.status_code}")
+        except:
+            pass
         return response
 
     # bitbucket does not support labels
@@ -13,6 +13,7 @@ class EDIT_TYPE(Enum):
     DELETED = 2
     MODIFIED = 3
     RENAMED = 4
+    UNKNOWN = 5
 
 
 @dataclass
@@ -22,7 +23,7 @@ class FilePatchInfo:
     patch: str
     filename: str
     tokens: int = -1
-    edit_type: EDIT_TYPE = EDIT_TYPE.MODIFIED
+    edit_type: EDIT_TYPE = EDIT_TYPE.UNKNOWN
     old_filename: str = None
@@ -43,6 +44,9 @@ class GitProvider(ABC):
     def publish_comment(self, pr_comment: str, is_temporary: bool = False):
         pass
 
+    def publish_persistent_comment(self, pr_comment: str, initial_text: str, updated_text: str):
+        self.publish_comment(pr_comment)
+
     @abstractmethod
     def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
         pass
@@ -94,16 +98,16 @@ class GitProvider(ABC):
     def get_pr_description(self, *, full: bool = True) -> str:
         from pr_agent.config_loader import get_settings
         from pr_agent.algo.pr_processing import clip_tokens
-        max_tokens = get_settings().get("CONFIG.MAX_DESCRIPTION_TOKENS", None)
+        max_tokens_description = get_settings().get("CONFIG.MAX_DESCRIPTION_TOKENS", None)
         description = self.get_pr_description_full() if full else self.get_user_description()
-        if max_tokens:
-            return clip_tokens(description, max_tokens)
+        if max_tokens_description:
+            return clip_tokens(description, max_tokens_description)
         return description
 
     def get_user_description(self) -> str:
         description = (self.get_pr_description_full() or "").strip()
         # if the existing description wasn't generated by the pr-agent, just return it as-is
-        if not description.startswith("## PR Type"):
+        if not any(description.startswith(header) for header in ("## PR Type", "## PR Description")):
             return description
         # if the existing description was generated by the pr-agent, but it doesn't contain the user description,
         # return nothing (empty string) because it means there is no user description
@@ -153,6 +157,8 @@ def get_main_pr_language(languages, files) -> str:
         # validate that the specific commit uses the main language
         extension_list = []
         for file in files:
+            if not file:
+                continue
             if isinstance(file, str):
                 file = FilePatchInfo(base_file=None, head_file=None, patch=None, filename=file)
             extension_list.append(file.filename.rsplit('.')[-1])
@@ -13,7 +13,7 @@ from ..algo.utils import load_large_diff
 from ..config_loader import get_settings
 from ..log import get_logger
 from ..servers.utils import RateLimitExceeded
-from .git_provider import FilePatchInfo, GitProvider, IncrementalPR
+from .git_provider import FilePatchInfo, GitProvider, IncrementalPR, EDIT_TYPE
 
 
 class GithubProvider(GitProvider):
@@ -129,7 +129,20 @@ class GithubProvider(GitProvider):
             if not patch:
                 patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str)
 
-            diff_files.append(FilePatchInfo(original_file_content_str, new_file_content_str, patch, file.filename))
+            if file.status == 'added':
+                edit_type = EDIT_TYPE.ADDED
+            elif file.status == 'removed':
+                edit_type = EDIT_TYPE.DELETED
+            elif file.status == 'renamed':
+                edit_type = EDIT_TYPE.RENAMED
+            elif file.status == 'modified':
+                edit_type = EDIT_TYPE.MODIFIED
+            else:
+                get_logger().error(f"Unknown edit type: {file.status}")
+                edit_type = EDIT_TYPE.UNKNOWN
+            file_patch_canonical_structure = FilePatchInfo(original_file_content_str, new_file_content_str, patch,
+                                                           file.filename, edit_type=edit_type)
+            diff_files.append(file_patch_canonical_structure)
 
         self.diff_files = diff_files
         return diff_files
@@ -141,10 +154,24 @@ class GithubProvider(GitProvider):
     def publish_description(self, pr_title: str, pr_body: str):
         self.pr.edit(title=pr_title, body=pr_body)
 
+    def publish_persistent_comment(self, pr_comment: str, initial_text: str, updated_text: str):
+        prev_comments = list(self.pr.get_issue_comments())
+        for comment in prev_comments:
+            body = comment.body
+            if body.startswith(initial_text):
+                if updated_text:
+                    pr_comment_updated = pr_comment.replace(initial_text, updated_text)
+                else:
+                    pr_comment_updated = pr_comment
+                response = comment.edit(pr_comment_updated)
+                return
+        self.publish_comment(pr_comment)
+
     def publish_comment(self, pr_comment: str, is_temporary: bool = False):
         if is_temporary and not get_settings().config.publish_output_progress:
             get_logger().debug(f"Skipping publish_comment for temporary comment: {pr_comment}")
             return
 
         response = self.pr.create_issue_comment(pr_comment)
         if hasattr(response, "user") and hasattr(response.user, "login"):
             self.github_user_id = response.user.login
@@ -136,6 +136,21 @@ class GitLabProvider(GitProvider):
         except Exception as e:
             get_logger().exception(f"Could not update merge request {self.id_mr} description: {e}")
 
+    def publish_persistent_comment(self, pr_comment: str, initial_text: str, updated_text: str):
+        try:
+            for comment in self.mr.notes.list(get_all=True)[::-1]:
+                if comment.body.startswith(initial_text):
+                    if updated_text:
+                        pr_comment_updated = pr_comment.replace(initial_text, updated_text)
+                    else:
+                        pr_comment_updated = pr_comment
+                    response = self.mr.notes.update(comment.id, {'body': pr_comment_updated})
+                    return
+        except Exception as e:
+            get_logger().exception(f"Failed to update persistent review, error: {e}")
+            pass
+        self.publish_comment(pr_comment)
+
     def publish_comment(self, mr_comment: str, is_temporary: bool = False):
         comment = self.mr.notes.create({'body': mr_comment})
         if is_temporary:
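GitHub, GitLab, and Bitbucket all implement the same find-and-update pattern for persistent comments. A provider-agnostic sketch of that pattern (the function and callback names are illustrative, not part of the repo's API):

```python
def publish_persistent(comments, pr_comment, initial_text, updated_text,
                       update_comment, create_comment):
    # Look for an earlier comment carrying the marker text and edit it in place;
    # if none is found (or the update fails), fall back to a fresh comment.
    try:
        for comment in comments:
            if comment.body.startswith(initial_text):
                if updated_text:
                    pr_comment = pr_comment.replace(initial_text, updated_text)
                update_comment(comment, pr_comment)
                return
    except Exception:
        pass
    create_comment(pr_comment)
```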
@@ -1,15 +1,16 @@
 [config]
-model="gpt-4"
+model="gpt-4" # "gpt-4-1106-preview"
 fallback_models=["gpt-3.5-turbo-16k"]
 git_provider="github"
 publish_output=true
 publish_output_progress=true
-verbosity_level=0 # 0,1,2
+verbosity_level=2 # 0,1,2
 use_extra_bad_extensions=false
 use_repo_settings_file=true
 ai_timeout=180
 max_description_tokens = 500
 max_commits_tokens = 500
+max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities.
 patch_extra_lines = 3
 secret_provider="google_cloud_storage"
 cli_mode=false
@@ -25,6 +26,7 @@ inline_code_comments = false
 ask_and_reflect=false
 automatic_review=true
 remove_previous_review_comment=false
+persistent_comment=true
 extra_instructions = ""
 # specific configurations for incremental review (/review -i)
 require_all_thresholds_for_incremental_review=false
@@ -16,7 +16,7 @@ You must use the following YAML schema to format your answer:
 PR Type:
   type: array
 {%- if enable_custom_labels %}
-  description: One or more labels that describe the PR type. Don't output the description in the parentheses.
+  description: Labels that are applicable to the Pull Request. Don't output the description in the parentheses. If none of the labels is relevant to the PR, output an empty array.
 {%- endif %}
   items:
     type: string
@@ -30,7 +30,7 @@ PR Type:
 {%- if enable_custom_labels %}
 PR Labels:
   type: array
-  description: One or more labels that describe the PR labels. Don't output the description in the parentheses.
+  description: Labels that are applicable to the Pull Request. Don't output the description in the parentheses. If none of the labels is relevant to the PR, output an empty array.
   items:
     type: string
     enum:
@@ -152,7 +152,8 @@ PR Analysis:
     Focused PR: no, because ...
 {%- endif %}
 {%- if require_estimate_effort_to_review %}
-    Estimated effort to review [1-5]: 3, because ...
+    Estimated effort to review [1-5]: |-
+      3, because ...
 {%- endif %}
 PR Feedback:
     General PR suggestions: |-
@@ -117,7 +117,15 @@ class PRReviewer:
         if get_settings().config.publish_output:
             get_logger().info('Pushing PR review...')
             previous_review_comment = self._get_previous_review_comment()
-            self.git_provider.publish_comment(pr_comment)
+
+            # publish the review
+            if get_settings().pr_reviewer.persistent_comment and not self.incremental.is_incremental:
+                self.git_provider.publish_persistent_comment(pr_comment,
+                                                             initial_text="## PR Analysis",
+                                                             updated_text="## PR Analysis (updated)")
+            else:
+                self.git_provider.publish_comment(pr_comment)
+
             self.git_provider.remove_initial_comment()
             if previous_review_comment:
                 self._remove_previous_review_comment(previous_review_comment)
@@ -156,7 +164,6 @@ class PRReviewer:
         variables["diff"] = self.patches_diff  # update diff
 
         environment = Environment(undefined=StrictUndefined)
-        # set_custom_labels(variables)
         system_prompt = environment.from_string(get_settings().pr_review_prompt.system).render(variables)
         user_prompt = environment.from_string(get_settings().pr_review_prompt.user).render(variables)
|
@ -8,8 +8,8 @@ import pinecone
|
||||
from pinecone_datasets import Dataset, DatasetMetadata
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from pr_agent.algo import MAX_TOKENS
|
||||
from pr_agent.algo.token_handler import TokenHandler
|
||||
from pr_agent.algo.utils import get_max_tokens
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.git_providers import get_git_provider
|
||||
from pr_agent.log import get_logger
|
||||
@ -197,7 +197,7 @@ class PRSimilarIssue:
|
||||
username = issue.user.login
|
||||
created_at = str(issue.created_at)
|
||||
if len(issue_str) < 8000 or \
|
||||
self.token_handler.count_tokens(issue_str) < MAX_TOKENS[MODEL]: # fast reject first
|
||||
self.token_handler.count_tokens(issue_str) < get_max_tokens(MODEL): # fast reject first
|
||||
issue_record = Record(
|
||||
id=issue_key + "." + "issue",
|
||||
text=issue_str,
|
||||
|
tests/unittest/try_fix_yaml.py (new file, 31 lines)
@@ -0,0 +1,31 @@
+
+# Generated by CodiumAI
+from pr_agent.algo.utils import try_fix_yaml
+
+
+import pytest
+
+
+class TestTryFixYaml:
+
+    # The function successfully parses a valid YAML string.
+    def test_valid_yaml(self):
+        review_text = "key: value\n"
+        expected_output = {"key": "value"}
+        assert try_fix_yaml(review_text) == expected_output
+
+    # The function adds '|-' to 'relevant line:' if it is not already present and successfully parses the YAML string.
+    def test_add_relevant_line(self):
+        review_text = "relevant line: value: 3\n"
+        expected_output = {"relevant line": "value: 3"}
+        assert try_fix_yaml(review_text) == expected_output
+
+    # The function removes the last line(s) of the YAML string and successfully parses the YAML string.
+    def test_remove_last_line(self):
+        review_text = "key: value\nextra invalid line\n"
+        expected_output = {"key": "value"}
+        assert try_fix_yaml(review_text) == expected_output
+
+    # The YAML string is empty.
+    def test_empty_yaml_fixed(self):
+        review_text = ""
+        assert try_fix_yaml(review_text) is None