Merge remote-tracking branch 'origin/main' into ok/gitlab_fix

Ori Kotek
2024-06-03 14:15:46 +03:00
91 changed files with 2626 additions and 1330 deletions

View File

@ -73,6 +73,9 @@ class PRAgent:
args = update_settings_from_args(args)
action = action.lstrip("/").lower()
if action not in command2class:
get_logger().debug(f"Unknown command: {action}")
return False
with get_logger().contextualize(command=action):
get_logger().info("PR-Agent request handler started", analytics=True)
if action == "reflect_and_review":

View File

@ -1,8 +1,9 @@
MAX_TOKENS = {
'text-embedding-ada-002': 8000,
'gpt-3.5-turbo': 4000,
'gpt-3.5-turbo': 16000,
'gpt-3.5-turbo-0125': 16000,
'gpt-3.5-turbo-0613': 4000,
'gpt-3.5-turbo-0301': 4000,
'gpt-3.5-turbo-1106': 16000,
'gpt-3.5-turbo-16k': 16000,
'gpt-3.5-turbo-16k-0613': 16000,
'gpt-4': 8000,
@ -10,6 +11,11 @@ MAX_TOKENS = {
'gpt-4-32k': 32000,
'gpt-4-1106-preview': 128000, # 128K, but may be limited by config.max_model_tokens
'gpt-4-0125-preview': 128000, # 128K, but may be limited by config.max_model_tokens
'gpt-4o': 128000, # 128K, but may be limited by config.max_model_tokens
'gpt-4o-2024-05-13': 128000, # 128K, but may be limited by config.max_model_tokens
'gpt-4-turbo-preview': 128000, # 128K, but may be limited by config.max_model_tokens
'gpt-4-turbo-2024-04-09': 128000, # 128K, but may be limited by config.max_model_tokens
'gpt-4-turbo': 128000, # 128K, but may be limited by config.max_model_tokens
'claude-instant-1': 100000,
'claude-2': 100000,
'command-nightly': 4096,
@ -23,4 +29,11 @@ MAX_TOKENS = {
'anthropic.claude-v1': 100000,
'anthropic.claude-v2': 100000,
'anthropic/claude-3-opus-20240229': 100000,
'bedrock/anthropic.claude-instant-v1': 100000,
'bedrock/anthropic.claude-v2': 100000,
'bedrock/anthropic.claude-v2:1': 100000,
'bedrock/anthropic.claude-3-sonnet-20240229-v1:0': 100000,
'bedrock/anthropic.claude-3-haiku-20240307-v1:0': 100000,
'groq/llama3-8b-8192': 8192,
'groq/llama3-70b-8192': 8192,
}
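
For reference, `MAX_TOKENS` maps each supported model name to its context-window size, and several entries note that `config.max_model_tokens` may cap the value further. A minimal sketch of such a capped lookup (a hypothetical helper for illustration; the repository's own `get_max_tokens` may differ in detail):

```python
# Hypothetical helper illustrating a capped MAX_TOKENS lookup; the optional
# cap mirrors the 'config.max_model_tokens' comments above.
from pr_agent.algo import MAX_TOKENS

def lookup_max_tokens(model: str, max_model_tokens: int = 0) -> int:
    if model not in MAX_TOKENS:
        raise ValueError(f"Unknown model: {model}")
    max_tokens_model = MAX_TOKENS[model]
    if max_model_tokens:  # user-configured cap, if any
        max_tokens_model = min(max_model_tokens, max_tokens_model)
    return max_tokens_model

print(lookup_max_tokens('gpt-4o'))         # 128000
print(lookup_max_tokens('gpt-4o', 32000))  # capped to 32000
```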

View File

@ -15,7 +15,7 @@ class BaseAiHandler(ABC):
pass
@abstractmethod
async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2):
async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2, img_path: str = None):
"""
This method should be implemented to return a chat completion from the AI model.
Args:

View File

@ -1,5 +1,5 @@
import os
import requests
import boto3
import litellm
import openai
@ -36,6 +36,8 @@ class LiteLLMAIHandler(BaseAiHandler):
assert litellm_token, "LITELLM_TOKEN is required"
os.environ["LITELLM_TOKEN"] = litellm_token
litellm.use_client = True
if get_settings().get("LITELLM.DROP_PARAMS", None):
litellm.drop_params = get_settings().litellm.drop_params
if get_settings().get("OPENAI.ORG", None):
litellm.organization = get_settings().openai.org
if get_settings().get("OPENAI.API_TYPE", None):
@ -50,8 +52,8 @@ class LiteLLMAIHandler(BaseAiHandler):
litellm.anthropic_key = get_settings().anthropic.key
if get_settings().get("COHERE.KEY", None):
litellm.cohere_key = get_settings().cohere.key
if get_settings().get("REPLICATE.KEY", None):
litellm.replicate_key = get_settings().replicate.key
if get_settings().get("GROQ.KEY", None):
litellm.api_key = get_settings().groq.key
if get_settings().get("REPLICATE.KEY", None):
litellm.replicate_key = get_settings().replicate.key
if get_settings().get("HUGGINGFACE.KEY", None):
@ -59,6 +61,9 @@ class LiteLLMAIHandler(BaseAiHandler):
if get_settings().get("HUGGINGFACE.API_BASE", None) and 'huggingface' in get_settings().config.model:
litellm.api_base = get_settings().huggingface.api_base
self.api_base = get_settings().huggingface.api_base
if get_settings().get("OLLAMA.API_BASE", None) :
litellm.api_base = get_settings().ollama.api_base
self.api_base = get_settings().ollama.api_base
if get_settings().get("HUGGINGFACE.REPITITION_PENALTY", None):
self.repetition_penalty = float(get_settings().huggingface.repetition_penalty)
if get_settings().get("VERTEXAI.VERTEX_PROJECT", None):
@ -68,11 +73,24 @@ class LiteLLMAIHandler(BaseAiHandler):
)
if get_settings().get("AWS.BEDROCK_REGION", None):
litellm.AmazonAnthropicConfig.max_tokens_to_sample = 2000
litellm.AmazonAnthropicClaude3Config.max_tokens = 2000
self.aws_bedrock_client = boto3.client(
service_name="bedrock-runtime",
region_name=get_settings().aws.bedrock_region,
)
def prepare_logs(self, response, system, user, resp, finish_reason):
response_log = response.dict().copy()
response_log['system'] = system
response_log['user'] = user
response_log['output'] = resp
response_log['finish_reason'] = finish_reason
if hasattr(self, 'main_pr_language'):
response_log['main_pr_language'] = self.main_pr_language
else:
response_log['main_pr_language'] = 'unknown'
return response_log
@property
def deployment_id(self):
"""
@ -84,13 +102,27 @@ class LiteLLMAIHandler(BaseAiHandler):
retry=retry_if_exception_type((openai.APIError, openai.APIConnectionError, openai.Timeout)), # No retry on RateLimitError
stop=stop_after_attempt(OPENAI_RETRIES)
)
async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2):
async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2, img_path: str = None):
try:
resp, finish_reason = None, None
deployment_id = self.deployment_id
if self.azure:
model = 'azure/' + model
messages = [{"role": "system", "content": system}, {"role": "user", "content": user}]
if img_path:
try:
# check if the image link is alive
r = requests.head(img_path, allow_redirects=True)
if r.status_code == 404:
error_msg = f"The image link is not [alive](img_path).\nPlease repost the original image as a comment, and send the question again with 'quote reply' (see [instructions](https://pr-agent-docs.codium.ai/tools/ask/#ask-on-images-using-the-pr-code-as-context))."
get_logger().error(error_msg)
return f"{error_msg}", "error"
except Exception as e:
get_logger().error(f"Error fetching image: {img_path}", e)
return f"Error fetching image: {img_path}", "error"
messages[1]["content"] = [{"type": "text", "text": messages[1]["content"]},
{"type": "image_url", "image_url": {"url": img_path}}]
kwargs = {
"model": model,
"deployment_id": deployment_id,
@ -125,11 +157,14 @@ class LiteLLMAIHandler(BaseAiHandler):
else:
resp = response["choices"][0]['message']['content']
finish_reason = response["choices"][0]["finish_reason"]
# usage = response.get("usage")
get_logger().debug(f"\nAI response:\n{resp}")
get_logger().debug("Full_response", artifact=response)
# log the full response for debugging
response_log = self.prepare_logs(response, system, user, resp, finish_reason)
get_logger().debug("Full_response", artifact=response_log)
# for CLI debugging
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"\nAI response:\n{resp}")
return resp, finish_reason
return resp, finish_reason
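
Since the handler now accepts an image, a brief usage sketch may help. This assumes an async caller, a vision-capable model, and that the handler lives at the module path below; the URL and prompts are placeholders:

```python
# Usage sketch (placeholder values): ask a vision-capable model about an
# image attached to the user message via the new img_path parameter.
import asyncio

from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler

async def ask_about_image():
    handler = LiteLLMAIHandler()
    resp, finish_reason = await handler.chat_completion(
        model="gpt-4o",
        system="You are a code review assistant.",
        user="What does this diagram imply about the PR?",
        img_path="https://example.com/diagram.png",  # must be a live, reachable URL
    )
    print(finish_reason, resp)

asyncio.run(ask_about_image())
```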

View File

@ -23,7 +23,10 @@ def extend_patch(original_file_str, patch_str, num_lines) -> str:
return patch_str
if type(original_file_str) == bytes:
original_file_str = original_file_str.decode('utf-8')
try:
original_file_str = original_file_str.decode('utf-8')
except UnicodeDecodeError:
return ""
original_lines = original_file_str.splitlines()
patch_lines = patch_str.splitlines()
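
The effect of the new guard, shown in isolation (illustrative code, not part of the diff): byte content that cannot be decoded as UTF-8, such as binary files, now yields an empty patch instead of raising.

```python
# Standalone illustration of the decode guard: invalid UTF-8 no longer raises.
def safe_decode(original_file_str):
    if isinstance(original_file_str, bytes):
        try:
            return original_file_str.decode('utf-8')
        except UnicodeDecodeError:
            return None  # extend_patch returns "" in this case
    return original_file_str

assert safe_decode(b"hello") == "hello"
assert safe_decode(b"\xff\xfe") is None  # 0xff is an invalid UTF-8 start byte
```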

View File

@ -9,7 +9,7 @@ from pr_agent.algo.git_patch_processing import convert_to_hunks_with_lines_numbe
from pr_agent.algo.language_handler import sort_files_by_main_languages
from pr_agent.algo.file_filter import filter_ignored
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import get_max_tokens, ModelType
from pr_agent.algo.utils import get_max_tokens, clip_tokens, ModelType
from pr_agent.config_loader import get_settings
from pr_agent.git_providers.git_provider import GitProvider
from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo
@ -87,22 +87,34 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s
# if we are over the limit, start pruning
get_logger().info(f"Tokens: {total_tokens}, total tokens over limit: {get_max_tokens(model)}, "
f"pruning diff.")
patches_compressed, modified_file_names, deleted_file_names, added_file_names = \
patches_compressed, modified_file_names, deleted_file_names, added_file_names, total_tokens_new = \
pr_generate_compressed_diff(pr_languages, token_handler, model, add_line_numbers_to_hunks)
# Insert additional information about added, modified, and deleted files if there is enough space
max_tokens = get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD
curr_token = total_tokens_new # == token_handler.count_tokens(final_diff)+token_handler.prompt_tokens
final_diff = "\n".join(patches_compressed)
if added_file_names:
delta_tokens = 10
if added_file_names and (max_tokens - curr_token) > delta_tokens:
added_list_str = ADDED_FILES_ + "\n".join(added_file_names)
final_diff = final_diff + "\n\n" + added_list_str
if modified_file_names:
added_list_str = clip_tokens(added_list_str, max_tokens - curr_token)
if added_list_str:
final_diff = final_diff + "\n\n" + added_list_str
curr_token += token_handler.count_tokens(added_list_str) + 2
if modified_file_names and (max_tokens - curr_token) > delta_tokens:
modified_list_str = MORE_MODIFIED_FILES_ + "\n".join(modified_file_names)
final_diff = final_diff + "\n\n" + modified_list_str
if deleted_file_names:
modified_list_str = clip_tokens(modified_list_str, max_tokens - curr_token)
if modified_list_str:
final_diff = final_diff + "\n\n" + modified_list_str
curr_token += token_handler.count_tokens(modified_list_str) + 2
if deleted_file_names and (max_tokens - curr_token) > delta_tokens:
deleted_list_str = DELETED_FILES_ + "\n".join(deleted_file_names)
final_diff = final_diff + "\n\n" + deleted_list_str
deleted_list_str = clip_tokens(deleted_list_str, max_tokens - curr_token)
if deleted_list_str:
final_diff = final_diff + "\n\n" + deleted_list_str
try:
get_logger().debug(f"After pruning, added_list_str: {added_list_str}, modified_list_str: {modified_list_str}, "
f"deleted_list_str: {deleted_list_str}")
f"deleted_list_str: {deleted_list_str}")
except Exception as e:
pass
return final_diff
@ -134,6 +146,9 @@ def pr_generate_extended_diff(pr_languages: list,
# extend each patch with extra lines of context
extended_patch = extend_patch(original_file_content_str, patch, num_lines=patch_extra_lines)
if not extended_patch:
get_logger().warning(f"Failed to extend patch for file: {file.filename}")
continue
full_extended_patch = f"\n\n## {file.filename}\n\n{extended_patch}\n"
if add_line_numbers_to_hunks:
@ -149,7 +164,7 @@ def pr_generate_extended_diff(pr_languages: list,
def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, model: str,
convert_hunks_to_line_numbers: bool) -> Tuple[list, list, list, list]:
convert_hunks_to_line_numbers: bool) -> Tuple[list, list, list, list, int]:
"""
Generate a compressed diff string for a pull request, using diff minimization techniques to reduce the number of
tokens used.
@ -195,10 +210,11 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
patch = handle_patch_deletions(patch, original_file_content_str,
new_file_content_str, file.filename, file.edit_type)
if patch is None:
if not deleted_files_list:
total_tokens += token_handler.count_tokens(DELETED_FILES_)
deleted_files_list.append(file.filename)
total_tokens += token_handler.count_tokens(file.filename) + 1
# if not deleted_files_list:
# total_tokens += token_handler.count_tokens(DELETED_FILES_)
if file.filename not in deleted_files_list:
deleted_files_list.append(file.filename)
# total_tokens += token_handler.count_tokens(file.filename) + 1
continue
if convert_hunks_to_line_numbers:
@ -219,14 +235,17 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
if get_settings().config.verbosity_level >= 2:
get_logger().warning(f"Patch too large, minimizing it, {file.filename}")
if file.edit_type == EDIT_TYPE.ADDED:
if not added_files_list:
total_tokens += token_handler.count_tokens(ADDED_FILES_)
added_files_list.append(file.filename)
# if not added_files_list:
# total_tokens += token_handler.count_tokens(ADDED_FILES_)
if file.filename not in added_files_list:
added_files_list.append(file.filename)
# total_tokens += token_handler.count_tokens(file.filename) + 1
else:
if not modified_files_list:
total_tokens += token_handler.count_tokens(MORE_MODIFIED_FILES_)
modified_files_list.append(file.filename)
total_tokens += token_handler.count_tokens(file.filename) + 1
# if not modified_files_list:
# total_tokens += token_handler.count_tokens(MORE_MODIFIED_FILES_)
if file.filename not in modified_files_list:
modified_files_list.append(file.filename)
# total_tokens += token_handler.count_tokens(file.filename) + 1
continue
if patch:
@ -239,7 +258,7 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"Tokens: {total_tokens}, last filename: {file.filename}")
return patches, modified_files_list, deleted_files_list, added_files_list
return patches, modified_files_list, deleted_files_list, added_files_list, total_tokens
async def retry_with_fallback_models(f: Callable, model_type: ModelType = ModelType.REGULAR):
@ -358,9 +377,25 @@ def get_pr_multi_diffs(git_provider: GitProvider,
patch = convert_to_hunks_with_lines_numbers(patch, file)
new_patch_tokens = token_handler.count_tokens(patch)
if patch and (token_handler.prompt_tokens + new_patch_tokens) > get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
get_logger().warning(f"Patch too large, skipping: {file.filename}")
continue
if patch and (token_handler.prompt_tokens + new_patch_tokens) > get_max_tokens(
model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
if get_settings().config.get('large_patch_policy', 'skip') == 'skip':
get_logger().warning(f"Patch too large, skipping: {file.filename}")
continue
elif get_settings().config.get('large_patch_policy') == 'clip':
delta_tokens = get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD - token_handler.prompt_tokens
patch_clipped = clip_tokens(patch, delta_tokens, delete_last_line=True, num_input_tokens=new_patch_tokens)
new_patch_tokens = token_handler.count_tokens(patch_clipped)
if patch_clipped and (token_handler.prompt_tokens + new_patch_tokens) > get_max_tokens(
model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
get_logger().warning(f"Patch too large, skipping: {file.filename}")
continue
else:
get_logger().info(f"Clipped large patch for file: {file.filename}")
patch = patch_clipped
else:
get_logger().warning(f"Patch too large, skipping: {file.filename}")
continue
if patch and (total_tokens + new_patch_tokens > get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD):
final_diff = "\n".join(patches)
@ -382,4 +417,4 @@ def get_pr_multi_diffs(git_provider: GitProvider,
final_diff = "\n".join(patches)
final_diff_list.append(final_diff)
return final_diff_list
return final_diff_list
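
The new `config.large_patch_policy` setting selects between the two branches above. A sketch of opting into clipping, using the settings API seen elsewhere in this diff (the key name is taken from the `config.get('large_patch_policy', 'skip')` calls above):

```python
from pr_agent.config_loader import get_settings

# Opt into clipping oversized patches instead of skipping them entirely;
# 'skip' is the default fallback used in get_pr_multi_diffs above.
get_settings().set("config.large_patch_policy", "clip")
```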

View File

@ -1,12 +1,25 @@
from jinja2 import Environment, StrictUndefined
from tiktoken import encoding_for_model, get_encoding
from pr_agent.config_loader import get_settings
from threading import Lock
def get_token_encoder():
return encoding_for_model(get_settings().config.model) if "gpt" in get_settings().config.model else get_encoding(
"cl100k_base")
class TokenEncoder:
_encoder_instance = None
_model = None
_lock = Lock() # Create a lock object
@classmethod
def get_token_encoder(cls):
model = get_settings().config.model
if cls._encoder_instance is None or model != cls._model: # Check without acquiring the lock for performance
with cls._lock: # Lock acquisition to ensure thread safety
if cls._encoder_instance is None or model != cls._model:
cls._model = model
cls._encoder_instance = encoding_for_model(cls._model) if "gpt" in cls._model else get_encoding(
"cl100k_base")
return cls._encoder_instance
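
`TokenEncoder` replaces the module-level `get_token_encoder()` function with a cached, thread-safe singleton: the unlocked first check is a fast path, and the re-check under the lock is what actually guarantees the encoder is built only once per model (double-checked locking). A brief usage sketch:

```python
# Usage sketch: all callers share one cached tiktoken encoder until the
# configured model changes, at which point it is rebuilt under the lock.
from pr_agent.algo.token_handler import TokenEncoder

encoder = TokenEncoder.get_token_encoder()
print(len(encoder.encode("def foo():\n    return 42\n")))  # token count
```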
class TokenHandler:
"""
@ -31,7 +44,7 @@ class TokenHandler:
- system: The system string.
- user: The user string.
"""
self.encoder = get_token_encoder()
self.encoder = TokenEncoder.get_token_encoder()
if pr is not None:
self.prompt_tokens = self._get_system_user_tokens(pr, self.encoder, vars, system, user)

View File

@ -2,8 +2,10 @@ from __future__ import annotations
import difflib
import json
import os
import re
import textwrap
import time
from datetime import datetime
from enum import Enum
from typing import Any, List, Tuple
@ -12,7 +14,7 @@ import yaml
from starlette_context import context
from pr_agent.algo import MAX_TOKENS
from pr_agent.algo.token_handler import get_token_encoder
from pr_agent.algo.token_handler import TokenEncoder
from pr_agent.config_loader import get_settings, global_settings
from pr_agent.algo.types import FilePatchInfo
from pr_agent.log import get_logger
@ -67,13 +69,15 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
output_data (dict): A dictionary containing data to be converted to markdown format.
Returns:
str: The markdown formatted text generated from the input dictionary.
"""
"""
emojis = {
"Possible issues": "🔍",
"Can be split": "🔀",
"Possible issues": "",
"Score": "🏅",
"Relevant tests": "🧪",
"Focused PR": "",
"Relevant ticket": "🎫",
"Security concerns": "🔒",
"Insights from user's answers": "📝",
"Code feedback": "🤖",
@ -81,9 +85,9 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
}
markdown_text = ""
if not incremental_review:
markdown_text += f"## PR Review\n\n"
markdown_text += f"## PR Review 🔍\n\n"
else:
markdown_text += f"## Incremental PR Review\n\n"
markdown_text += f"## Incremental PR Review 🔍\n\n"
markdown_text += f"⏮️ Review for commits since previous PR-Agent review {incremental_review}.\n\n"
if gfm_supported:
markdown_text += "<table>\n<tr>\n"
@ -94,7 +98,8 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
for key, value in output_data['review'].items():
if value is None or value == '' or value == {} or value == []:
continue
if key.lower() != 'can_be_split':
continue
key_nice = key.replace('_', ' ').capitalize()
emoji = emojis.get(key_nice, "")
if gfm_supported:
@ -103,6 +108,8 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
if 'security concerns' in key_nice.lower():
value = emphasize_header(value.strip())
markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
elif 'can be split' in key_nice.lower():
markdown_text += process_can_be_split(emoji, value)
elif 'possible issues' in key_nice.lower():
value = value.strip()
issues = value.split('\n- ')
@ -154,6 +161,38 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
return markdown_text
def process_can_be_split(emoji, value):
# key_nice = "Can this PR be split?"
key_nice = "Multiple PR themes"
markdown_text = ""
if not value or isinstance(value, list) and len(value) == 1:
value = "No"
markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
else:
number_of_splits = len(value)
markdown_text += f"<tr><td rowspan={number_of_splits}> {emoji}&nbsp;<strong>{key_nice}</strong></td>\n"
for i, split in enumerate(value):
title = split.get('title', '')
relevant_files = split.get('relevant_files', [])
if i == 0:
markdown_text += f"<td><details><summary>\nSub-PR theme: <strong>{title}</strong></summary>\n\n"
markdown_text += f"<hr>\n"
markdown_text += f"Relevant files:\n"
markdown_text += f"<ul>\n"
for file in relevant_files:
markdown_text += f"<li>{file}</li>\n"
markdown_text += f"</ul>\n\n</details></td></tr>\n"
else:
markdown_text += f"<tr>\n<td><details><summary>\nSub-PR theme: <strong>{title}</strong></summary>\n\n"
markdown_text += f"<hr>\n"
markdown_text += f"Relevant files:\n"
markdown_text += f"<ul>\n"
for file in relevant_files:
markdown_text += f"<li>{file}</li>\n"
markdown_text += f"</ul>\n\n</details></td></tr>\n"
return markdown_text
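
For clarity, a hypothetical input showing the shape `process_can_be_split` expects: a list of sub-PR themes, each a dict with a `title` and its `relevant_files` (field names taken from the `split.get(...)` calls above):

```python
# Hypothetical data illustrating the expected input shape.
value = [
    {"title": "Refactor token handling",
     "relevant_files": ["pr_agent/algo/token_handler.py"]},
    {"title": "Add Bedrock models",
     "relevant_files": ["pr_agent/algo/__init__.py"]},
]
rows = process_can_be_split("🔀", value)  # returns <tr>/<td> rows for the review table
```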
def parse_code_suggestion(code_suggestion: dict, i: int = 0, gfm_supported: bool = True) -> str:
"""
Convert a dictionary of data into markdown format.
@ -319,7 +358,7 @@ def convert_str_to_datetime(date_str):
return datetime.strptime(date_str, datetime_format)
def load_large_diff(filename, new_file_content_str: str, original_file_content_str: str) -> str:
def load_large_diff(filename, new_file_content_str: str, original_file_content_str: str, show_warning: bool = True) -> str:
"""
Generate a patch for a modified file by comparing the original content of the file with the new content provided as
input.
@ -338,7 +377,7 @@ def load_large_diff(filename, new_file_content_str: str, original_file_content_s
try:
diff = difflib.unified_diff(original_file_content_str.splitlines(keepends=True),
new_file_content_str.splitlines(keepends=True))
if get_settings().config.verbosity_level >= 2:
if get_settings().config.verbosity_level >= 2 and show_warning:
get_logger().warning(f"File was modified, but no patch was found. Manually creating patch: {filename}.")
patch = ''.join(diff)
except Exception:
@ -433,7 +472,8 @@ def try_fix_yaml(response_text: str, keys_fix_yaml: List[str] = []) -> dict:
except:
pass
# third fallback - try to remove leading and trailing curly brackets
# third fallback - try to remove leading and trailing curly brackets
response_text_copy = response_text.strip().rstrip().removeprefix('{').removesuffix('}').rstrip(':\n')
try:
data = yaml.safe_load(response_text_copy)
@ -515,7 +555,7 @@ def get_max_tokens(model):
return max_tokens_model
def clip_tokens(text: str, max_tokens: int, add_three_dots=True) -> str:
def clip_tokens(text: str, max_tokens: int, add_three_dots=True, num_input_tokens=None, delete_last_line=False) -> str:
"""
Clip the number of tokens in a string to a maximum number of tokens.
@ -530,16 +570,30 @@ def clip_tokens(text: str, max_tokens: int, add_three_dots=True) -> str:
return text
try:
encoder = get_token_encoder()
num_input_tokens = len(encoder.encode(text))
if num_input_tokens is None:
encoder = TokenEncoder.get_token_encoder()
num_input_tokens = len(encoder.encode(text))
if num_input_tokens <= max_tokens:
return text
if max_tokens < 0:
return ""
# calculate the number of characters to keep
num_chars = len(text)
chars_per_token = num_chars / num_input_tokens
num_output_chars = int(chars_per_token * max_tokens)
clipped_text = text[:num_output_chars]
if add_three_dots:
clipped_text += "...(truncated)"
factor = 0.9 # reduce by 10% to be safe
num_output_chars = int(factor * chars_per_token * max_tokens)
# clip the text
if num_output_chars > 0:
clipped_text = text[:num_output_chars]
if delete_last_line:
clipped_text = clipped_text.rsplit('\n', 1)[0]
if add_three_dots:
clipped_text += "\n...(truncated)"
else: # if the text is empty
clipped_text = ""
return clipped_text
except Exception as e:
get_logger().warning(f"Failed to clip tokens: {e}")
@ -625,3 +679,89 @@ def find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo],
absolute_position = start2 + delta - 1
break
return position, absolute_position
def validate_and_await_rate_limit(rate_limit_status=None, git_provider=None, get_rate_limit_status_func=None):
if git_provider and not rate_limit_status:
rate_limit_status = {'resources': git_provider.github_client.get_rate_limit().raw_data}
if not rate_limit_status:
rate_limit_status = get_rate_limit_status_func()
# validate that the rate limit is not exceeded
is_rate_limit = False
for key, value in rate_limit_status['resources'].items():
if value['remaining'] == 0:
print(f"key: {key}, value: {value}")
is_rate_limit = True
sleep_time_sec = value['reset'] - datetime.now().timestamp()
sleep_time_hour = sleep_time_sec / 3600.0
print(f"Rate limit exceeded. Sleeping for {sleep_time_hour} hours")
if sleep_time_sec > 0:
time.sleep(sleep_time_sec+1)
if git_provider:
rate_limit_status = {'resources': git_provider.github_client.get_rate_limit().raw_data}
else:
rate_limit_status = get_rate_limit_status_func()
return is_rate_limit
def get_largest_component(pr_url):
from pr_agent.tools.pr_analyzer import PRAnalyzer
publish_output = get_settings().config.publish_output
get_settings().config.publish_output = False # disable publish output
analyzer = PRAnalyzer(pr_url)
methods_dict_files = analyzer.run_sync()
get_settings().config.publish_output = publish_output
max_lines_changed = 0
file_b = ""
component_name_b = ""
for file in methods_dict_files:
for method in methods_dict_files[file]:
try:
if methods_dict_files[file][method]['num_plus_lines'] > max_lines_changed:
max_lines_changed = methods_dict_files[file][method]['num_plus_lines']
file_b = file
component_name_b = method
except:
pass
if component_name_b:
get_logger().info(f"Using the largest changed component: '{component_name_b}'")
return component_name_b, file_b
else:
return None, None
def github_action_output(output_data: dict, key_name: str):
try:
if not get_settings().get('github_action_config.enable_output', False):
return
key_data = output_data.get(key_name, {})
with open(os.environ['GITHUB_OUTPUT'], 'a') as fh:
print(f"{key_name}={json.dumps(key_data, indent=None, ensure_ascii=False)}", file=fh)
except Exception as e:
get_logger().error(f"Failed to write to GitHub Action output: {e}")
return
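
A sketch of exercising `github_action_output` outside CI; inside GitHub Actions the runner sets `GITHUB_OUTPUT` itself. The file path here is a placeholder, and the enable flag must be switched on since it defaults to off:

```python
import os

from pr_agent.config_loader import get_settings

os.environ.setdefault("GITHUB_OUTPUT", "/tmp/github_output.txt")  # placeholder path
get_settings().set("github_action_config.enable_output", True)

github_action_output({"review": {"score": 85}}, "review")
# /tmp/github_output.txt now contains a line like: review={"score": 85}
```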
def show_relevant_configurations(relevant_section: str) -> str:
forbidden_keys = ['ai_disclaimer', 'ai_disclaimer_title', 'ANALYTICS_FOLDER', 'secret_provider',
'trial_prefix_message', 'no_eligible_message', 'identity_provider', 'ALLOWED_REPOS','APP_NAME']
markdown_text = ""
markdown_text += "\n<hr>\n<details> <summary><strong>🛠️ Relevant configurations:</strong></summary> \n\n"
markdown_text +="<br>These are the relevant [configurations](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml) for this tool:\n\n"
markdown_text += f"**[config**]\n```yaml\n\n"
for key, value in get_settings().config.items():
if key in forbidden_keys:
continue
markdown_text += f"{key}: {value}\n"
markdown_text += "\n```\n"
markdown_text += f"\n**[{relevant_section}]**\n```yaml\n\n"
for key, value in get_settings().get(relevant_section, {}).items():
if key in forbidden_keys:
continue
markdown_text += f"{key}: {value}\n"
markdown_text += "\n```"
markdown_text += "\n</details>\n"
return markdown_text
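
A usage sketch: render the active `[config]` section plus one tool's section as a collapsible markdown block, for example to append to a published comment (the section name is illustrative):

```python
# Usage sketch: build the collapsible configurations block for one tool.
markdown = show_relevant_configurations(relevant_section="pr_reviewer")
print(markdown)  # '<hr>\n<details>...' block listing non-forbidden keys
```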

View File

@ -9,48 +9,59 @@ from pr_agent.log import setup_logger
log_level = os.environ.get("LOG_LEVEL", "INFO")
setup_logger(log_level)
def run(inargs=None):
def set_parser():
parser = argparse.ArgumentParser(description='AI based pull request analyzer', usage=
"""\
Usage: cli.py --pr-url=<URL on supported git hosting service> <command> [<args>].
For example:
- cli.py --pr_url=... review
- cli.py --pr_url=... describe
- cli.py --pr_url=... improve
- cli.py --pr_url=... ask "write me a poem about this PR"
- cli.py --pr_url=... reflect
- cli.py --issue_url=... similar_issue
"""\
Usage: cli.py --pr-url=<URL on supported git hosting service> <command> [<args>].
For example:
- cli.py --pr_url=... review
- cli.py --pr_url=... describe
- cli.py --pr_url=... improve
- cli.py --pr_url=... ask "write me a poem about this PR"
- cli.py --pr_url=... reflect
- cli.py --issue_url=... similar_issue
Supported commands:
- review / review_pr - Add a review that includes a summary of the PR and specific suggestions for improvement.
Supported commands:
- review / review_pr - Add a review that includes a summary of the PR and specific suggestions for improvement.
- ask / ask_question [question] - Ask a question about the PR.
- ask / ask_question [question] - Ask a question about the PR.
- describe / describe_pr - Modify the PR title and description based on the PR's contents.
- describe / describe_pr - Modify the PR title and description based on the PR's contents.
- improve / improve_code - Suggest improvements to the code in the PR as pull request comments ready to commit.
Extended mode ('improve --extended') employs several calls, and provides more thorough feedback
- improve / improve_code - Suggest improvements to the code in the PR as pull request comments ready to commit.
Extended mode ('improve --extended') employs several calls, and provides more thorough feedback
- reflect - Ask the PR author questions about the PR.
- reflect - Ask the PR author questions about the PR.
- update_changelog - Update the changelog based on the PR's contents.
- update_changelog - Update the changelog based on the PR's contents.
- add_docs
- add_docs
- generate_labels
- generate_labels
Configuration:
To edit any configuration parameter from 'configuration.toml', just add -config_path=<value>.
For example: 'python cli.py --pr_url=... review --pr_reviewer.extra_instructions="focus on the file: ..."'
""")
Configuration:
To edit any configuration parameter from 'configuration.toml', just add -config_path=<value>.
For example: 'python cli.py --pr_url=... review --pr_reviewer.extra_instructions="focus on the file: ..."'
""")
parser.add_argument('--pr_url', type=str, help='The URL of the PR to review', default=None)
parser.add_argument('--issue_url', type=str, help='The URL of the Issue to review', default=None)
parser.add_argument('command', type=str, help='The', choices=commands, default='review')
parser.add_argument('rest', nargs=argparse.REMAINDER, default=[])
args = parser.parse_args(inargs)
return parser
def run_command(pr_url, command):
# Preparing the command
run_command_str = f"--pr_url={pr_url} {command.lstrip('/')}"
args = set_parser().parse_args(run_command_str.split())
# Run the command. Feedback will appear in GitHub PR comments
run(args=args)
def run(inargs=None, args=None):
parser = set_parser()
if not args:
args = parser.parse_args(inargs)
if not args.pr_url and not args.issue_url:
parser.print_help()
return

pr_agent/cli_pip.py Normal file
View File

@ -0,0 +1,23 @@
from pr_agent import cli
from pr_agent.config_loader import get_settings
def main():
# Fill in the following values
provider = "github" # GitHub provider
user_token = "..." # GitHub user token
openai_key = "..." # OpenAI key
pr_url = "..." # PR URL, for example 'https://github.com/Codium-ai/pr-agent/pull/809'
command = "/review" # Command to run (e.g. '/review', '/describe', '/ask="What is the purpose of this PR?"')
# Setting the configurations
get_settings().set("CONFIG.git_provider", provider)
get_settings().set("openai.key", openai_key)
get_settings().set("github.user_token", user_token)
# Run the command. Feedback will appear in GitHub PR comments
cli.run_command(pr_url, command)
if __name__ == '__main__':
main()

View File

@ -21,6 +21,7 @@ global_settings = Dynaconf(
"settings/pr_line_questions_prompts.toml",
"settings/pr_description_prompts.toml",
"settings/pr_code_suggestions_prompts.toml",
"settings/pr_code_suggestions_reflect_prompts.toml",
"settings/pr_sort_code_suggestions_prompts.toml",
"settings/pr_information_from_user_prompts.toml",
"settings/pr_update_changelog_prompts.toml",

View File

@ -26,6 +26,7 @@ try:
CommentThread,
GitVersionDescriptor,
GitPullRequest,
GitPullRequestIterationChanges,
)
except ImportError:
AZURE_DEVOPS_AVAILABLE = False
@ -230,29 +231,58 @@ class AzureDevopsProvider(GitProvider):
base_sha = self.pr.last_merge_target_commit
head_sha = self.pr.last_merge_source_commit
commits = self.azure_devops_client.get_pull_request_commits(
project=self.workspace_slug,
# Get PR iterations
iterations = self.azure_devops_client.get_pull_request_iterations(
repository_id=self.repo_slug,
pull_request_id=self.pr_num,
project=self.workspace_slug
)
changes = None
if iterations:
iteration_id = iterations[-1].id # Get the last iteration (most recent changes)
# Get changes for the iteration
changes = self.azure_devops_client.get_pull_request_iteration_changes(
repository_id=self.repo_slug,
pull_request_id=self.pr_num,
iteration_id=iteration_id,
project=self.workspace_slug
)
diff_files = []
diffs = []
diff_types = {}
if changes:
for change in changes.change_entries:
item = change.additional_properties.get('item', {})
path = item.get('path', None)
if path:
diffs.append(path)
diff_types[path] = change.additional_properties.get('changeType', 'Unknown')
for c in commits:
changes_obj = self.azure_devops_client.get_changes(
project=self.workspace_slug,
repository_id=self.repo_slug,
commit_id=c.commit_id,
)
for i in changes_obj.changes:
if i["item"]["gitObjectType"] == "tree":
continue
diffs.append(i["item"]["path"])
diff_types[i["item"]["path"]] = i["changeType"]
# wrong implementation - gets all the files that were changed in any commit in the PR
# commits = self.azure_devops_client.get_pull_request_commits(
# project=self.workspace_slug,
# repository_id=self.repo_slug,
# pull_request_id=self.pr_num,
# )
#
# diff_files = []
# diffs = []
# diff_types = {}
diffs = list(set(diffs))
# for c in commits:
# changes_obj = self.azure_devops_client.get_changes(
# project=self.workspace_slug,
# repository_id=self.repo_slug,
# commit_id=c.commit_id,
# )
# for i in changes_obj.changes:
# if i["item"]["gitObjectType"] == "tree":
# continue
# diffs.append(i["item"]["path"])
# diff_types[i["item"]["path"]] = i["changeType"]
#
# diffs = list(set(diffs))
for file in diffs:
if not is_valid_file(file):
@ -273,12 +303,13 @@ class AzureDevopsProvider(GitProvider):
new_file_content_str = new_file_content_str.content
except Exception as error:
get_logger().error(
"Failed to retrieve new file content of %s at version %s. Error: %s",
file,
version,
str(error),
)
get_logger().error(f"Failed to retrieve new file content of {file} at version {version}. Error: {str(error)}")
# get_logger().error(
# "Failed to retrieve new file content of %s at version %s. Error: %s",
# file,
# version,
# str(error),
# )
new_file_content_str = ""
edit_type = EDIT_TYPE.MODIFIED
@ -303,17 +334,12 @@ class AzureDevopsProvider(GitProvider):
)
original_file_content_str = original_file_content_str.content
except Exception as error:
get_logger().error(
"Failed to retrieve original file content of %s at version %s. Error: %s",
file,
version,
str(error),
)
get_logger().error(f"Failed to retrieve original file content of {file} at version {version}. Error: {str(error)}")
original_file_content_str = ""
patch = load_large_diff(
file, new_file_content_str, original_file_content_str
)
file, new_file_content_str, original_file_content_str, show_warning=False
).rstrip()
diff_files.append(
FilePatchInfo(

View File

@ -1,13 +1,14 @@
import json
from typing import Optional, Tuple
from urllib.parse import urlparse
from urllib.parse import quote_plus, urlparse
import requests
from atlassian.bitbucket import Bitbucket
from starlette_context import context
from .git_provider import GitProvider
from pr_agent.algo.types import FilePatchInfo
from ..algo.types import EDIT_TYPE, FilePatchInfo
from ..algo.language_handler import is_valid_file
from ..algo.utils import load_large_diff, find_line_number_of_relevant_line_in_file
from ..config_loader import get_settings
from ..log import get_logger
@ -58,6 +59,9 @@ class BitbucketServerProvider(GitProvider):
return contents
except Exception:
return ""
def get_pr_id(self):
return self.pr_num
def publish_code_suggestions(self, code_suggestions: list) -> bool:
"""
@ -140,14 +144,8 @@ class BitbucketServerProvider(GitProvider):
if self.diff_files:
return self.diff_files
commits_in_pr = self.bitbucket_client.get_pull_requests_commits(
self.workspace_slug,
self.repo_slug,
self.pr_num
)
commit_list = list(commits_in_pr)
base_sha, head_sha = commit_list[0]['parents'][0]['id'], commit_list[-1]['id']
base_sha = self.pr.toRef['latestCommit']
head_sha = self.pr.fromRef['latestCommit']
diff_files = []
original_file_content_str = ""
@ -156,6 +154,10 @@ class BitbucketServerProvider(GitProvider):
changes = self.bitbucket_client.get_pull_requests_changes(self.workspace_slug, self.repo_slug, self.pr_num)
for change in changes:
file_path = change['path']['toString']
if not is_valid_file(file_path.split("/")[-1]):
get_logger().info(f"Skipping a non-code file: {file_path}")
continue
match change['type']:
case 'ADD':
edit_type = EDIT_TYPE.ADDED
@ -241,8 +243,11 @@ class BitbucketServerProvider(GitProvider):
}
}
response = requests.post(url=self._get_pr_comments_url(), json=payload, headers=self.headers)
return response
try:
requests.post(url=self._get_pr_comments_url(), json=payload, headers=self.headers).raise_for_status()
except Exception as e:
get_logger().error(f"Failed to publish inline comment to '{file}' at line {from_line}, error: {e}")
raise e
def generate_link_to_relevant_line_number(self, suggestion) -> str:
try:
@ -255,18 +260,37 @@ class BitbucketServerProvider(GitProvider):
position, absolute_position = find_line_number_of_relevant_line_in_file \
(diff_files, relevant_file, relevant_line_str)
if absolute_position != -1:
if self.pr:
link = f"{self.pr_url}/diff#{quote_plus(relevant_file)}?t={absolute_position}"
return link
else:
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"Failed adding line link to '{relevant_file}' since PR not set")
else:
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"Failed adding line link to '{relevant_file}' since position not found")
if absolute_position != -1 and self.pr_url:
link = f"{self.pr_url}/#L{relevant_file}T{absolute_position}"
link = f"{self.pr_url}/diff#{quote_plus(relevant_file)}?t={absolute_position}"
return link
except Exception as e:
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"Failed adding line link, error: {e}")
get_logger().info(f"Failed adding line link to '{relevant_file}', error: {e}")
return ""
def publish_inline_comments(self, comments: list[dict]):
for comment in comments:
self.publish_inline_comment(comment['body'], comment['position'], comment['path'])
if 'position' in comment:
self.publish_inline_comment(comment['body'], comment['position'], comment['path'])
elif 'start_line' in comment: # multi-line comment
# note that bitbucket does not seem to support range - only a comment on a single line - https://community.developer.atlassian.com/t/api-post-endpoint-for-inline-pull-request-comments/60452
self.publish_inline_comment(comment['body'], comment['start_line'], comment['path'])
elif 'line' in comment: # single-line comment
self.publish_inline_comment(comment['body'], comment['line'], comment['path'])
else:
get_logger().error(f"Could not publish inline comment: {comment}")
def get_title(self):
return self.pr.title
@ -278,7 +302,10 @@ class BitbucketServerProvider(GitProvider):
return self.pr.fromRef['displayId']
def get_pr_description_full(self):
return self.pr.description
if hasattr(self.pr, "description"):
return self.pr.description
else:
return None
def get_user_id(self):
return 0
@ -305,7 +332,7 @@ class BitbucketServerProvider(GitProvider):
path_parts = parsed_url.path.strip("/").split("/")
if len(path_parts) < 6 or path_parts[4] != "pull-requests":
raise ValueError(
"The provided URL does not appear to be a Bitbucket PR URL"
f"The provided URL '{pr_url}' does not appear to be a Bitbucket PR URL"
)
workspace_slug = path_parts[1]
@ -313,7 +340,7 @@ class BitbucketServerProvider(GitProvider):
try:
pr_number = int(path_parts[5])
except ValueError as e:
raise ValueError("Unable to convert PR number to integer") from e
raise ValueError(f"Unable to convert PR number '{path_parts[5]}' to integer") from e
return workspace_slug, repo_slug, pr_number
@ -334,13 +361,18 @@ class BitbucketServerProvider(GitProvider):
raise NotImplementedError("Get commit messages function not implemented yet.")
# bitbucket does not support labels
def publish_description(self, pr_title: str, description: str):
payload = json.dumps({
payload = {
"version": self.pr.version,
"description": description,
"title": pr_title
})
response = requests.put(url=self.bitbucket_pull_request_api_url, headers=self.headers, data=payload)
return response
"title": pr_title,
"reviewers": self.pr.reviewers # needs to be sent otherwise gets wiped
}
try:
self.bitbucket_client.update_pull_request(self.workspace_slug, self.repo_slug, str(self.pr_num), payload)
except Exception as e:
get_logger().error(f"Failed to update pull request, error: {e}")
raise e
# bitbucket does not support labels
def publish_labels(self, pr_types: list):

View File

@ -10,7 +10,7 @@ from ..algo.utils import load_large_diff
from .git_provider import GitProvider
from ..config_loader import get_settings
from ..log import get_logger
from pr_agent.algo.language_handler import is_valid_file
class PullRequestCCMimic:
"""

View File

@ -71,7 +71,7 @@ class GitProvider(ABC):
# if the existing description was generated by the pr-agent, but it doesn't contain a user description,
# return nothing (empty string) because it means there is no user description
user_description_header = "## **user description**"
user_description_header = "### **user description**"
if user_description_header not in description_lowercase:
get_logger().info(f"Existing description was generated by the pr-agent, but it doesn't contain a user description")
return ""
@ -102,8 +102,8 @@ class GitProvider(ABC):
return original_user_description
def _possible_headers(self):
return ("## **user description**", "## **pr type**", "## **pr description**", "## **pr labels**", "## **type**", "## **description**",
"## **labels**", "### 🤖 generated by pr agent")
return ("### **user description**", "### **pr type**", "### **pr description**", "### **pr labels**", "### **type**", "### **description**",
"### **labels**", "### 🤖 generated by pr agent")
def _is_generated_by_pr_agent(self, description_lowercase: str) -> bool:
possible_headers = self._possible_headers()
@ -197,6 +197,12 @@ class GitProvider(ABC):
def calc_pr_statistics(self, pull_request_data: dict):
return {}
def get_num_of_files(self):
try:
return len(self.get_diff_files())
except Exception as e:
return -1
def get_main_pr_language(languages, files) -> str:
"""
@ -266,6 +272,7 @@ def get_main_pr_language(languages, files) -> str:
return main_language_str
class IncrementalPR:
def __init__(self, is_incremental: bool = False):
self.is_incremental = is_incremental

View File

@ -114,6 +114,11 @@ class GithubProvider(GitProvider):
self.git_files = self.pr.get_files()
return self.git_files
def get_num_of_files(self):
if self.git_files:
return self.git_files.totalCount
else:
return -1
@retry(exceptions=RateLimitExceeded,
tries=get_settings().github.ratelimit_retries, delay=2, backoff=2, jitter=(1, 3))
@ -142,6 +147,7 @@ class GithubProvider(GitProvider):
for file in files:
if not is_valid_file(file.filename):
get_logger().info(f"Skipping a non-code file: {file.filename}")
continue
new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha) # communication with GitHub
@ -740,22 +746,4 @@ class GithubProvider(GitProvider):
return False
def calc_pr_statistics(self, pull_request_data: dict):
try:
out = {}
from datetime import datetime
created_at = pull_request_data['created_at']
closed_at = pull_request_data['closed_at']
closed_at_datetime = datetime.strptime(closed_at, "%Y-%m-%dT%H:%M:%SZ")
created_at_datetime = datetime.strptime(created_at, "%Y-%m-%dT%H:%M:%SZ")
difference = closed_at_datetime - created_at_datetime
out['hours'] = difference.total_seconds() / 3600
out['commits'] = pull_request_data['commits']
out['comments'] = pull_request_data['comments']
out['review_comments'] = pull_request_data['review_comments']
out['changed_files'] = pull_request_data['changed_files']
out['additions'] = pull_request_data['additions']
out['deletions'] = pull_request_data['deletions']
except Exception as e:
get_logger().exception(f"Failed to calculate PR statistics, error: {e}")
return {}
return out
return {}

View File

@ -26,8 +26,9 @@ from pr_agent.git_providers.utils import apply_repo_settings
from pr_agent.log import get_logger
from fastapi import Request, Depends
from fastapi.security import HTTPBasic, HTTPBasicCredentials
from pr_agent.log import get_logger
from pr_agent.log import LoggingFormat, get_logger, setup_logger
setup_logger(fmt=LoggingFormat.JSON, level="DEBUG")
security = HTTPBasic()
router = APIRouter()
available_commands_rgx = re.compile(r"^\/(" + "|".join(command2class.keys()) + r")\s*")
@ -40,8 +41,15 @@ def handle_request(
):
log_context["action"] = body
log_context["api_url"] = url
with get_logger().contextualize(**log_context):
background_tasks.add_task(PRAgent().handle_request, url, body)
async def inner():
try:
with get_logger().contextualize(**log_context):
await PRAgent().handle_request(url, body)
except Exception as e:
get_logger().error(f"Failed to handle webhook: {e}")
background_tasks.add_task(inner)
# currently only basic auth is supported with azure webhooks

View File

@ -13,9 +13,10 @@ from starlette_context.middleware import RawContextMiddleware
from pr_agent.agent.pr_agent import PRAgent
from pr_agent.config_loader import get_settings
from pr_agent.log import get_logger
from pr_agent.log import LoggingFormat, get_logger, setup_logger
from pr_agent.servers.utils import verify_signature
setup_logger(fmt=LoggingFormat.JSON, level="DEBUG")
router = APIRouter()
@ -24,8 +25,15 @@ def handle_request(
):
log_context["action"] = body
log_context["api_url"] = url
with get_logger().contextualize(**log_context):
background_tasks.add_task(PRAgent().handle_request, url, body)
async def inner():
try:
with get_logger().contextualize(**log_context):
await PRAgent().handle_request(url, body)
except Exception as e:
get_logger().error(f"Failed to handle webhook: {e}")
background_tasks.add_task(inner)
@router.post("/")

View File

@ -46,19 +46,22 @@ async def run_action():
if not GITHUB_EVENT_PATH:
print("GITHUB_EVENT_PATH not set")
return
if not OPENAI_KEY:
print("OPENAI_KEY not set")
return
if not GITHUB_TOKEN:
print("GITHUB_TOKEN not set")
return
# Set the environment variables in the settings
get_settings().set("OPENAI.KEY", OPENAI_KEY)
if OPENAI_KEY:
get_settings().set("OPENAI.KEY", OPENAI_KEY)
else:
# Might not be set if the user is using models not from OpenAI
print("OPENAI_KEY not set")
if OPENAI_ORG:
get_settings().set("OPENAI.ORG", OPENAI_ORG)
get_settings().set("GITHUB.USER_TOKEN", GITHUB_TOKEN)
get_settings().set("GITHUB.DEPLOYMENT_TYPE", "user")
enable_output = get_setting_or_env("GITHUB_ACTION_CONFIG.ENABLE_OUTPUT", True)
get_settings().set("GITHUB_ACTION_CONFIG.ENABLE_OUTPUT", enable_output)
# Load the event payload
try:
@ -96,11 +99,14 @@ async def run_action():
# invoke by default all three tools
if auto_describe is None or is_true(auto_describe):
get_settings().pr_description.final_update_message = False # No final update message when auto_describe is enabled
await PRDescription(pr_url).run()
if auto_review is None or is_true(auto_review):
await PRReviewer(pr_url).run()
if auto_improve is None or is_true(auto_improve):
await PRCodeSuggestions(pr_url).run()
else:
get_logger().info(f"Skipping action: {action}")
# Handle issue comment event
elif GITHUB_EVENT_NAME == "issue_comment" or GITHUB_EVENT_NAME == "pull_request_review_comment":

View File

@ -86,8 +86,13 @@ async def handle_comments_on_pr(body: Dict[str, Any],
return {}
comment_body = body.get("comment", {}).get("body")
if comment_body and isinstance(comment_body, str) and not comment_body.lstrip().startswith("/"):
get_logger().info("Ignoring comment not starting with /")
return {}
if '/ask' in comment_body and comment_body.strip().startswith('> ![image]'):
comment_body_split = comment_body.split('/ask')
comment_body = '/ask' + comment_body_split[1] +' \n' +comment_body_split[0].strip().lstrip('>')
get_logger().info(f"Reformatting comment_body so command is at the beginning: {comment_body}")
else:
get_logger().info("Ignoring comment not starting with /")
return {}
disable_eyes = False
if "issue" in body and "pull_request" in body["issue"] and "url" in body["issue"]["pull_request"]:
api_url = body["issue"]["pull_request"]["url"]
@ -135,7 +140,7 @@ async def handle_new_pr_opened(body: Dict[str, Any],
if not (pull_request and api_url):
get_logger().info(f"Invalid PR event: {action=} {api_url=}")
return {}
if action in get_settings().github_app.handle_pr_actions: # ['opened', 'reopened', 'ready_for_review', 'review_requested']
if action in get_settings().github_app.handle_pr_actions: # ['opened', 'reopened', 'ready_for_review']
if get_identity_provider().verify_eligibility("github", sender_id, api_url) is not Eligibility.NOT_ELIGIBLE:
await _perform_auto_commands_github("pr_commands", agent, body, api_url, log_context)
else:
@ -224,19 +229,22 @@ def handle_closed_pr(body, event, action, log_context):
def get_log_context(body, event, action, build_number):
sender = ""
sender_id = ""
sender_type = ""
try:
sender = body.get("sender", {}).get("login")
sender_id = body.get("sender", {}).get("id")
sender_type = body.get("sender", {}).get("type")
repo = body.get("repository", {}).get("full_name", "")
git_org = body.get("organization", {}).get("login", "")
installation_id = body.get("installation", {}).get("id", "")
app_name = get_settings().get("CONFIG.APP_NAME", "Unknown")
log_context = {"action": action, "event": event, "sender": sender, "server_type": "github_app",
"request_id": uuid.uuid4().hex, "build_number": build_number, "app_name": app_name,
"repo": repo, "git_org": git_org}
"repo": repo, "git_org": git_org, "installation_id": installation_id}
except Exception as e:
get_logger().error("Failed to get log context", e)
log_context = {}
return log_context, sender, sender_id
return log_context, sender, sender_id, sender_type
async def handle_request(body: Dict[str, Any], event: str):
@ -251,7 +259,13 @@ async def handle_request(body: Dict[str, Any], event: str):
if not action:
return {}
agent = PRAgent()
log_context, sender, sender_id = get_log_context(body, event, action, build_number)
log_context, sender, sender_id, sender_type = get_log_context(body, event, action, build_number)
# logic to ignore PRs opened by bot
if get_settings().get("GITHUB_APP.IGNORE_BOT_PR", False) and sender_type == "Bot":
if 'pr-agent' not in sender:
get_logger().info(f"Ignoring PR from '{sender=}' because it is a bot")
return {}
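
The bot guard above is controlled by a settings key that defaults to off. A sketch of enabling it (the key name is taken from the `get_settings().get("GITHUB_APP.IGNORE_BOT_PR", False)` call above):

```python
from pr_agent.config_loader import get_settings

# Opt-in: ignore webhook events for PRs opened by other bot accounts.
get_settings().set("GITHUB_APP.IGNORE_BOT_PR", True)
```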
# handle comments on PRs
if action == 'created':

View File

@ -22,116 +22,22 @@ class HelpMessage:
@staticmethod
def get_review_usage_guide():
output ="**Overview:**\n"
output +="The `review` tool scans the PR code changes, and generates a PR review. The tool can be triggered [automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) every time a new PR is opened, or can be invoked manually by commenting on any PR.\n"
output +=("The `review` tool scans the PR code changes, and generates a PR review which includes several types of feedbacks, such as possible PR issues, security threats and relevant test in the PR. More feedbacks can be [added](https://pr-agent-docs.codium.ai/tools/review/#general-configurations) by configuring the tool.\n\n"
"The tool can be triggered [automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) every time a new PR is opened, or can be invoked manually by commenting on any PR.\n")
output +="""\
When commenting, to edit [configurations](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L19) related to the review tool (`pr_reviewer` section), use the following template:
- When commenting, to edit [configurations](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L23) related to the review tool (`pr_reviewer` section), use the following template:
```
/review --pr_reviewer.some_config1=... --pr_reviewer.some_config2=...
```
With a [configuration file](https://pr-agent-docs.codium.ai/usage-guide/configuration_options/), use the following template:
- With a [configuration file](https://pr-agent-docs.codium.ai/usage-guide/configuration_options/), use the following template:
```
[pr_reviewer]
some_config1=...
some_config2=...
```
"""
output +="\n\n<table>"
# extra instructions
output += "<tr><td><details> <summary><strong> Utilizing extra instructions</strong></summary><hr>\n\n"
output += '''\
The `review` tool can be configured with extra instructions, which can be used to guide the model to feedback tailored to the needs of your project.
Be specific, clear, and concise in the instructions. With extra instructions, you are the prompter. Specify the relevant sub-tool, and the relevant aspects of the PR that you want to emphasize.
Examples for extra instructions:
```
[pr_reviewer] # /review #
extra_instructions="""
In the 'possible issues' section, emphasize the following:
- Does the code logic cover relevant edge cases?
- Is the code logic clear and easy to understand?
- Is the code logic efficient?
...
"""
```
Use triple quotes to write multi-line instructions. Use bullet points to make the instructions more readable.
'''
output += "\n\n</details></td></tr>\n\n"
# automation
output += "<tr><td><details> <summary><strong> How to enable\\disable automation</strong></summary><hr>\n\n"
output += """\
- When you first install PR-Agent app, the [default mode](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) for the `review` tool is:
```
pr_commands = ["/review", ...]
```
meaning the `review` tool will run automatically on every PR, with the default configuration.
Edit this field to enable/disable the tool, or to change the configurations used
"""
output += "\n\n</details></td></tr>\n\n"
# # code feedback
# output += "<tr><td><details> <summary><strong> About the 'Code feedback' section</strong></summary><hr>\n\n"
# output+="""\
# The `review` tool provides several type of feedbacks, one of them is code suggestions.
# If you are interested **only** in the code suggestions, it is recommended to use the [`improve`](https://github.com/Codium-ai/pr-agent/blob/main/docs/IMPROVE.md) feature instead, since it dedicated only to code suggestions, and usually gives better results.
# Use the `review` tool if you want to get a more comprehensive feedback, which includes code suggestions as well.
# """
# output += "\n\n</details></td></tr>\n\n"
# auto-labels
output += "<tr><td><details> <summary><strong> Auto-labels</strong></summary><hr>\n\n"
output+="""\
The `review` tool can auto-generate two specific types of labels for a PR:
- a `possible security issue` label, that detects possible [security issues](https://github.com/Codium-ai/pr-agent/blob/tr/user_description/pr_agent/settings/pr_reviewer_prompts.toml#L136) (`enable_review_labels_security` flag)
- a `Review effort [1-5]: x` label, where x is the estimated effort to review the PR (`enable_review_labels_effort` flag)
"""
output += "\n\n</details></td></tr>\n\n"
# extra sub-tools
output += "<tr><td><details> <summary><strong> Extra sub-tools</strong></summary><hr>\n\n"
output += """\
The `review` tool provides a collection of possible feedback types about a PR.
It is recommended to review the [possible options](https://pr-agent-docs.codium.ai/tools/review/#enabledisable-features), and choose the ones relevant for your use case.
Some of the features that are disabled by default are quite useful, and should be considered for enabling. For example:
`require_score_review`, `require_soc2_ticket`, and more.
"""
output += "\n\n</details></td></tr>\n\n"
output += "<tr><td><details> <summary><strong> Auto-approve PRs</strong></summary><hr>\n\n"
output += '''\
By invoking:
```
/review auto_approve
```
The tool will automatically approve the PR, and add a comment with the approval.
To ensure safety, the auto-approval feature is disabled by default. To enable auto-approval, you need to actively set in a pre-defined configuration file the following:
```
[pr_reviewer]
enable_auto_approval = true
```
(this specific flag cannot be set with a command line argument, only in the configuration file, committed to the repository)
You can also enable auto-approval only if the PR meets certain requirements, such as the `estimated_review_effort` being equal to or below a certain threshold, by adjusting the flag:
```
[pr_reviewer]
maximal_review_effort = 5
```
'''
output += "\n\n</details></td></tr>\n\n"
# general
output += "\n\n<tr><td><details> <summary><strong> More PR-Agent commands</strong></summary><hr> \n\n"
output += HelpMessage.get_general_bot_help_text()
output += "\n\n</details></td></tr>\n\n"
output += "</table>"
output += f"\n\nSee the [review usage](https://pr-agent-docs.codium.ai/tools/review/) page for a comprehensive guide on using this tool.\n\n"
output += f"\n\nSee the review [usage page](https://pr-agent-docs.codium.ai/tools/review/) for a comprehensive guide on using this tool.\n\n"
return output
@ -162,10 +68,9 @@ some_config2=...
output += """\
- When you first install the app, the [default mode](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) for the describe tool is:
```
pr_commands = ["/describe --pr_description.add_original_user_description=true"
"--pr_description.keep_original_user_title=true", ...]
pr_commands = ["/describe", ...]
```
meaning the `describe` tool will run automatically on every PR, will keep the original title, and will add the original user description above the generated description.
meaning the `describe` tool will run automatically on every PR.
- Markers are an alternative way to control the generated description, to give maximal control to the user. If you set:
```
@ -222,7 +127,7 @@ Be specific, clear, and concise in the instructions. With extra instructions, yo
Examples for extra instructions:
```
[pr_description]
extra_instructions="""
extra_instructions="""\
- The PR title should be in the format: '<PR type>: <title>'
- The title should be short and concise (up to 10 words)
- ...
@ -254,16 +159,17 @@ It can be invoked manually by commenting on any PR:
/ask "..."
```
Note that the tool does not have "memory" of previous questions, and answers each question independently.
You can ask questions about the entire PR, about specific code lines, or about an image related to the PR code changes.
"""
output += "\n\n<table>"
# general
output += "\n\n<tr><td><details> <summary><strong> More PR-Agent commands</strong></summary><hr> \n\n"
output += HelpMessage.get_general_bot_help_text()
output += "\n\n</details></td></tr>\n\n"
output += "</table>"
# output += "\n\n<table>"
#
# # # general
# # output += "\n\n<tr><td><details> <summary><strong> More PR-Agent commands</strong></summary><hr> \n\n"
# # output += HelpMessage.get_general_bot_help_text()
# # output += "\n\n</details></td></tr>\n\n"
#
# output += "</table>"
output += f"\n\nSee the [ask usage](https://pr-agent-docs.codium.ai/tools/ask/) page for a comprehensive guide on using this tool.\n\n"
@ -273,16 +179,16 @@ Note that the tool does not have "memory" of previous questions, and answers eac
@staticmethod
def get_improve_usage_guide():
output = "**Overview:**\n"
output += "The `improve` tool scans the PR code changes, and automatically generates suggestions for improving the PR code. "
output += "The code suggestions tool, named `improve`, scans the PR code changes, and automatically generates code suggestions for improving the PR."
output += "The tool can be triggered [automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) every time a new PR is opened, or can be invoked manually by commenting on a PR.\n"
output += """\
When commenting, to edit [configurations](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L69) related to the improve tool (`pr_code_suggestions` section), use the following template:
- When commenting, to edit [configurations](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L78) related to the improve tool (`pr_code_suggestions` section), use the following template:
```
/improve --pr_code_suggestions.some_config1=... --pr_code_suggestions.some_config2=...
```
With a [configuration file](https://pr-agent-docs.codium.ai/usage-guide/configuration_options/), use the following template:
- With a [configuration file](https://pr-agent-docs.codium.ai/usage-guide/configuration_options/), use the following template:
```
[pr_code_suggestions]
@ -291,64 +197,7 @@ some_config2=...
```
"""
output += "\n\n<table>"
# automation
output += "<tr><td><details> <summary><strong> Enabling\\disabling automation </strong></summary><hr>\n\n"
output += """\
When you first install the app, the [default mode](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) for the improve tool is:
```
pr_commands = ["/improve --pr_code_suggestions.summarize=true", ...]
```
meaning the `improve` tool will run automatically on every PR, with summarization enabled. Delete this line to disable the tool from running automatically.
"""
output += "\n\n</details></td></tr>\n\n"
# extra instructions
output += "<tr><td><details> <summary><strong> Utilizing extra instructions</strong></summary><hr>\n\n"
output += '''\
Extra instructions are very important for the `improve` tool, since they enable you to guide the model to suggestions that are more relevant to the specific needs of the project.
Be specific, clear, and concise in the instructions. With extra instructions, you are the prompter. Specify relevant aspects that you want the model to focus on.
Examples for extra instructions:
```
[pr_code_suggestions] # /improve #
extra_instructions="""
Emphasize the following aspects:
- Does the code logic cover relevant edge cases?
- Is the code logic clear and easy to understand?
- Is the code logic efficient?
...
"""
```
Use triple quotes to write multi-line instructions. Use bullet points to make the instructions more readable.
'''
output += "\n\n</details></td></tr>\n\n"
# suggestions quality
output += "\n\n<tr><td><details> <summary><strong> A note on code suggestions quality</strong></summary><hr> \n\n"
output += """\
- While the current AI for code is getting better and better (GPT-4), it's not flawless. Not all the suggestions will be perfect, and a user should not accept all of them automatically.
- Suggestions are not meant to be simplistic. Instead, they aim to give deep feedback and raise questions, ideas and thoughts to the user, who can then use their judgment, experience, and understanding of the code base.
- It is recommended to use the 'extra_instructions' field to guide the model to suggestions that are more relevant to the specific needs of the project, or to use the [custom suggestions :gem:](https://pr-agent-docs.codium.ai/tools/custom_suggestions/) tool.
- With large PRs, the best quality will be obtained by using 'improve --extended' mode.
"""
output += "\n\n</details></td></tr>\n\n"\
# general
output += "\n\n<tr><td><details> <summary><strong> More PR-Agent commands</strong></summary><hr> \n\n"
output += HelpMessage.get_general_bot_help_text()
output += "\n\n</details></td></tr>\n\n"
output += "</table>"
output += f"\n\nSee the [improve usage](https://pr-agent-docs.codium.ai/tools/improve/) page for a more comprehensive guide on using this tool.\n\n"
output += f"\n\nSee the improve [usage page](https://pr-agent-docs.codium.ai/tools/improve/) for a comprehensive guide on using this tool.\n\n"
return output

View File

@ -29,6 +29,9 @@ key = "" # Optional, uncomment if you want to use Cohere. Acquire through https:
[replicate]
key = "" # Optional, uncomment if you want to use Replicate. Acquire through https://replicate.com/
[groq]
key = "" # Acquire through https://console.groq.com/keys
[huggingface]
key = "" # Optional, uncomment if you want to use Huggingface Inference API. Acquire through https://huggingface.co/docs/api-inference/quicktour
api_base = "" # the base url for your huggingface inference endpoint

View File

@ -1,7 +1,7 @@
[config]
model="gpt-4" # "gpt-4-0125-preview"
model_turbo="gpt-4-0125-preview"
fallback_models=["gpt-3.5-turbo-16k"]
model="gpt-4-turbo-2024-04-09"
model_turbo="gpt-4o"
fallback_models=["gpt-4-0125-preview"]
git_provider="github"
publish_output=true
publish_output_progress=true
@ -19,13 +19,15 @@ secret_provider="google_cloud_storage"
cli_mode=false
ai_disclaimer_title="" # Pro feature, title for a collapsible disclaimer to AI outputs
ai_disclaimer="" # Pro feature, full text for the AI disclaimer
output_relevant_configurations=false
large_patch_policy = "clip" # "clip", "skip"
[pr_reviewer] # /review #
# enable/disable features
require_focused_review=false
require_score_review=false
require_tests_review=true
require_estimate_effort_to_review=true
require_can_be_split_review=false
# soc2
require_soc2_ticket=false
soc2_ticket_prompt="Does the PR description include a link to a ticket in a project management system (e.g., Jira, Asana, Trello, etc.)?"
@ -44,7 +46,7 @@ enable_review_labels_effort=true
require_all_thresholds_for_incremental_review=false
minimal_commits_for_incremental_review=0
minimal_minutes_for_incremental_review=0
enable_help_text=true # Determines whether to include help text in the PR review. Enabled by default.
enable_help_text=false # Determines whether to include help text in the PR review. Disabled by default.
# auto approval
enable_auto_approval=false
maximal_review_effort=5
@ -52,15 +54,17 @@ maximal_review_effort=5
[pr_description] # /describe #
publish_labels=true
publish_description_as_comment=false
add_original_user_description=true
keep_original_user_title=true
generate_ai_title=false
use_bullet_points=true
extra_instructions = ""
enable_pr_type=true
final_update_message = true
enable_help_text=false
enable_help_comment=true
# describe as comment
publish_description_as_comment=false
publish_description_as_comment_persistent=true
## changes walkthrough section
enable_semantic_files_types=true
collapsible_file_list='adaptive' # true, false, 'adaptive'
@ -72,19 +76,23 @@ include_generated_by_header=true
#custom_labels = ['Bug fix', 'Tests', 'Bug fix with tests', 'Enhancement', 'Documentation', 'Other']
[pr_questions] # /ask #
enable_help_text=true
enable_help_text=false
[pr_code_suggestions] # /improve #
max_context_tokens=8000
num_code_suggestions=4
summarize = true
commitable_code_suggestions = false
extra_instructions = ""
rank_suggestions = false
enable_help_text=true
enable_help_text=false
persistent_comment=false
# suggestions scoring
self_reflect_on_suggestions=true
suggestions_score_threshold=0 # [0-10]. highly recommend not to set this value above 8, since above it may clip highly relevant suggestions
# params for '/improve --extended' mode
auto_extended_mode=true
num_code_suggestions_per_chunk=5
num_code_suggestions_per_chunk=4
max_number_of_calls = 3
parallel_calls = true
rank_extended_suggestions = false
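A condensed, illustrative sketch of how the `self_reflect_on_suggestions` and `suggestions_score_threshold` settings interact (the actual filtering, shown later in pr_code_suggestions.py, also handles per-chunk calls and error logging):
```python
# Illustrative sketch of the score gate driven by 'self_reflect_on_suggestions'
# and 'suggestions_score_threshold'; simplified from the real filtering loop.
from pr_agent.config_loader import get_settings

def filter_by_score(suggestions: list) -> list:
    cfg = get_settings().pr_code_suggestions
    if not cfg.self_reflect_on_suggestions:
        return suggestions  # no scores were produced, keep everything
    threshold = max(1, cfg.suggestions_score_threshold)  # a threshold of 0 acts as 1
    return [s for s in suggestions if int(s.get("score", 0)) >= threshold]
```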
@ -107,8 +115,13 @@ num_tests=3 # number of tests to generate. max 5.
avoid_mocks=true # if true, the generated tests will prefer to use real objects instead of mocks
file = "" # in case there are several components with the same name, you can specify the relevant file
class_name = "" # in case there are several methods with the same name in the same file, you can specify the relevant class name
enable_help_text=true
enable_help_text=false
[pr_improve_component] # /improve_component #
num_code_suggestions=4
extra_instructions = ""
file = "" # in case there are several components with the same name, you can specify the relevant file
class_name = ""
[checks] # /checks (pro feature) #
enable_auto_checks_feedback=true
@ -132,6 +145,7 @@ try_fix_invalid_inline_comments = true
# auto_review = true # set as env var in .github/workflows/pr-agent.yaml
# auto_describe = true # set as env var in .github/workflows/pr-agent.yaml
# auto_improve = true # set as env var in .github/workflows/pr-agent.yaml
# enable_output = true # set as env var in .github/workflows/pr-agent.yaml
[github_app]
# these toggles allows running the github app from custom deployments
@ -139,9 +153,9 @@ override_deployment_type = true
# settings for "pull_request" event
handle_pr_actions = ['opened', 'reopened', 'ready_for_review']
pr_commands = [
"/describe --pr_description.add_original_user_description=true --pr_description.keep_original_user_title=true",
"/describe --pr_description.final_update_message=false",
"/review --pr_reviewer.num_code_suggestions=0",
"/improve --pr_code_suggestions.summarize=true",
"/improve",
]
# settings for "pull_request" event with "synchronize" action - used to detect and handle push triggers for new commits
handle_push_trigger = false
@ -151,23 +165,24 @@ push_trigger_wait_for_initial_review = true
push_trigger_pending_tasks_backlog = true
push_trigger_pending_tasks_ttl = 300
push_commands = [
"/describe --pr_description.add_original_user_description=true --pr_description.keep_original_user_title=true",
"/describe",
"/review --pr_reviewer.num_code_suggestions=0",
]
ignore_pr_title = []
ignore_bot_pr = true
[gitlab]
url = "https://gitlab.com" # URL to the gitlab service
pr_commands = [
"/describe --pr_description.add_original_user_description=true --pr_description.keep_original_user_title=true",
"/describe",
"/review --pr_reviewer.num_code_suggestions=0",
"/improve --pr_code_suggestions.summarize=true",
"/improve",
]
[bitbucket_app]
pr_commands = [
"/review --pr_reviewer.num_code_suggestions=0",
"/improve --pr_code_suggestions.summarize=false",
"/improve --pr_code_suggestions.commitable_code_suggestions=true",
]
@ -192,7 +207,8 @@ pr_commands = [
url = ""
[litellm]
#use_client = false
# use_client = false
# drop_params = false
[pr_similar_issue]
skip_comments = false

View File

@ -44,6 +44,7 @@ default = [
'ss',
'svg',
'tar',
'tgz',
'tsv',
'ttf',
'war',

View File

@ -1,8 +1,9 @@
[pr_code_suggestions_prompt]
system="""You are PR-Reviewer, a language model that specializes in suggesting code improvements for a Pull Request (PR).
Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR diff (lines starting with '+').
system="""You are PR-Reviewer, a language model that specializes in suggesting ways to improve for a Pull Request (PR) code.
Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR diff.
Example for the PR Diff format:
The format we will use to present the PR code diff:
======
## file: 'src/file1.py'
@ -26,22 +27,26 @@ __old hunk__
## file: 'src/file2.py'
...
======
- In this format, we separated each hunk of code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code that was removed.
- Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code.
- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.
Specific instructions:
Specific instructions for generating code suggestions:
- Provide up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful.
- The suggestions should refer only to code from the '__new hunk__' sections, and focus on new lines of code (lines starting with '+').
- Prioritize suggestions that address major problems, issues and bugs in the PR code. As a second priority, suggestions should focus on enhancement, best practice, performance, maintainability, and other aspects.
- The suggestions should focus on ways to improve the new code in the PR, meaning focusing on lines from '__new hunk__' sections, starting with '+'. Use the '__old hunk__' sections to understand the context of the code changes.
- Prioritize suggestions that address possible issues, major problems, and bugs in the PR code.
- Don't suggest adding docstrings, type hints, or comments, or removing unused imports.
- Suggestions should not repeat code already present in the '__new hunk__' sections.
- Provide the exact line numbers range (inclusive) for each suggestion.
- Provide the exact line numbers range (inclusive) for each suggestion. Use the line numbers from the '__new hunk__' sections.
- When quoting variables or names from the code, use backticks (`) instead of single quote (').
- Take into account that you are reviewing a PR code diff, and that the entire codebase is not available for you as context. Hence, avoid suggestions that might conflict with unseen parts of the codebase.
{%- if extra_instructions %}
Extra instructions from the user:
Extra instructions from the user, that should be taken into account with high priority:
======
{{ extra_instructions }}
======
@ -54,17 +59,12 @@ class CodeSuggestion(BaseModel):
relevant_file: str = Field(description="the relevant file full path")
language: str = Field(description="the code language of the relevant file")
suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR")
{%- if summarize_mode %}
existing_code: str = Field(description="a short code snippet from a '__new hunk__' section to illustrate the relevant existing code. Don't show the line numbers.")
improved_code: str = Field(description="a short code snippet to illustrate the improved code, after applying the suggestion.")
one_sentence_summary:str = Field(description="a short summary of the suggestion action, in a single sentence. Focus on the 'what'. Be general, and avoid method or variable names.")
{%- else %}
existing_code: str = Field(description="a code snippet, demonstrating the relevant code lines from a '__new hunk__' section. It must be contiguous, correctly formatted and indented, and without line numbers")
improved_code: str = Field(description="a new code snippet, that can be used to replace the relevant lines in '__new hunk__' code. Replacement suggestions should be complete, correctly formatted and indented, and without line numbers")
{%- endif %}
existing_code: str = Field(description="a short code snippet, demonstrating the relevant code lines from a '__new hunk__' section. It must be without line numbers. Use abbreviations if needed")
improved_code: str = Field(description="a new code snippet, that can be used to replace the relevant 'existing_code' lines in '__new hunk__' code after applying the suggestion")
one_sentence_summary: str = Field(description="a short summary of the suggestion action, in a single sentence. Focus on the 'what'. Be general, and avoid method or variable names.")
relevant_lines_start: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion starts (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above")
relevant_lines_end: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion ends (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above")
label: str = Field(description="a single label for the suggestion, to help the user understand the suggestion type. For example: 'security', 'bug', 'performance', 'enhancement', 'possible issue', 'best practice', 'maintainability', etc. Other labels are also allowed")
label: str = Field(description="a single label for the suggestion, to help the user understand the suggestion type. For example: 'security', 'possible bug', 'possible issue', 'performance', 'enhancement', 'best practice', 'maintainability', etc. Other labels are also allowed")
class PRCodeSuggestions(BaseModel):
code_suggestions: List[CodeSuggestion]
@ -80,7 +80,6 @@ code_suggestions:
python
suggestion_content: |
...
{%- if summarize_mode %}
existing_code: |
...
improved_code: |
@ -89,14 +88,6 @@ code_suggestions:
...
relevant_lines_start: 12
relevant_lines_end: 13
{%- else %}
existing_code: |
...
relevant_lines_start: 12
relevant_lines_end: 13
improved_code: |
...
{%- endif %}
label: |
...
```

View File

@ -0,0 +1,88 @@
[pr_code_suggestions_reflect_prompt]
system="""You are a language model that specializes in reviewing and evaluating suggestions for a Pull Request (PR) code.
Your input is a PR code, and a list of code suggestions that were generated for the PR.
Your goal is to inspect, review and score the suggestsions.
Be aware - the suggestions may not always be correct or accurate, and you should evaluate them in relation to the actual PR code diff presented. Sometimes the suggestion may ignore parts of the actual code diff, and in that case, you should give it a score of 0.
Specific instructions:
- Carefully review both the suggestion content, and the related PR code diff. Mistakes in the suggestions can occur. Make sure the suggestions are correct, and properly derived from the PR code diff.
- In addition to the exact code lines mentioned in each suggestion, review the code around them, to ensure that the suggestions are contextually accurate.
- Also check that the 'existing_code' and 'improved_code' fields correctly reflect the suggested changes.
- Make sure the suggestions focus on new code introduced in the PR, and not on existing code that was not changed.
- High scores (8 to 10) should be given to correct suggestions that address major bugs and issues, or security concerns. Lower scores (3 to 7) should be for correct suggestions addressing minor issues, code style, code readability, maintainability, etc. Don't give high scores to suggestions that are not crucial, and bring only small improvement or optimization.
- Order the feedback the same way the suggestions are ordered in the input.
The format that is used to present the PR code diff is as follows:
======
## file: 'src/file1.py'
@@ ... @@ def func1():
__new hunk__
12 code line1 that remained unchanged in the PR
13 +new hunk code line2 added in the PR
14 code line3 that remained unchanged in the PR
__old hunk__
code line1 that remained unchanged in the PR
-old hunk code line2 that was removed in the PR
code line3 that remained unchanged in the PR
@@ ... @@ def func2():
__new hunk__
...
__old hunk__
...
## file: 'src/file2.py'
...
======
- In this format, we separated each hunk of code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code that was removed.
- Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code.
- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.
The output must be a YAML object equivalent to type $PRCodeSuggestionsFeedback, according to the following Pydantic definitions:
=====
class CodeSuggestionFeedback(BaseModel):
suggestion_summary: str = Field(description="repeated from the input")
relevant_file: str = Field(description="repeated from the input")
suggestion_score: int = Field(description="The actual output - the score of the suggestion, from 0 to 10. Give 0 if the suggestion is plain wrong. Otherwise, give a score from 1 to 10 (inclusive), where 1 is the lowest and 10 is the highest.")
why: str = Field(description="Short and concise explanation of why the suggestion received the score (one to two sentences).")
class PRCodeSuggestionsFeedback(BaseModel):
code_suggestions: List[CodeSuggestionFeedback]
=====
Example output:
```yaml
code_suggestions:
- suggestion_content: |
Use a more descriptive variable name here
relevant_file: "src/file1.py"
suggestion_score: 6
why: |
The variable name 't' is not descriptive enough
```
Each YAML output MUST be after a newline, indented, with block scalar indicator ('|').
"""
user="""You are given a Pull Request (PR) code diff:
======
{{ diff|trim }}
======
And here is a list of corresponding {{ num_code_suggestions }} code suggestions to improve this Pull Request code:
======
{{ suggestion_str|trim }}
======
Response (should be a valid YAML, and nothing else):
```yaml
"""

View File

@ -0,0 +1,68 @@
[pr_evaluate_prompt]
prompt="""\
You are the PR-task-evaluator, a language model that compares and ranks the quality of two responses provided in response to a lengthy task regarding a Pull Request (PR) code diff.
The task to be evaluated is:
***** Start of Task *****
{{pr_task|trim}}
***** End of Task *****
Response 1 to the task is:
***** Start of Response 1 *****
{{pr_response1|trim}}
***** End of Response 1 *****
Response 2 to the task is:
***** Start of Response 2 *****
{{pr_response2|trim}}
***** End of Response 2 *****
Guidelines to evaluate the responses:
- Thoroughly read the 'Task' part. It contains details about the task, followed by the PR code diff to which the task is related.
- Thoroughly read 'Response1' and 'Response2' parts. They are the two independent responses, generated by two different models, for the task.
After that, rank each response. Criteria to rank each response:
- How well does the response follow the specific task instructions and requirements?
- How well does the response analyze and understand the PR code diff?
- How well will a person perceive it as a good response that correctly addresses the task?
- How well does the response prioritize key feedback, related to the task instructions, that a human reader seeing that feedback would also consider important?
- Don't necessarily rank higher a response that is longer. A shorter response might be better if it is more concise, and still addresses the task better.
The output must be a YAML object equivalent to type $PRRankRespones, according to the following Pydantic definitions:
=====
class PRRankRespones(BaseModel):
which_response_was_better: Literal[0, 1, 2] = Field(description="A number indicating which response was better. 0 means both responses are equally good.")
why: str = Field(description="In a short and concise manner, explain why the chosen response is better than the other. Be specific and give examples if relevant.")
score_response1: int = Field(description="A score between 1 and 10, indicating the quality of response1, based on the criteria mentioned in the prompt.")
score_response2: int = Field(description="A score between 1 and 10, indicating the quality of response2, based on the criteria mentioned in the prompt.")
=====
Example output:
```yaml
which_response_was_better: "X"
why: "Response X is better because it is more practical, and addresses the task requirements better since ..."
score_response1: ...
score_response2: ...
```
Response (should be a valid YAML, and nothing else):
```yaml
"""

View File

@ -49,6 +49,12 @@ Extra instructions from the user:
The output must be a YAML object equivalent to type $PRReview, according to the following Pydantic definitions:
=====
{%- if require_can_be_split_review %}
class SubPR(BaseModel):
relevant_files: List[str] = Field(description="The relevant files of the sub-PR")
title: str = Field(description="Short and concise title for an independent and meaningful sub-PR, composed only from the relevant files")
{%- endif %}
class Review(BaseModel):
{%- if require_estimate_effort_to_review %}
estimated_effort_to_review_[1-5]: str = Field(description="Estimate, on a scale of 1-5 (inclusive), the time and effort required to review this PR by an experienced and knowledgeable developer. 1 means short and easy review, 5 means long and hard review. Take into account the size, complexity, quality, and the needed changes of the PR code diff. Explain your answer in a short and concise manner.")
@ -61,12 +67,12 @@ class Review(BaseModel):
{%- endif %}
{%- if question_str %}
insights_from_user_answers: str = Field(description="shortly summarize the insights you gained from the user's answers to the questions")
{%- endif %}
{%- if require_focused %}
focused_pr: str = Field(description="Is this a focused PR, in the sense that all the PR code diff changes are united under a single focused theme? If the theme is too broad, or the PR code diff changes are too scattered, then the PR is not focused. Explain your answer shortly.")
{%- endif %}
possible_issues: str = Field(description="Does this PR code introduce clear issues, bugs, or major performance concerns? If there are no apparent issues, respond with 'No'. If there are any issues, describe them briefly. Use bullet points if more than one issue. Be specific, and provide examples if possible. Start each bullet point with a short specific header, such as: "- Possible Bug: ...", etc.")
security_concerns: str = Field(description="Does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others? Answer 'No' if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible")
{%- if require_can_be_split_review %}
can_be_split: List[SubPR] = Field(min_items=0, max_items=3, description="Can this PR, which contains {{ num_pr_files }} changed files in total, be divided into smaller sub-PRs with distinct tasks that can be reviewed and merged independently, regardless of the order? Make sure that the sub-PRs are indeed independent, with no code dependencies between them, and that each sub-PR represents a meaningful independent task. Output an empty list if the PR code does not need to be split.")
{%- endif %}
{%- if num_code_suggestions > 0 %}
class CodeSuggestion(BaseModel):
@ -100,14 +106,18 @@ review:
{%- endif %}
relevant_tests: |
No
{%- if require_focused %}
focused_pr: |
no, because ...
{%- endif %}
possible_issues: |
No
security_concerns: |
No
{%- if require_can_be_split_review %}
can_be_split: |
- relevant_files:
- ...
- ...
title: ...
- ...
{%- endif %}
{%- if num_code_suggestions > 0 %}
code_feedback:
- relevant_file: |

View File

@ -26,6 +26,8 @@ class PRAddDocs:
)
self.ai_handler = ai_handler()
self.ai_handler.main_pr_language = self.main_language
self.patches_diff = None
self.prediction = None
self.cli_mode = cli_mode

View File

@ -9,7 +9,7 @@ from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
from pr_agent.algo.pr_processing import get_pr_diff, get_pr_multi_diffs, retry_with_fallback_models
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import load_yaml, replace_code_tags, ModelType
from pr_agent.algo.utils import load_yaml, replace_code_tags, ModelType, show_relevant_configurations
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import get_git_provider
from pr_agent.git_providers.git_provider import get_main_pr_language
@ -46,6 +46,7 @@ class PRCodeSuggestions:
num_code_suggestions = get_settings().pr_code_suggestions.num_code_suggestions
self.ai_handler = ai_handler()
self.ai_handler.main_pr_language = self.main_language
self.patches_diff = None
self.prediction = None
self.cli_mode = cli_mode
@ -56,7 +57,6 @@ class PRCodeSuggestions:
"language": self.main_language,
"diff": "", # empty diff for initial calculation
"num_code_suggestions": num_code_suggestions,
"summarize_mode": get_settings().pr_code_suggestions.summarize,
"extra_instructions": get_settings().pr_code_suggestions.extra_instructions,
"commit_messages_str": self.git_provider.get_commit_messages(),
}
@ -75,21 +75,22 @@ class PRCodeSuggestions:
relevant_configs = {'pr_code_suggestions': dict(get_settings().pr_code_suggestions),
'config': dict(get_settings().config)}
get_logger().debug("Relevant configs", artifacts=relevant_configs)
if get_settings().config.publish_output:
if get_settings().config.publish_output and get_settings().config.publish_output_progress:
if self.git_provider.is_supported("gfm_markdown"):
self.progress_response = self.git_provider.publish_comment(self.progress)
else:
self.git_provider.publish_comment("Preparing suggestions...", is_temporary=True)
if not self.is_extended:
await retry_with_fallback_models(self._prepare_prediction, ModelType.TURBO)
data = self._prepare_pr_code_suggestions()
data = await retry_with_fallback_models(self._prepare_prediction)
else:
data = await retry_with_fallback_models(self._prepare_prediction_extended, ModelType.TURBO)
data = await retry_with_fallback_models(self._prepare_prediction_extended)
if not data:
data = {"code_suggestions": []}
if (not data) or (not 'code_suggestions' in data) or (not data['code_suggestions']):
if data is None or 'code_suggestions' not in data or not data['code_suggestions']:
get_logger().error('No code suggestions found for PR.')
pr_body = "## PR Code Suggestions\n\nNo code suggestions found for PR."
pr_body = "## PR Code Suggestions\n\nNo code suggestions found for PR."
get_logger().debug(f"PR output", artifact=pr_body)
if self.progress_response:
self.git_provider.edit_comment(self.progress_response, body=pr_body)
@ -104,7 +105,8 @@ class PRCodeSuggestions:
if get_settings().config.publish_output:
self.git_provider.remove_initial_comment()
if get_settings().pr_code_suggestions.summarize and self.git_provider.is_supported("gfm_markdown"):
if ((not get_settings().pr_code_suggestions.commitable_code_suggestions) and
self.git_provider.is_supported("gfm_markdown")):
# generate summarized suggestions
pr_body = self.generate_summarized_suggestions(data)
@ -112,14 +114,29 @@ class PRCodeSuggestions:
# add usage guide
if get_settings().pr_code_suggestions.enable_help_text:
pr_body += "<hr>\n\n<details> <summary><strong>✨ Improve tool usage guide:</strong></summary><hr> \n\n"
pr_body += "<hr>\n\n<details> <summary><strong>💡 Tool usage guide:</strong></summary><hr> \n\n"
pr_body += HelpMessage.get_improve_usage_guide()
pr_body += "\n</details>\n"
if self.progress_response:
self.git_provider.edit_comment(self.progress_response, body=pr_body)
# Output the relevant configurations if enabled
if get_settings().get('config', {}).get('output_relevant_configurations', False):
pr_body += show_relevant_configurations(relevant_section='pr_code_suggestions')
if get_settings().pr_code_suggestions.persistent_comment:
final_update_message = False
self.git_provider.publish_persistent_comment(pr_body,
initial_header="## PR Code Suggestions ✨",
update_header=True,
name="suggestions",
final_update_message=final_update_message, )
if self.progress_response:
self.progress_response.delete()
else:
self.git_provider.publish_comment(pr_body)
if self.progress_response:
self.git_provider.edit_comment(self.progress_response, body=pr_body)
else:
self.git_provider.publish_comment(pr_body)
else:
self.push_inline_code_suggestions(data)
@ -136,7 +153,7 @@ class PRCodeSuggestions:
except Exception as e:
pass
async def _prepare_prediction(self, model: str):
async def _prepare_prediction(self, model: str) -> dict:
self.patches_diff = get_pr_diff(self.git_provider,
self.token_handler,
model,
@ -150,7 +167,10 @@ class PRCodeSuggestions:
get_logger().error(f"Error getting PR diff")
self.prediction = None
async def _get_prediction(self, model: str, patches_diff: str):
data = self.prediction
return data
async def _get_prediction(self, model: str, patches_diff: str) -> dict:
variables = copy.deepcopy(self.vars)
variables["diff"] = patches_diff # update diff
environment = Environment(undefined=StrictUndefined)
@ -159,7 +179,34 @@ class PRCodeSuggestions:
response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
system=system_prompt, user=user_prompt)
return response
# load suggestions from the AI response
data = self._prepare_pr_code_suggestions(response)
# self-reflect on suggestions
if get_settings().pr_code_suggestions.self_reflect_on_suggestions:
model = get_settings().config.model_turbo # use turbo model for self-reflection, since it is an easier task
response_reflect = await self.self_reflect_on_suggestions(data["code_suggestions"], patches_diff, model=model)
if response_reflect:
response_reflect_yaml = load_yaml(response_reflect)
code_suggestions_feedback = response_reflect_yaml["code_suggestions"]
if len(code_suggestions_feedback) == len(data["code_suggestions"]):
for i, suggestion in enumerate(data["code_suggestions"]):
try:
suggestion["score"] = code_suggestions_feedback[i]["suggestion_score"]
suggestion["score_why"] = code_suggestions_feedback[i]["why"]
except Exception as e: #
get_logger().error(f"Error processing suggestion score {i}",
artifact={"suggestion": suggestion,
"code_suggestions_feedback": code_suggestions_feedback[i]})
suggestion["score"] = 7
suggestion["score_why"] = ""
else:
# get_logger().error(f"Could not self-reflect on suggestions. using default score 7")
for i, suggestion in enumerate(data["code_suggestions"]):
suggestion["score"] = 7
suggestion["score_why"] = ""
return data
@staticmethod
def _truncate_if_needed(suggestion):
@ -173,19 +220,19 @@ class PRCodeSuggestions:
f"characters to {max_code_suggestion_length} characters")
return suggestion
def _prepare_pr_code_suggestions(self) -> Dict:
review = self.prediction.strip()
data = load_yaml(review,
def _prepare_pr_code_suggestions(self, predictions: str) -> Dict:
data = load_yaml(predictions.strip(),
keys_fix_yaml=["relevant_file", "suggestion_content", "existing_code", "improved_code"])
if isinstance(data, list):
data = {'code_suggestions': data}
# remove invalid suggestions
# remove or edit invalid suggestions
suggestion_list = []
one_sentence_summary_list = []
for i, suggestion in enumerate(data['code_suggestions']):
if get_settings().pr_code_suggestions.summarize:
if not suggestion or 'one_sentence_summary' not in suggestion or 'label' not in suggestion or 'relevant_file' not in suggestion:
try:
if (not suggestion or 'one_sentence_summary' not in suggestion or
'label' not in suggestion or 'relevant_file' not in suggestion):
get_logger().debug(f"Skipping suggestion {i + 1}, because it is invalid: {suggestion}")
continue
@ -193,15 +240,26 @@ class PRCodeSuggestions:
get_logger().debug(f"Skipping suggestion {i + 1}, because it is a duplicate: {suggestion}")
continue
if ('existing_code' in suggestion) and ('improved_code' in suggestion) and (
suggestion['existing_code'] != suggestion['improved_code']):
suggestion = self._truncate_if_needed(suggestion)
if get_settings().pr_code_suggestions.summarize:
if 'const' in suggestion['suggestion_content'] and 'instead' in suggestion['suggestion_content'] and 'let' in suggestion['suggestion_content']:
get_logger().debug(f"Skipping suggestion {i + 1}, because it uses 'const instead let': {suggestion}")
continue
if ('existing_code' in suggestion) and ('improved_code' in suggestion):
if suggestion['existing_code'] == suggestion['improved_code']:
get_logger().debug(
f"edited improved suggestion {i + 1}, because equal to existing code: {suggestion['existing_code']}")
if get_settings().pr_code_suggestions.commitable_code_suggestions:
suggestion['improved_code'] = "" # we need 'existing_code' to locate the code in the PR
else:
suggestion['existing_code'] = ""
suggestion = self._truncate_if_needed(suggestion)
one_sentence_summary_list.append(suggestion['one_sentence_summary'])
suggestion_list.append(suggestion)
else:
get_logger().debug(
f"Skipping suggestion {i + 1}, because existing code is equal to improved code {suggestion['existing_code']}")
suggestion_list.append(suggestion)
else:
get_logger().info(
f"Skipping suggestion {i + 1}, because it does not contain 'existing_code' or 'improved_code': {suggestion}")
except Exception as e:
get_logger().error(f"Error processing suggestion {i + 1}: {suggestion}, error: {e}")
data['code_suggestions'] = suggestion_list
return data
@ -228,7 +286,10 @@ class PRCodeSuggestions:
if new_code_snippet:
new_code_snippet = self.dedent_code(relevant_file, relevant_lines_start, new_code_snippet)
body = f"**Suggestion:** {content} [{label}]\n```suggestion\n" + new_code_snippet + "\n```"
if d.get('score'):
body = f"**Suggestion:** {content} [{label}, importance: {d.get('score')}]\n```suggestion\n" + new_code_snippet + "\n```"
else:
body = f"**Suggestion:** {content} [{label}]\n```suggestion\n" + new_code_snippet + "\n```"
code_suggestions.append({'body': body, 'relevant_file': relevant_file,
'relevant_lines_start': relevant_lines_start,
'relevant_lines_end': relevant_lines_end})
@ -277,7 +338,8 @@ class PRCodeSuggestions:
self.patches_diff_list = get_pr_multi_diffs(self.git_provider, self.token_handler, model,
max_calls=get_settings().pr_code_suggestions.max_number_of_calls)
if self.patches_diff_list:
get_logger().debug(f"PR diff", artifact=self.patches_diff_list)
get_logger().info(f"Number of PR chunk calls: {len(self.patches_diff_list)}")
get_logger().debug(f"PR diff:", artifact=self.patches_diff_list)
# parallelize calls to AI:
if get_settings().pr_code_suggestions.parallel_calls:
@ -290,14 +352,24 @@ class PRCodeSuggestions:
prediction = await self._get_prediction(model, patches_diff)
prediction_list.append(prediction)
data = {}
for prediction in prediction_list:
self.prediction = prediction
data_per_chunk = self._prepare_pr_code_suggestions()
if "code_suggestions" in data:
data["code_suggestions"].extend(data_per_chunk["code_suggestions"])
else:
data.update(data_per_chunk)
data = {"code_suggestions": []}
for j, predictions in enumerate(prediction_list): # each call adds an element to the list
if "code_suggestions" in predictions:
score_threshold = max(1, get_settings().pr_code_suggestions.suggestions_score_threshold)
for i, prediction in enumerate(predictions["code_suggestions"]):
try:
if get_settings().pr_code_suggestions.self_reflect_on_suggestions:
score = int(prediction["score"])
if score >= score_threshold:
data["code_suggestions"].append(prediction)
else:
get_logger().info(
f"Removing suggestions {i} from call {j}, because score is {score}, and score_threshold is {score_threshold}",
artifact=prediction)
else:
data["code_suggestions"].append(prediction)
except Exception as e:
get_logger().error(f"Error getting PR diff for suggestion {i} in call {j}, error: {e}")
self.data = data
else:
get_logger().error(f"Error getting PR diff")
@ -363,7 +435,7 @@ class PRCodeSuggestions:
def generate_summarized_suggestions(self, data: Dict) -> str:
try:
pr_body = "## PR Code Suggestions\n\n"
pr_body = "## PR Code Suggestions\n\n"
if len(data.get('code_suggestions', [])) == 0:
pr_body += "No suggestions found to improve this PR."
@ -375,13 +447,16 @@ class PRCodeSuggestions:
for ext in extensions:
extension_to_language[ext] = language
pr_body = "## PR Code Suggestions\n\n"
pr_body = "## PR Code Suggestions\n\n"
pr_body += "<table>"
header = f"Suggestions"
delta = 76
header = f"Suggestion"
delta = 66
header += "&nbsp; " * delta
pr_body += f"""<thead><tr><td>Category</td><td align=left>{header}</td></tr></thead>"""
if get_settings().pr_code_suggestions.self_reflect_on_suggestions:
pr_body += f"""<thead><tr><td>Category</td><td align=left>{header}</td><td align=center>Score</td></tr>"""
else:
pr_body += f"""<thead><tr><td>Category</td><td align=left>{header}</td></tr>"""
pr_body += """<tbody>"""
suggestions_labels = dict()
# add all suggestions related to each label
@ -391,13 +466,17 @@ class PRCodeSuggestions:
suggestions_labels[label] = []
suggestions_labels[label].append(suggestion)
# sort suggestions_labels by the suggestion with the highest score
if get_settings().pr_code_suggestions.self_reflect_on_suggestions:
suggestions_labels = dict(sorted(suggestions_labels.items(), key=lambda x: max([s['score'] for s in x[1]]), reverse=True))
# sort the suggestions inside each label group by score
for label, suggestions in suggestions_labels.items():
suggestions_labels[label] = sorted(suggestions, key=lambda x: x['score'], reverse=True)
for label, suggestions in suggestions_labels.items():
num_suggestions=len(suggestions)
# pr_body += f"""<tr><td><strong>{label}</strong></td>"""
pr_body += f"""<tr><td rowspan={num_suggestions}><strong>{label.capitalize()}</strong></td>\n"""
# pr_body += f"""<td>"""
# pr_body += f"""<details><summary>{len(suggestions)} suggestions</summary>"""
# pr_body += f"""<table>"""
for i, suggestion in enumerate(suggestions):
relevant_file = suggestion['relevant_file'].strip()
@ -408,8 +487,12 @@ class PRCodeSuggestions:
range_str = f"[{relevant_lines_start}]"
else:
range_str = f"[{relevant_lines_start}-{relevant_lines_end}]"
code_snippet_link = self.git_provider.get_line_link(relevant_file, relevant_lines_start,
relevant_lines_end)
try:
code_snippet_link = self.git_provider.get_line_link(relevant_file, relevant_lines_start,
relevant_lines_end)
except:
code_snippet_link = ""
# add html table for each suggestion
suggestion_content = suggestion['suggestion_content'].rstrip()
@ -430,12 +513,11 @@ class PRCodeSuggestions:
pr_body += f"""<td>\n\n"""
else:
pr_body += f"""<tr><td>\n\n"""
suggestion_summary = suggestion['one_sentence_summary'].strip()
suggestion_summary = suggestion['one_sentence_summary'].strip().rstrip('.')
if '`' in suggestion_summary:
suggestion_summary = replace_code_tags(suggestion_summary)
# suggestion_summary = suggestion_summary + max((77-len(suggestion_summary)), 0)*"&nbsp;"
pr_body += f"""\n\n<details><summary>{suggestion_summary}</summary>\n\n___\n\n"""
pr_body += f"""\n\n<details><summary>{suggestion_summary}</summary>\n\n___\n\n"""
pr_body += f"""
**{suggestion_content}**
@ -443,14 +525,50 @@ class PRCodeSuggestions:
{example_code}
"""
if get_settings().pr_code_suggestions.self_reflect_on_suggestions:
pr_body +=f"\n\n<details><summary><b>Suggestion importance[1-10]: {suggestion['score']}</b></summary>\n\n"
pr_body += f"Why: {suggestion['score_why']}\n\n"
pr_body += f"</details>"
pr_body += f"</details>"
# # add another column for 'score'
if get_settings().pr_code_suggestions.self_reflect_on_suggestions:
pr_body += f"</td><td align=center>{suggestion['score']}\n\n"
pr_body += f"</td></tr>"
# pr_body += "</details>"
pr_body += """</td></tr>"""
# pr_body += """</td></tr>"""
pr_body += """</tr></tbody></table>"""
return pr_body
except Exception as e:
get_logger().info(f"Failed to publish summarized code suggestions, error: {e}")
return ""
async def self_reflect_on_suggestions(self, suggestion_list: List, patches_diff: str, model: str) -> str:
if not suggestion_list:
return ""
try:
suggestion_str = ""
for i, suggestion in enumerate(suggestion_list):
suggestion_str += f"suggestion {i + 1}: " + str(suggestion) + '\n\n'
variables = {'suggestion_list': suggestion_list,
'suggestion_str': suggestion_str,
"diff": patches_diff,
'num_code_suggestions': len(suggestion_list)}
environment = Environment(undefined=StrictUndefined)
system_prompt_reflect = environment.from_string(get_settings().pr_code_suggestions_reflect_prompt.system).render(
variables)
user_prompt_reflect = environment.from_string(get_settings().pr_code_suggestions_reflect_prompt.user).render(variables)
with get_logger().contextualize(command="self_reflect_on_suggestions"):
response_reflect, finish_reason_reflect = await self.ai_handler.chat_completion(model=model,
system=system_prompt_reflect,
user=user_prompt_reflect)
except Exception as e:
get_logger().info(f"Could not reflect on suggestions, error: {e}")
return ""
return response_reflect

View File

@ -9,7 +9,7 @@ from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import load_yaml, set_custom_labels, get_user_labels, ModelType
from pr_agent.algo.utils import load_yaml, set_custom_labels, get_user_labels, ModelType, show_relevant_configurations
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import get_git_provider
from pr_agent.git_providers.git_provider import get_main_pr_language
@ -41,6 +41,7 @@ class PRDescription:
# Initialize the AI handler
self.ai_handler = ai_handler()
self.ai_handler.main_pr_language = self.main_pr_language
# Initialize the variables dictionary
@ -81,7 +82,7 @@ class PRDescription:
if get_settings().config.publish_output:
self.git_provider.publish_comment("Preparing PR description...", is_temporary=True)
await retry_with_fallback_models(self._prepare_prediction, ModelType.TURBO) # turbo model because larger context
await retry_with_fallback_models(self._prepare_prediction, ModelType.TURBO)
if self.prediction:
self._prepare_data()
@ -112,9 +113,13 @@ class PRDescription:
pr_body += HelpMessage.get_describe_usage_guide()
pr_body += "\n</details>\n"
elif get_settings().pr_description.enable_help_comment:
pr_body += "\n\n___\n\n> **PR-Agent usage**:"
pr_body += "\n\n___\n\n> 💡 **PR-Agent usage**:"
pr_body += "\n>Comment `/help` on the PR to get a list of all available PR-Agent tools and their descriptions\n\n"
# Output the relevant configurations if enabled
if get_settings().get('config', {}).get('output_relevant_configurations', False):
pr_body += show_relevant_configurations(relevant_section='pr_description')
if get_settings().config.publish_output:
# publish labels
if get_settings().pr_description.publish_labels and self.git_provider.is_supported("get_labels"):
@ -131,7 +136,14 @@ class PRDescription:
# publish description
if get_settings().pr_description.publish_description_as_comment:
full_markdown_description = f"## Title\n\n{pr_title}\n\n___\n{pr_body}"
self.git_provider.publish_comment(full_markdown_description)
if get_settings().pr_description.publish_description_as_comment_persistent:
self.git_provider.publish_persistent_comment(full_markdown_description,
initial_header="## Title",
update_header=True,
name="describe",
final_update_message=False, )
else:
self.git_provider.publish_comment(full_markdown_description)
else:
self.git_provider.publish_description(pr_title, pr_body)
@ -293,7 +305,7 @@ class PRDescription:
# Remove the 'PR Title' key from the dictionary
ai_title = self.data.pop('title', self.vars["title"])
if get_settings().pr_description.keep_original_user_title:
if (not get_settings().pr_description.generate_ai_title):
# Assign the original PR title to the 'title' variable
title = self.vars["title"]
else:
@ -309,7 +321,11 @@ class PRDescription:
value = self.file_label_dict
else:
key_publish = key.rstrip(':').replace("_", " ").capitalize()
pr_body += f"## **{key_publish}**\n"
if key_publish == "Type":
key_publish = "PR Type"
# elif key_publish == "Description":
# key_publish = "PR Description"
pr_body += f"### **{key_publish}**\n"
if 'walkthrough' in key.lower():
if self.git_provider.is_supported("gfm_markdown"):
pr_body += "<details> <summary>files:</summary>\n\n"
@ -321,7 +337,7 @@ class PRDescription:
pr_body += "</details>\n"
elif 'pr_files' in key.lower():
changes_walkthrough, pr_file_changes = self.process_pr_files_prediction(changes_walkthrough, value)
changes_walkthrough = f"## **Changes walkthrough**\n{changes_walkthrough}"
changes_walkthrough = f"### **Changes walkthrough** 📝\n{changes_walkthrough}"
else:
# if the value is a list, join its items by comma
if isinstance(value, list):

View File

@ -35,7 +35,8 @@ class PRGenerateLabels:
# Initialize the AI handler
self.ai_handler = ai_handler()
self.ai_handler.main_pr_language = self.main_pr_language
# Initialize the variables dictionary
self.vars = {
"title": self.git_provider.pr.title,

View File

@ -18,8 +18,8 @@ class PRHelpMessage:
relevant_configs = {'pr_help': dict(get_settings().pr_help),
'config': dict(get_settings().config)}
get_logger().debug("Relevant configs", artifacts=relevant_configs)
pr_comment = "## PR Agent Walkthrough\n\n"
pr_comment += "🤖 Welcome to the PR Agent, an AI-powered tool for automated pull request analysis, feedback, suggestions and more."""
pr_comment = "## PR Agent Walkthrough 🤖\n\n"
pr_comment += "Welcome to the PR Agent, an AI-powered tool for automated pull request analysis, feedback, suggestions and more."""
pr_comment += "\n\nHere is a list of tools you can use to interact with the PR Agent:\n"
base_path = "https://pr-agent-docs.codium.ai/tools"
@ -27,51 +27,58 @@ class PRHelpMessage:
tool_names.append(f"[DESCRIBE]({base_path}/describe/)")
tool_names.append(f"[REVIEW]({base_path}/review/)")
tool_names.append(f"[IMPROVE]({base_path}/improve/)")
tool_names.append(f"[ANALYZE]({base_path}/analyze/) 💎")
tool_names.append(f"[UPDATE CHANGELOG]({base_path}/update_changelog/)")
tool_names.append(f"[ADD DOCUMENTATION]({base_path}/documentation/) 💎")
tool_names.append(f"[ASK]({base_path}/ask/)")
tool_names.append(f"[GENERATE CUSTOM LABELS]({base_path}/custom_labels/)")
tool_names.append(f"[ADD DOCS]({base_path}/documentation/) 💎")
tool_names.append(f"[TEST]({base_path}/test/) 💎")
tool_names.append(f"[IMPROVE COMPONENT]({base_path}/improve_component/) 💎")
tool_names.append(f"[ANALYZE]({base_path}/analyze/) 💎")
tool_names.append(f"[ASK]({base_path}/ask/)")
tool_names.append(f"[GENERATE CUSTOM LABELS]({base_path}/custom_labels/) 💎")
tool_names.append(f"[CI FEEDBACK]({base_path}/ci_feedback/) 💎")
tool_names.append(f"[CUSTOM SUGGESTIONS]({base_path}/custom_suggestions/) 💎")
tool_names.append(f"[CUSTOM PROMPT]({base_path}/custom_prompt/) 💎")
tool_names.append(f"[SIMILAR ISSUE]({base_path}/similar_issues/)")
descriptions = []
descriptions.append("Generates PR description - title, type, summary, code walkthrough and labels")
descriptions.append("Adjustable feedback about the PR, possible issues, security concerns, review effort and more")
descriptions.append("Code suggestions for improving the PR.")
descriptions.append("Identifies code components that changed in the PR, and enables to interactively generate tests, docs, and code suggestions for each component.")
descriptions.append("Automatically updates the changelog.")
descriptions.append("Generates documentation to methods/functions/classes that changed in the PR.")
descriptions.append("Answering free-text questions about the PR.")
descriptions.append("Code suggestions for improving the PR")
descriptions.append("Automatically updates the changelog")
descriptions.append("Generates documentation to methods/functions/classes that changed in the PR")
descriptions.append("Generates unit tests for a specific component, based on the PR code change")
descriptions.append("Code suggestions for a specific component that changed in the PR")
descriptions.append("Identifies code components that changed in the PR, and enables to interactively generate tests, docs, and code suggestions for each component")
descriptions.append("Answering free-text questions about the PR")
descriptions.append("Generates custom labels for the PR, based on specific guidelines defined by the user")
descriptions.append("Generates unit tests for a specific component, based on the PR code change.")
descriptions.append("Generates feedback and analysis for a failed CI job.")
descriptions.append("Generates custom suggestions for improving the PR code, based on specific guidelines defined by the user.")
descriptions.append("Automatically retrieves and presents similar issues.")
descriptions.append("Generates feedback and analysis for a failed CI job")
descriptions.append("Generates custom suggestions for improving the PR code, derived only from a specific guidelines prompt defined by the user")
descriptions.append("Automatically retrieves and presents similar issues")
commands =[]
commands.append("`/describe`")
commands.append("`/review`")
commands.append("`/improve`")
commands.append("`/analyze`")
commands.append("`/update_changelog`")
commands.append("`/add_docs`")
commands.append("`/test`")
commands.append("`/improve_component`")
commands.append("`/analyze`")
commands.append("`/ask`")
commands.append("`/generate_labels`")
commands.append("`/test`")
commands.append("`/checks`")
commands.append("`/custom_suggestions`")
commands.append("`/custom_prompt`")
commands.append("`/similar_issue`")
checkbox_list = []
checkbox_list.append(" - [ ] Run <!-- /describe -->")
checkbox_list.append(" - [ ] Run <!-- /review -->")
checkbox_list.append(" - [ ] Run <!-- /improve -->")
checkbox_list.append(" - [ ] Run <!-- /analyze -->")
checkbox_list.append(" - [ ] Run <!-- /update_changelog -->")
checkbox_list.append(" - [ ] Run <!-- /add_docs -->")
checkbox_list.append(" - [ ] Run <!-- /test -->")
checkbox_list.append(" - [ ] Run <!-- /improve_component -->")
checkbox_list.append(" - [ ] Run <!-- /analyze -->")
checkbox_list.append("[*]")
checkbox_list.append("[*]")
checkbox_list.append("[*]")
checkbox_list.append("[*]")
checkbox_list.append("[*]")
@ -79,17 +86,17 @@ class PRHelpMessage:
checkbox_list.append("[*]")
checkbox_list.append("[*]")
if isinstance(self.git_provider, GithubProvider):
pr_comment += f"<table><tr align='center'><th align='center'>Tool</th><th align='center'>Description</th><th align='center'>Invoke Interactively :gem:</th></tr>"
if isinstance(self.git_provider, GithubProvider) and not get_settings().config.get('disable_checkboxes', False):
pr_comment += f"<table><tr align='left'><th align='left'>Tool</th><th align='left'>Description</th><th align='left'>Trigger Interactively :gem:</th></tr>"
for i in range(len(tool_names)):
pr_comment += f"\n<tr><td align='center'>\n\n<strong>{tool_names[i]}</strong></td>\n<td>{descriptions[i]}</td>\n<td>\n\n{checkbox_list[i]}\n</td></tr>"
pr_comment += f"\n<tr><td align='left'>\n\n<strong>{tool_names[i]}</strong></td>\n<td>{descriptions[i]}</td>\n<td>\n\n{checkbox_list[i]}\n</td></tr>"
pr_comment += "</table>\n\n"
pr_comment += f"""\n\n(1) Note that each tool be [triggered automatically](https://github.com/Codium-ai/pr-agent/blob/main/Usage.md#github-app-automatic-tools-for-pr-actions) when a new PR is opened, or called manually by [commenting on a PR](https://github.com/Codium-ai/pr-agent/blob/main/Usage.md#online-usage)."""
pr_comment += f"""\n\n(2) Tools marked with [*] require additional parameters to be passed. For example, to invoke the `/ask` tool, you need to comment on a PR: `/ask "<question content>"`. See the relevant documentation for each tool for more details."""
else:
pr_comment += f"<table><tr align='center'><th align='center'>Tool</th><th align='left'>Command</th><th align='left'>Description</th></tr>"
pr_comment += f"<table><tr align='left'><th align='left'>Tool</th><th align='left'>Command</th><th align='left'>Description</th></tr>"
for i in range(len(tool_names)):
pr_comment += f"\n<tr><td align='center'>\n\n<strong>{tool_names[i]}</strong></td><td>{commands[i]}</td><td>{descriptions[i]}</td></tr>"
pr_comment += f"\n<tr><td align='left'>\n\n<strong>{tool_names[i]}</strong></td><td>{commands[i]}</td><td>{descriptions[i]}</td></tr>"
pr_comment += "</table>\n\n"
pr_comment += f"""\n\nNote that each tool be [invoked automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/) when a new PR is opened, or called manually by [commenting on a PR](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#online-usage)."""
if get_settings().config.publish_output:
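For reference, the two branches above differ only in the third column (interactive checkboxes for GitHub vs. plain commands elsewhere). A minimal standalone sketch of the shared row assembly, with `build_help_table` as a hypothetical helper name:

```python
# Hypothetical standalone sketch of the row assembly above; assumes the three
# lists are parallel and equal-length, and that `third_column` holds either
# checkbox snippets (GitHub) or command strings (other providers).
def build_help_table(tool_names: list, descriptions: list, third_column: list) -> str:
    header = ("<table><tr align='left'><th align='left'>Tool</th>"
              "<th align='left'>Description</th>"
              "<th align='left'>Trigger Interactively :gem:</th></tr>")
    rows = "".join(
        f"\n<tr><td align='left'>\n\n<strong>{name}</strong></td>"
        f"\n<td>{desc}</td>\n<td>\n\n{extra}\n</td></tr>"
        for name, desc, extra in zip(tool_names, descriptions, third_column)
    )
    return header + rows + "</table>\n\n"
```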

View File

@ -21,6 +21,8 @@ class PRInformationFromUser:
self.git_provider.get_languages(), self.git_provider.get_files()
)
self.ai_handler = ai_handler()
self.ai_handler.main_pr_language = self.main_pr_language
self.vars = {
"title": self.git_provider.pr.title,
"branch": self.git_provider.get_pr_branch(),

View File

@ -22,8 +22,11 @@ class PR_LineQuestions:
def __init__(self, pr_url: str, args=None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
self.question_str = self.parse_args(args)
self.git_provider = get_git_provider()(pr_url)
self.main_pr_language = get_main_pr_language(
self.git_provider.get_languages(), self.git_provider.get_files()
)
self.ai_handler = ai_handler()
self.ai_handler.main_pr_language = self.main_pr_language
self.vars = {
"title": self.git_provider.pr.title,

View File

@ -7,6 +7,7 @@ from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import ModelType
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import get_git_provider
from pr_agent.git_providers.git_provider import get_main_pr_language
@ -17,11 +18,14 @@ from pr_agent.servers.help import HelpMessage
class PRQuestions:
def __init__(self, pr_url: str, args=None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
question_str = self.parse_args(args)
self.pr_url = pr_url
self.git_provider = get_git_provider()(pr_url)
self.main_pr_language = get_main_pr_language(
self.git_provider.get_languages(), self.git_provider.get_files()
)
self.ai_handler = ai_handler()
self.ai_handler.main_pr_language = self.main_pr_language
self.question_str = question_str
self.vars = {
"title": self.git_provider.pr.title,
@ -47,19 +51,25 @@ class PRQuestions:
return question_str
async def run(self):
get_logger().info(f'Answering a PR question about the PR {self.pr_url}')
relevant_configs = {'pr_questions': dict(get_settings().pr_questions),
'config': dict(get_settings().config)}
get_logger().debug("Relevant configs", artifacts=relevant_configs)
if get_settings().config.publish_output:
self.git_provider.publish_comment("Preparing answer...", is_temporary=True)
# identify image
img_path = self.identify_image_in_comment()
if img_path:
get_logger().debug("Image path identified", artifact=img_path)
await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.TURBO)
pr_comment = self._prepare_pr_answer()
get_logger().debug(f"PR output", artifact=pr_comment)
if self.git_provider.is_supported("gfm_markdown") and get_settings().pr_questions.enable_help_text:
pr_comment += "<hr>\n\n<details> <summary><strong>✨ Ask tool usage guide:</strong></summary><hr> \n\n"
pr_comment += "<hr>\n\n<details> <summary><strong>💡 Tool usage guide:</strong></summary><hr> \n\n"
pr_comment += HelpMessage.get_ask_usage_guide()
pr_comment += "\n</details>\n"
@ -68,6 +78,19 @@ class PRQuestions:
self.git_provider.remove_initial_comment()
return ""
def identify_image_in_comment(self):
    img_path = ''
    if '![image]' in self.question_str:
        # assuming structure:
        # /ask question ... > ![image](img_path)
        img_path = self.question_str.split('![image]')[1].strip().strip('()')
        self.vars['img_path'] = img_path
    elif 'https://' in self.question_str and any(ext in self.question_str for ext in ('.png', '.jpg', '.jpeg')):  # direct image link
        # include https:// in the image path
        img_path = 'https://' + self.question_str.split('https://')[1]
        self.vars['img_path'] = img_path
    return img_path
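Two comment shapes are recognized: an embedded markdown image and a bare direct link. For example (hypothetical URLs):

```python
# Hypothetical examples of comments the parser above would accept:
q1 = '/ask what does this error mean? > ![image](https://example.com/screenshot.png)'
q2 = '/ask explain this diagram https://example.com/architecture.jpg'
```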
async def _prepare_prediction(self, model: str):
self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)
if self.patches_diff:
@ -83,11 +106,17 @@ class PRQuestions:
environment = Environment(undefined=StrictUndefined)
system_prompt = environment.from_string(get_settings().pr_questions_prompt.system).render(variables)
user_prompt = environment.from_string(get_settings().pr_questions_prompt.user).render(variables)
if 'img_path' in variables:
    img_path = self.vars['img_path']
    response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
                                                                    system=system_prompt, user=user_prompt,
                                                                    img_path=img_path)
else:
    response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
                                                                    system=system_prompt, user=user_prompt)
return response
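On the handler side, `img_path` typically ends up in an OpenAI-style vision message. A sketch of that translation, under the assumption that the provider accepts `image_url` content parts (the exact payload LiteLLM forwards can vary per provider):

```python
def build_messages(system: str, user: str, img_path: str = None) -> list:
    # Sketch under the assumption of an OpenAI-style vision payload;
    # providers without vision support would reject the image_url part.
    user_content = user if not img_path else [
        {"type": "text", "text": user},
        {"type": "image_url", "image_url": {"url": img_path}},
    ]
    return [{"role": "system", "content": system},
            {"role": "user", "content": user_content}]
```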
def _prepare_pr_answer(self) -> str:
answer_str = f"Question: {self.question_str}\n\n"
answer_str += f"Answer:\n{self.prediction.strip()}\n\n"
answer_str = f"### **Ask**❓\n{self.question_str}\n\n"
answer_str += f"### **Answer:**\n{self.prediction.strip()}\n\n"
return answer_str

View File

@ -8,7 +8,8 @@ from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import convert_to_markdown, github_action_output, load_yaml, ModelType, \
    show_relevant_configurations
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import get_git_provider
from pr_agent.git_providers.git_provider import IncrementalPR, get_main_pr_language
@ -46,6 +47,8 @@ class PRReviewer:
if self.is_answer and not self.git_provider.is_supported("get_issue_comments"):
raise Exception(f"Answer mode is not supported for {get_settings().config.git_provider} for now")
self.ai_handler = ai_handler()
self.ai_handler.main_pr_language = self.main_language
self.patches_diff = None
self.prediction = None
@ -56,10 +59,11 @@ class PRReviewer:
"description": self.git_provider.get_pr_description(),
"language": self.main_language,
"diff": "", # empty diff for initial calculation
"num_pr_files": self.git_provider.get_num_of_files(),
"require_score": get_settings().pr_reviewer.require_score_review,
"require_tests": get_settings().pr_reviewer.require_tests_review,
"require_focused": get_settings().pr_reviewer.require_focused_review,
"require_estimate_effort_to_review": get_settings().pr_reviewer.require_estimate_effort_to_review,
'require_can_be_split_review': get_settings().pr_reviewer.require_can_be_split_review,
'num_code_suggestions': get_settings().pr_reviewer.num_code_suggestions,
'question_str': question_str,
'answer_str': answer_str,
@ -121,7 +125,7 @@ class PRReviewer:
if get_settings().config.publish_output:
self.git_provider.publish_comment("Preparing review...", is_temporary=True)
await retry_with_fallback_models(self._prepare_prediction)
if not self.prediction:
self.git_provider.remove_initial_comment()
return None
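`retry_with_fallback_models` walks the configured model chain until one call succeeds. A simplified sketch, assuming the `config.model`, `config.model_turbo`, and `config.fallback_models` settings used elsewhere in the codebase:

```python
from pr_agent.algo.utils import ModelType
from pr_agent.config_loader import get_settings

# Simplified sketch; the real helper also tracks deployment IDs and logs failures.
async def retry_with_fallback_models(f, model_type=ModelType.REGULAR):
    config = get_settings().config
    main_model = config.model_turbo if model_type == ModelType.TURBO else config.model
    models = [main_model] + list(config.fallback_models)
    for i, model in enumerate(models):
        try:
            return await f(model)
        except Exception:
            if i == len(models) - 1:  # no fallback left
                raise
```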
@ -134,7 +138,7 @@ class PRReviewer:
if get_settings().pr_reviewer.persistent_comment and not self.incremental.is_incremental:
final_update_message = get_settings().pr_reviewer.final_update_message
self.git_provider.publish_persistent_comment(pr_review,
initial_header="## PR Review",
initial_header="## PR Review 🔍",
update_header=True,
final_update_message=final_update_message, )
else:
@ -189,6 +193,7 @@ class PRReviewer:
data = load_yaml(self.prediction.strip(),
keys_fix_yaml=["estimated_effort_to_review_[1-5]:", "security_concerns:", "possible_issues:",
"relevant_file:", "relevant_line:", "suggestion:"])
github_action_output(data, 'review')
if 'code_feedback' in data:
code_feedback = data['code_feedback']
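`github_action_output(data, 'review')` exports the parsed review so a later workflow step can read it. A minimal sketch, assuming the helper appends a `key=json` pair to the file GitHub Actions exposes through the `GITHUB_OUTPUT` environment variable:

```python
import json
import os

# Minimal sketch; the real helper is presumably gated behind a setting and
# swallows errors so output export never fails the tool run.
def github_action_output(output_data: dict, key_name: str) -> None:
    output_file = os.environ.get('GITHUB_OUTPUT')
    if not output_file:
        return  # not running inside a GitHub Actions job
    with open(output_file, 'a') as fh:
        fh.write(f"{key_name}={json.dumps(output_data.get(key_name, {}))}\n")
```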
@ -230,10 +235,14 @@ class PRReviewer:
# Add help text if gfm_markdown is supported
if self.git_provider.is_supported("gfm_markdown") and get_settings().pr_reviewer.enable_help_text:
markdown_text += "<hr>\n\n<details> <summary><strong>✨ Review tool usage guide:</strong></summary><hr> \n\n"
markdown_text += "<hr>\n\n<details> <summary><strong>💡 Tool usage guide:</strong></summary><hr> \n\n"
markdown_text += HelpMessage.get_review_usage_guide()
markdown_text += "\n</details>\n"
# Output the relevant configurations if enabled
if get_settings().get('config', {}).get('output_relevant_configurations', False):
markdown_text += show_relevant_configurations(relevant_section='pr_reviewer')
# Add custom labels from the review prediction (effort, security)
self.set_review_labels(data)
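`show_relevant_configurations` appends the effective settings for the tool to the comment. A plausible sketch, assuming settings sections behave like dicts and that secret-like keys must never be echoed into a PR comment:

```python
from pr_agent.config_loader import get_settings

def show_relevant_configurations(relevant_section: str) -> str:
    # Plausible sketch only; the real helper's formatting may differ.
    skip = {'key', 'secret', 'token', 'api_key'}  # never echo credentials
    section = dict(get_settings().get(relevant_section, {}))
    lines = [f"{k} = {v!r}" for k, v in section.items() if k.lower() not in skip]
    return ("\n<hr>\n\n<details> <summary><strong>🛠️ Relevant configurations</strong></summary>\n\n"
            + f"[{relevant_section}]\n" + "\n".join(lines) + "\n\n</details>\n")
```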
@ -354,6 +363,9 @@ class PRReviewer:
return True
def set_review_labels(self, data):
if not get_settings().config.publish_output:
return
if (get_settings().pr_reviewer.enable_review_labels_security or
get_settings().pr_reviewer.enable_review_labels_effort):
try:
@ -370,6 +382,8 @@ class PRReviewer:
review_labels.append('Possible security concern')
current_labels = self.git_provider.get_pr_labels(update=True)
if not current_labels:
current_labels = []
get_logger().debug(f"Current labels:\n{current_labels}")
if current_labels:
current_labels_filtered = [label for label in current_labels if
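The truncated filter above drops previously auto-generated labels before re-applying the fresh ones. A condensed sketch of the merge, assuming the reviewer's labels always carry these fixed prefixes:

```python
# Condensed sketch; assumes auto-generated labels are identifiable by prefix.
def merge_review_labels(current_labels: list, review_labels: list) -> list:
    auto_prefixes = ('Review effort [1-5]:', 'Possible security concern')
    kept = [label for label in (current_labels or [])
            if not label.startswith(auto_prefixes)]
    return review_labels + kept
```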

View File

@ -3,9 +3,6 @@ from enum import Enum
from typing import List
import openai
from pydantic import BaseModel, Field
from pr_agent.algo import MAX_TOKENS
@ -36,6 +33,12 @@ class PRSimilarIssue:
index_name = self.index_name = "codium-ai-pr-agent-issues"
if get_settings().pr_similar_issue.vectordb == "pinecone":
try:
    import pinecone
    from pinecone_datasets import Dataset, DatasetMetadata
    import pandas as pd
except ImportError:
    raise Exception("Please install 'pinecone' and 'pinecone_datasets' to use pinecone as vectordb")
# assuming pinecone api key and environment are set in secrets file
try:
api_key = get_settings().pinecone.api_key
@ -107,7 +110,10 @@ class PRSimilarIssue:
get_logger().info('No new issues to update')
elif get_settings().pr_similar_issue.vectordb == "lancedb":
import lancedb # import lancedb only if needed
try:
import lancedb # import lancedb only if needed
except:
raise Exception("Please install lancedb to use lancedb as vectordb")
self.db = lancedb.connect(get_settings().lancedb.uri)
self.table = None
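Both vector-DB backends now follow the same guarded-import pattern, so a missing optional dependency fails with an actionable message instead of a bare ImportError at module load. A reusable sketch (hypothetical helper, not in the codebase):

```python
import importlib

# Hypothetical helper distilling the guarded-import pattern above.
def require_package(module_name: str, hint: str):
    try:
        return importlib.import_module(module_name)
    except ImportError as e:
        raise ImportError(f"Please install '{module_name}' {hint}") from e

# e.g. lancedb = require_package('lancedb', 'to use lancedb as vectordb')
```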

View File

@ -8,7 +8,7 @@ from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import ModelType
from pr_agent.algo.utils import ModelType, show_relevant_configurations
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import get_git_provider, GithubProvider
from pr_agent.git_providers.git_provider import get_main_pr_language
@ -26,7 +26,10 @@ class PRUpdateChangelog:
)
self.commit_changelog = get_settings().pr_update_changelog.push_changelog_changes
self._get_changlog_file() # self.changelog_file_str
self.ai_handler = ai_handler()
self.ai_handler.main_pr_language = self.main_language
self.patches_diff = None
self.prediction = None
self.cli_mode = cli_mode
@ -71,6 +74,11 @@ class PRUpdateChangelog:
await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.TURBO)
new_file_content, answer = self._prepare_changelog_update()
# Output the relevant configurations if enabled
if get_settings().get('config', {}).get('output_relevant_configurations', False):
answer += show_relevant_configurations(relevant_section='pr_update_changelog')
get_logger().debug(f"PR output", artifact=answer)
if get_settings().config.publish_output:
@ -78,7 +86,7 @@ class PRUpdateChangelog:
if self.commit_changelog:
self._push_changelog_update(new_file_content, answer)
else:
self.git_provider.publish_comment(f"**Changelog updates:**\n\n{answer}")
self.git_provider.publish_comment(f"**Changelog updates:** 🔄\n\n{answer}")
async def _prepare_prediction(self, model: str):
self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)
@ -138,7 +146,7 @@ class PRUpdateChangelog:
self.git_provider.pr.create_review(commit=last_commit_id, comments=[d])
except Exception:
# we can't create a review for some reason, let's just publish a comment
self.git_provider.publish_comment(f"**Changelog updates:**\n\n{answer}")
self.git_provider.publish_comment(f"**Changelog updates: 🔄**\n\n{answer}")
def _get_default_changelog(self):
example_changelog = \