mirror of
https://github.com/qodo-ai/pr-agent.git
synced 2025-07-10 15:50:37 +08:00
Merge remote-tracking branch 'upstream/main' into abstract-BaseAiHandler
This commit is contained in:
179
pr_agent/tools/pr_add_docs.py
Normal file
179
pr_agent/tools/pr_add_docs.py
Normal file
@ -0,0 +1,179 @@
|
||||
import copy
|
||||
import textwrap
|
||||
from typing import Dict
|
||||
|
||||
from jinja2 import Environment, StrictUndefined
|
||||
|
||||
from pr_agent.algo.ai_handler import AiHandler
|
||||
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
|
||||
from pr_agent.algo.token_handler import TokenHandler
|
||||
from pr_agent.algo.utils import load_yaml
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.git_providers import get_git_provider
|
||||
from pr_agent.git_providers.git_provider import get_main_pr_language
|
||||
from pr_agent.log import get_logger
|
||||
|
||||
|
||||
class PRAddDocs:
    """Generate documentation for the code changed in a PR and publish it as inline suggestions.

    The tool renders the ``pr_add_docs_prompt`` system/user prompts with PR metadata and the PR
    diff, sends them to the AI handler, parses the YAML answer and turns every entry into a
    code suggestion that inserts the proposed documentation before or after the relevant line.
    """

    def __init__(self, pr_url: str, cli_mode=False, args: list = None):
        """Collect the PR metadata needed to render the prompts.

        Args:
            pr_url: URL of the pull request; passed to the configured git provider.
            cli_mode: Stored on the instance as ``self.cli_mode``.
            args: Accepted for interface compatibility; not used by this constructor.
        """
        self.git_provider = get_git_provider()(pr_url)
        self.main_language = get_main_pr_language(
            self.git_provider.get_languages(), self.git_provider.get_files()
        )

        self.ai_handler = AiHandler()
        self.patches_diff = None
        self.prediction = None
        self.cli_mode = cli_mode
        # Variables handed to the jinja2 prompt templates.
        self.vars = {
            "title": self.git_provider.pr.title,
            "branch": self.git_provider.get_pr_branch(),
            "description": self.git_provider.get_pr_description(),
            "language": self.main_language,
            "diff": "",  # empty diff for initial calculation
            "extra_instructions": get_settings().pr_add_docs.extra_instructions,
            "commit_messages_str": self.git_provider.get_commit_messages(),
            'docs_for_language': get_docs_for_language(self.main_language,
                                                       get_settings().pr_add_docs.docs_style),
        }
        self.token_handler = TokenHandler(self.git_provider.pr,
                                          self.vars,
                                          get_settings().pr_add_docs_prompt.system,
                                          get_settings().pr_add_docs_prompt.user)

    async def run(self):
        """Run the whole flow: predict, parse, and (if enabled) publish the documentation.

        Any exception is caught and logged as an error; nothing is re-raised.
        """
        try:
            get_logger().info('Generating code Docs for PR...')
            if get_settings().config.publish_output:
                self.git_provider.publish_comment("Generating Documentation...", is_temporary=True)

            get_logger().info('Preparing PR documentation...')
            await retry_with_fallback_models(self._prepare_prediction)
            data = self._prepare_pr_code_docs()
            if (not data) or ('Code Documentation' not in data):
                get_logger().info('No code documentation found for PR.')
                return

            if get_settings().config.publish_output:
                get_logger().info('Pushing PR documentation...')
                self.git_provider.remove_initial_comment()
                get_logger().info('Pushing inline code documentation...')
                self.push_inline_docs(data)
        except Exception as e:
            get_logger().error(f"Failed to generate code documentation for PR, error: {e}")

    async def _prepare_prediction(self, model: str):
        """Compute the PR diff for ``model`` and store the AI answer in ``self.prediction``."""
        get_logger().info('Getting PR diff...')

        # Disable adding docs to scripts and other non-relevant text files
        from pr_agent.algo.language_handler import bad_extensions
        # The imported object is extended in place (the original code used `+=` on it), so it is
        # state shared with its defining module. Only add entries that are still missing:
        # this method runs once per fallback model and per invocation, and blindly extending
        # would make the shared list grow with duplicates every time.
        docs_blacklist = get_settings().docs_blacklist_extensions.docs_blacklist
        bad_extensions.extend(ext for ext in docs_blacklist if ext not in bad_extensions)

        self.patches_diff = get_pr_diff(self.git_provider,
                                        self.token_handler,
                                        model,
                                        add_line_numbers_to_hunks=True,
                                        disable_extra_lines=False)

        get_logger().info('Getting AI prediction...')
        self.prediction = await self._get_prediction(model)

    async def _get_prediction(self, model: str):
        """Render both prompts with the current diff and return the model's response text."""
        variables = copy.deepcopy(self.vars)
        variables["diff"] = self.patches_diff  # update diff
        environment = Environment(undefined=StrictUndefined)
        system_prompt = environment.from_string(get_settings().pr_add_docs_prompt.system).render(variables)
        user_prompt = environment.from_string(get_settings().pr_add_docs_prompt.user).render(variables)
        if get_settings().config.verbosity_level >= 2:
            get_logger().info(f"\nSystem prompt:\n{system_prompt}")
            get_logger().info(f"\nUser prompt:\n{user_prompt}")
        response, _finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
                                                                         system=system_prompt, user=user_prompt)

        return response

    def _prepare_pr_code_docs(self) -> Dict:
        """Parse ``self.prediction`` as YAML.

        A top-level list is wrapped as ``{'Code Documentation': <list>}`` so callers can always
        look the entries up under that key.
        """
        docs = self.prediction.strip()
        data = load_yaml(docs)
        if isinstance(data, list):
            data = {'Code Documentation': data}
        return data

    def push_inline_docs(self, data):
        """Convert the parsed documentation entries into code suggestions and publish them.

        Args:
            data: Mapping with a ``'Code Documentation'`` list. Each entry is read for the keys
                ``'relevant file'``, ``'relevant line'``, ``'documentation'`` and
                ``'doc placement'``.

        Returns:
            The result of ``publish_comment`` when the list is empty, otherwise ``None``.
        """
        docs = []

        if not data['Code Documentation']:
            return self.git_provider.publish_comment('No code documentation found to improve this PR.')

        for d in data['Code Documentation']:
            try:
                if get_settings().config.verbosity_level >= 2:
                    get_logger().info(f"add_docs: {d}")
                relevant_file = d['relevant file'].strip()
                relevant_line = int(d['relevant line'])  # absolute position
                documentation = d['documentation']
                doc_placement = d['doc placement'].strip()
                if documentation:
                    new_code_snippet = self.dedent_code(relevant_file, relevant_line, documentation, doc_placement,
                                                        add_original_line=True)

                    body = "**Suggestion:** Proposed documentation\n```suggestion\n" + new_code_snippet + "\n```"
                    docs.append({'body': body, 'relevant_file': relevant_file,
                                 'relevant_lines_start': relevant_line,
                                 'relevant_lines_end': relevant_line})
            except Exception:
                # Best effort: a malformed entry is skipped, the remaining ones are still published.
                if get_settings().config.verbosity_level >= 2:
                    get_logger().info(f"Could not parse code docs: {d}")

        is_successful = self.git_provider.publish_code_suggestions(docs)
        if not is_successful:
            get_logger().info("Failed to publish code docs, trying to publish each docs separately")
            for doc_suggestion in docs:
                self.git_provider.publish_code_suggestions([doc_suggestion])

    def dedent_code(self, relevant_file, relevant_lines_start, new_code_snippet, doc_placement='after',
                    add_original_line=False):
        """Align a documentation snippet with the code it documents.

        Despite the name, the snippet is only ever indented further (never dedented): when the
        reference line is indented deeper than the snippet's first line, the difference is added
        as spaces to every snippet line.

        Args:
            relevant_file: File name as reported by the model; compared to each diff file's
                stripped ``filename``.
            relevant_lines_start: 1-based line number in the head version of the file.
            new_code_snippet: The documentation text to align.
            doc_placement: ``'after'`` takes the indentation from the line following the relevant
                line and puts the docs below it; any other value uses the relevant line itself
                and puts the docs above it.
            add_original_line: When true, the relevant line is included in the returned snippet
                so that a suggestion replacing that line keeps it.

        Returns:
            The adjusted snippet. If anything fails, the snippet is returned in whatever state it
            had reached and the error is logged at verbosity level 2 or higher.
        """
        try:  # dedent code snippet
            self.diff_files = self.git_provider.diff_files if self.git_provider.diff_files \
                else self.git_provider.get_diff_files()
            original_initial_line = None
            head_lines = []
            for file in self.diff_files:
                if file.filename.strip() == relevant_file:
                    head_lines = file.head_file.splitlines()
                    original_initial_line = head_lines[relevant_lines_start - 1]
                    break
            if original_initial_line:
                # For 'after' placement the indentation comes from the following line. When the
                # relevant line is the last one there is no following line; fall back to the
                # relevant line itself instead of raising IndexError, which previously made the
                # function return the snippet without the original line.
                if doc_placement == 'after' and relevant_lines_start < len(head_lines):
                    line = head_lines[relevant_lines_start]
                else:
                    line = original_initial_line
                suggested_initial_line = new_code_snippet.splitlines()[0]
                original_initial_spaces = len(line) - len(line.lstrip())
                suggested_initial_spaces = len(suggested_initial_line) - len(suggested_initial_line.lstrip())
                delta_spaces = original_initial_spaces - suggested_initial_spaces
                if delta_spaces > 0:
                    new_code_snippet = textwrap.indent(new_code_snippet, delta_spaces * " ").rstrip('\n')
                if add_original_line:
                    if doc_placement == 'after':
                        new_code_snippet = original_initial_line + "\n" + new_code_snippet
                    else:
                        new_code_snippet = new_code_snippet.rstrip() + "\n" + original_initial_line
        except Exception as e:
            if get_settings().config.verbosity_level >= 2:
                get_logger().info(f"Could not dedent code snippet for file {relevant_file}, error: {e}")

        return new_code_snippet
|
||||
|
||||
|
||||
def get_docs_for_language(language, style):
    """Return the name of the documentation convention to request for ``language``.

    The comparison is case-insensitive. Python, Lisp and Clojure yield ``"Docstring (<style>)"``
    with ``style`` interpolated; the other known languages map to a fixed name, and anything
    unrecognized yields the generic ``"Docs"``.
    """
    key = language.lower()
    # Only the docstring-based languages make use of the configured style.
    if key in ('python', 'lisp', 'clojure'):
        return f"Docstring ({style})"

    fixed_names = {
        'java': "Javadocs",
        'javascript': "JSdocs",
        'typescript': "JSdocs",
        'c++': "Doxygen",
    }
    return fixed_names.get(key, "Docs")
|
@ -1,16 +1,16 @@
|
||||
import copy
|
||||
import logging
|
||||
import textwrap
|
||||
from typing import List, Dict
|
||||
from typing import Dict, List
|
||||
from jinja2 import Environment, StrictUndefined
|
||||
|
||||
from pr_agent.algo.ai_handler import BaseAiHandler, AiHandler
|
||||
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models, get_pr_multi_diffs
|
||||
from pr_agent.algo.pr_processing import get_pr_diff, get_pr_multi_diffs, retry_with_fallback_models
|
||||
from pr_agent.algo.token_handler import TokenHandler
|
||||
from pr_agent.algo.utils import load_yaml
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.git_providers import BitbucketProvider, get_git_provider
|
||||
from pr_agent.git_providers import get_git_provider
|
||||
from pr_agent.git_providers.git_provider import get_main_pr_language
|
||||
from pr_agent.log import get_logger
|
||||
|
||||
|
||||
class PRCodeSuggestions:
|
||||
@ -52,42 +52,46 @@ class PRCodeSuggestions:
|
||||
|
||||
async def run(self):
|
||||
try:
|
||||
logging.info('Generating code suggestions for PR...')
|
||||
get_logger().info('Generating code suggestions for PR...')
|
||||
if get_settings().config.publish_output:
|
||||
self.git_provider.publish_comment("Preparing review...", is_temporary=True)
|
||||
self.git_provider.publish_comment("Preparing suggestions...", is_temporary=True)
|
||||
|
||||
logging.info('Preparing PR review...')
|
||||
get_logger().info('Preparing PR code suggestions...')
|
||||
if not self.is_extended:
|
||||
await retry_with_fallback_models(self._prepare_prediction)
|
||||
data = self._prepare_pr_code_suggestions()
|
||||
else:
|
||||
data = await retry_with_fallback_models(self._prepare_prediction_extended)
|
||||
if (not data) or (not 'Code suggestions' in data):
|
||||
logging.info('No code suggestions found for PR.')
|
||||
get_logger().info('No code suggestions found for PR.')
|
||||
return
|
||||
|
||||
if (not self.is_extended and get_settings().pr_code_suggestions.rank_suggestions) or \
|
||||
(self.is_extended and get_settings().pr_code_suggestions.rank_extended_suggestions):
|
||||
logging.info('Ranking Suggestions...')
|
||||
get_logger().info('Ranking Suggestions...')
|
||||
data['Code suggestions'] = await self.rank_suggestions(data['Code suggestions'])
|
||||
|
||||
if get_settings().config.publish_output:
|
||||
logging.info('Pushing PR review...')
|
||||
get_logger().info('Pushing PR code suggestions...')
|
||||
self.git_provider.remove_initial_comment()
|
||||
logging.info('Pushing inline code suggestions...')
|
||||
self.push_inline_code_suggestions(data)
|
||||
if get_settings().pr_code_suggestions.summarize:
|
||||
get_logger().info('Pushing summarize code suggestions...')
|
||||
self.publish_summarizes_suggestions(data)
|
||||
else:
|
||||
get_logger().info('Pushing inline code suggestions...')
|
||||
self.push_inline_code_suggestions(data)
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to generate code suggestions for PR, error: {e}")
|
||||
get_logger().error(f"Failed to generate code suggestions for PR, error: {e}")
|
||||
|
||||
async def _prepare_prediction(self, model: str):
|
||||
logging.info('Getting PR diff...')
|
||||
get_logger().info('Getting PR diff...')
|
||||
self.patches_diff = get_pr_diff(self.git_provider,
|
||||
self.token_handler,
|
||||
model,
|
||||
add_line_numbers_to_hunks=True,
|
||||
disable_extra_lines=True)
|
||||
|
||||
logging.info('Getting AI prediction...')
|
||||
get_logger().info('Getting AI prediction...')
|
||||
self.prediction = await self._get_prediction(model)
|
||||
|
||||
async def _get_prediction(self, model: str):
|
||||
@ -97,8 +101,8 @@ class PRCodeSuggestions:
|
||||
system_prompt = environment.from_string(get_settings().pr_code_suggestions_prompt.system).render(variables)
|
||||
user_prompt = environment.from_string(get_settings().pr_code_suggestions_prompt.user).render(variables)
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
logging.info(f"\nSystem prompt:\n{system_prompt}")
|
||||
logging.info(f"\nUser prompt:\n{user_prompt}")
|
||||
get_logger().info(f"\nSystem prompt:\n{system_prompt}")
|
||||
get_logger().info(f"\nUser prompt:\n{user_prompt}")
|
||||
response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
|
||||
system=system_prompt, user=user_prompt)
|
||||
|
||||
@ -115,12 +119,13 @@ class PRCodeSuggestions:
|
||||
code_suggestions = []
|
||||
|
||||
if not data['Code suggestions']:
|
||||
get_logger().info('No suggestions found to improve this PR.')
|
||||
return self.git_provider.publish_comment('No suggestions found to improve this PR.')
|
||||
|
||||
for d in data['Code suggestions']:
|
||||
try:
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
logging.info(f"suggestion: {d}")
|
||||
get_logger().info(f"suggestion: {d}")
|
||||
relevant_file = d['relevant file'].strip()
|
||||
relevant_lines_start = int(d['relevant lines start']) # absolute position
|
||||
relevant_lines_end = int(d['relevant lines end'])
|
||||
@ -136,11 +141,11 @@ class PRCodeSuggestions:
|
||||
'relevant_lines_end': relevant_lines_end})
|
||||
except Exception:
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
logging.info(f"Could not parse suggestion: {d}")
|
||||
get_logger().info(f"Could not parse suggestion: {d}")
|
||||
|
||||
is_successful = self.git_provider.publish_code_suggestions(code_suggestions)
|
||||
if not is_successful:
|
||||
logging.info("Failed to publish code suggestions, trying to publish each suggestion separately")
|
||||
get_logger().info("Failed to publish code suggestions, trying to publish each suggestion separately")
|
||||
for code_suggestion in code_suggestions:
|
||||
self.git_provider.publish_code_suggestions([code_suggestion])
|
||||
|
||||
@ -162,19 +167,19 @@ class PRCodeSuggestions:
|
||||
new_code_snippet = textwrap.indent(new_code_snippet, delta_spaces * " ").rstrip('\n')
|
||||
except Exception as e:
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
logging.info(f"Could not dedent code snippet for file {relevant_file}, error: {e}")
|
||||
get_logger().info(f"Could not dedent code snippet for file {relevant_file}, error: {e}")
|
||||
|
||||
return new_code_snippet
|
||||
|
||||
async def _prepare_prediction_extended(self, model: str) -> dict:
|
||||
logging.info('Getting PR diff...')
|
||||
get_logger().info('Getting PR diff...')
|
||||
patches_diff_list = get_pr_multi_diffs(self.git_provider, self.token_handler, model,
|
||||
max_calls=get_settings().pr_code_suggestions.max_number_of_calls)
|
||||
|
||||
logging.info('Getting multi AI predictions...')
|
||||
get_logger().info('Getting multi AI predictions...')
|
||||
prediction_list = []
|
||||
for i, patches_diff in enumerate(patches_diff_list):
|
||||
logging.info(f"Processing chunk {i + 1} of {len(patches_diff_list)}")
|
||||
get_logger().info(f"Processing chunk {i + 1} of {len(patches_diff_list)}")
|
||||
self.patches_diff = patches_diff
|
||||
prediction = await self._get_prediction(model)
|
||||
prediction_list.append(prediction)
|
||||
@ -222,8 +227,8 @@ class PRCodeSuggestions:
|
||||
variables)
|
||||
user_prompt = environment.from_string(get_settings().pr_sort_code_suggestions_prompt.user).render(variables)
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
logging.info(f"\nSystem prompt:\n{system_prompt}")
|
||||
logging.info(f"\nUser prompt:\n{user_prompt}")
|
||||
get_logger().info(f"\nSystem prompt:\n{system_prompt}")
|
||||
get_logger().info(f"\nUser prompt:\n{user_prompt}")
|
||||
response, finish_reason = await self.ai_handler.chat_completion(model=model, system=system_prompt,
|
||||
user=user_prompt)
|
||||
|
||||
@ -238,9 +243,46 @@ class PRCodeSuggestions:
|
||||
data_sorted = data_sorted[:new_len]
|
||||
except Exception as e:
|
||||
if get_settings().config.verbosity_level >= 1:
|
||||
logging.info(f"Could not sort suggestions, error: {e}")
|
||||
get_logger().info(f"Could not sort suggestions, error: {e}")
|
||||
data_sorted = suggestion_list
|
||||
|
||||
return data_sorted
|
||||
|
||||
def publish_summarizes_suggestions(self, data: Dict):
    """Publish all code suggestions as a single markdown summary comment.

    Args:
        data: Mapping with a ``'Code suggestions'`` list. Each entry is read for the keys
            ``'relevant file'``, ``'relevant lines start'``, ``'relevant lines end'``,
            ``'suggestion content'``, ``'existing code'`` and ``'improved code'``.

    A suggestion that cannot be rendered is logged as an error and left out of the comment
    entirely. Any other failure is logged and swallowed; nothing is raised to the caller.
    """
    try:
        data_markdown = "## PR Code Suggestions\n\n"

        # Invert the configured language -> extensions map for per-file lookups.
        language_extension_map_org = get_settings().language_extension_map_org
        extension_to_language = {}
        for language, extensions in language_extension_map_org.items():
            for ext in extensions:
                extension_to_language[ext] = language

        for s in data['Code suggestions']:
            try:
                # Text after the last '.'; for a name without any '.' this is the whole name.
                # NOTE(review): the lookup key carries no leading dot - confirm that the keys of
                # language_extension_map_org are stored without one, otherwise nothing matches.
                extension_s = s['relevant file'].rsplit('.')[-1]
                code_snippet_link = self.git_provider.get_line_link(s['relevant file'], s['relevant lines start'],
                                                                    s['relevant lines end'])
                collapsible = self.git_provider.is_supported("gfm_markdown")

                # Build the entry on the side and append it only once it is complete. Writing
                # straight into data_markdown left a half-rendered entry (possibly with an
                # unclosed <details> tag) in the comment whenever a key was missing.
                entry = f"\n💡 Suggestion:\n\n**{s['suggestion content']}**\n\n"
                if code_snippet_link:
                    entry += f" File: [{s['relevant file']} ({s['relevant lines start']}-{s['relevant lines end']})]({code_snippet_link})\n\n"
                else:
                    entry += f"File: {s['relevant file']} ({s['relevant lines start']}-{s['relevant lines end']})\n\n"
                if collapsible:
                    entry += "<details> <summary> Example code:</summary>\n\n"
                    entry += "___\n\n"
                language_name = "python"  # fence language used when the extension is unknown
                if extension_s and (extension_s in extension_to_language):
                    language_name = extension_to_language[extension_s]
                entry += f"Existing code:\n```{language_name}\n{s['existing code']}\n```\n"
                entry += f"Improved code:\n```{language_name}\n{s['improved code']}\n```\n"
                if collapsible:
                    entry += "</details>\n"
                entry += "\n___\n\n"

                data_markdown += entry
            except Exception as e:
                get_logger().error(f"Could not parse suggestion: {s}, error: {e}")
        self.git_provider.publish_comment(data_markdown)
    except Exception as e:
        get_logger().info(f"Failed to publish summarized code suggestions, error: {e}")
|
||||
|
||||
|
||||
|
@ -1,7 +1,6 @@
|
||||
import logging
|
||||
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.git_providers import get_git_provider
|
||||
from pr_agent.log import get_logger
|
||||
|
||||
|
||||
class PRConfig:
|
||||
@ -19,11 +18,11 @@ class PRConfig:
|
||||
self.git_provider = get_git_provider()(pr_url)
|
||||
|
||||
async def run(self):
|
||||
logging.info('Getting configuration settings...')
|
||||
logging.info('Preparing configs...')
|
||||
get_logger().info('Getting configuration settings...')
|
||||
get_logger().info('Preparing configs...')
|
||||
pr_comment = self._prepare_pr_configs()
|
||||
if get_settings().config.publish_output:
|
||||
logging.info('Pushing configs...')
|
||||
get_logger().info('Pushing configs...')
|
||||
self.git_provider.publish_comment(pr_comment)
|
||||
self.git_provider.remove_initial_comment()
|
||||
return ""
|
||||
@ -44,5 +43,5 @@ class PRConfig:
|
||||
comment_str += f"\n{header.lower()}.{key.lower()} = {repr(value) if isinstance(value, str) else value}"
|
||||
comment_str += " "
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
logging.info(f"comment_str:\n{comment_str}")
|
||||
get_logger().info(f"comment_str:\n{comment_str}")
|
||||
return comment_str
|
||||
|
@ -1,7 +1,5 @@
|
||||
import copy
|
||||
import json
|
||||
import re
|
||||
import logging
|
||||
from typing import List, Tuple
|
||||
|
||||
from jinja2 import Environment, StrictUndefined
|
||||
@ -9,10 +7,11 @@ from jinja2 import Environment, StrictUndefined
|
||||
from pr_agent.algo.ai_handler import BaseAiHandler, AiHandler
|
||||
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
|
||||
from pr_agent.algo.token_handler import TokenHandler
|
||||
from pr_agent.algo.utils import load_yaml
|
||||
from pr_agent.algo.utils import load_yaml, set_custom_labels, get_user_labels
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.git_providers import get_git_provider
|
||||
from pr_agent.git_providers.git_provider import get_main_pr_language
|
||||
from pr_agent.log import get_logger
|
||||
|
||||
|
||||
class PRDescription:
|
||||
@ -31,6 +30,11 @@ class PRDescription:
|
||||
)
|
||||
self.pr_id = self.git_provider.get_pr_id()
|
||||
|
||||
if get_settings().pr_description.enable_semantic_files_types and not self.git_provider.is_supported(
|
||||
"gfm_markdown"):
|
||||
get_logger().debug(f"Disabling semantic files types for {self.pr_id}")
|
||||
get_settings().pr_description.enable_semantic_files_types = False
|
||||
|
||||
# Initialize the AI handler
|
||||
self.ai_handler = ai_handler
|
||||
|
||||
@ -41,8 +45,13 @@ class PRDescription:
|
||||
"description": self.git_provider.get_pr_description(full=False),
|
||||
"language": self.main_pr_language,
|
||||
"diff": "", # empty diff for initial calculation
|
||||
"use_bullet_points": get_settings().pr_description.use_bullet_points,
|
||||
"extra_instructions": get_settings().pr_description.extra_instructions,
|
||||
"commit_messages_str": self.git_provider.get_commit_messages()
|
||||
"commit_messages_str": self.git_provider.get_commit_messages(),
|
||||
"enable_custom_labels": get_settings().config.enable_custom_labels,
|
||||
"custom_labels_class": "", # will be filled if necessary in 'set_custom_labels' function
|
||||
"enable_file_walkthrough": get_settings().pr_description.enable_file_walkthrough,
|
||||
"enable_semantic_files_types": get_settings().pr_description.enable_semantic_files_types,
|
||||
}
|
||||
|
||||
self.user_description = self.git_provider.get_user_description()
|
||||
@ -65,18 +74,21 @@ class PRDescription:
|
||||
"""
|
||||
|
||||
try:
|
||||
logging.info(f"Generating a PR description {self.pr_id}")
|
||||
get_logger().info(f"Generating a PR description {self.pr_id}")
|
||||
if get_settings().config.publish_output:
|
||||
self.git_provider.publish_comment("Preparing PR description...", is_temporary=True)
|
||||
|
||||
await retry_with_fallback_models(self._prepare_prediction)
|
||||
|
||||
logging.info(f"Preparing answer {self.pr_id}")
|
||||
get_logger().info(f"Preparing answer {self.pr_id}")
|
||||
if self.prediction:
|
||||
self._prepare_data()
|
||||
else:
|
||||
return None
|
||||
|
||||
if get_settings().pr_description.enable_semantic_files_types:
|
||||
self._prepare_file_labels()
|
||||
|
||||
pr_labels = []
|
||||
if get_settings().pr_description.publish_labels:
|
||||
pr_labels = self._prepare_labels()
|
||||
@ -88,19 +100,25 @@ class PRDescription:
|
||||
full_markdown_description = f"## Title\n\n{pr_title}\n\n___\n{pr_body}"
|
||||
|
||||
if get_settings().config.publish_output:
|
||||
logging.info(f"Pushing answer {self.pr_id}")
|
||||
get_logger().info(f"Pushing answer {self.pr_id}")
|
||||
if get_settings().pr_description.publish_description_as_comment:
|
||||
self.git_provider.publish_comment(full_markdown_description)
|
||||
else:
|
||||
self.git_provider.publish_description(pr_title, pr_body)
|
||||
if get_settings().pr_description.publish_labels and self.git_provider.is_supported("get_labels"):
|
||||
current_labels = self.git_provider.get_labels()
|
||||
if current_labels is None:
|
||||
current_labels = []
|
||||
self.git_provider.publish_labels(pr_labels + current_labels)
|
||||
user_labels = get_user_labels(current_labels)
|
||||
self.git_provider.publish_labels(pr_labels + user_labels)
|
||||
|
||||
if (get_settings().pr_description.final_update_message and
|
||||
hasattr(self.git_provider, 'pr_url') and self.git_provider.pr_url):
|
||||
latest_commit_url = self.git_provider.get_latest_commit_url()
|
||||
if latest_commit_url:
|
||||
self.git_provider.publish_comment(
|
||||
f"**[PR Description]({self.git_provider.pr_url})** updated to latest commit ({latest_commit_url})")
|
||||
self.git_provider.remove_initial_comment()
|
||||
except Exception as e:
|
||||
logging.error(f"Error generating PR description {self.pr_id}: {e}")
|
||||
get_logger().error(f"Error generating PR description {self.pr_id}: {e}")
|
||||
|
||||
return ""
|
||||
|
||||
@ -121,9 +139,9 @@ class PRDescription:
|
||||
if get_settings().pr_description.use_description_markers and 'pr_agent:' not in self.user_description:
|
||||
return None
|
||||
|
||||
logging.info(f"Getting PR diff {self.pr_id}")
|
||||
get_logger().info(f"Getting PR diff {self.pr_id}")
|
||||
self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)
|
||||
logging.info(f"Getting AI prediction {self.pr_id}")
|
||||
get_logger().info(f"Getting AI prediction {self.pr_id}")
|
||||
self.prediction = await self._get_prediction(model)
|
||||
|
||||
async def _get_prediction(self, model: str) -> str:
|
||||
@ -140,12 +158,13 @@ class PRDescription:
|
||||
variables["diff"] = self.patches_diff # update diff
|
||||
|
||||
environment = Environment(undefined=StrictUndefined)
|
||||
set_custom_labels(variables)
|
||||
system_prompt = environment.from_string(get_settings().pr_description_prompt.system).render(variables)
|
||||
user_prompt = environment.from_string(get_settings().pr_description_prompt.user).render(variables)
|
||||
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
logging.info(f"\nSystem prompt:\n{system_prompt}")
|
||||
logging.info(f"\nUser prompt:\n{user_prompt}")
|
||||
get_logger().info(f"\nSystem prompt:\n{system_prompt}")
|
||||
get_logger().info(f"\nUser prompt:\n{user_prompt}")
|
||||
|
||||
response, finish_reason = await self.ai_handler.chat_completion(
|
||||
model=model,
|
||||
@ -154,8 +173,10 @@ class PRDescription:
|
||||
user=user_prompt
|
||||
)
|
||||
|
||||
return response
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
get_logger().info(f"\nAI response:\n{response}")
|
||||
|
||||
return response
|
||||
|
||||
def _prepare_data(self):
|
||||
# Load the AI prediction data into a dictionary
|
||||
@ -169,16 +190,20 @@ class PRDescription:
|
||||
pr_types = []
|
||||
|
||||
# If the 'PR Type' key is present in the dictionary, split its value by comma and assign it to 'pr_types'
|
||||
if 'PR Type' in self.data:
|
||||
if type(self.data['PR Type']) == list:
|
||||
pr_types = self.data['PR Type']
|
||||
elif type(self.data['PR Type']) == str:
|
||||
pr_types = self.data['PR Type'].split(',')
|
||||
|
||||
if 'labels' in self.data:
|
||||
if type(self.data['labels']) == list:
|
||||
pr_types = self.data['labels']
|
||||
elif type(self.data['labels']) == str:
|
||||
pr_types = self.data['labels'].split(',')
|
||||
elif 'type' in self.data:
|
||||
if type(self.data['type']) == list:
|
||||
pr_types = self.data['type']
|
||||
elif type(self.data['type']) == str:
|
||||
pr_types = self.data['type'].split(',')
|
||||
return pr_types
|
||||
|
||||
def _prepare_pr_answer_with_markers(self) -> Tuple[str, str]:
|
||||
logging.info(f"Using description marker replacements {self.pr_id}")
|
||||
get_logger().info(f"Using description marker replacements {self.pr_id}")
|
||||
title = self.vars["title"]
|
||||
body = self.user_description
|
||||
if get_settings().pr_description.include_generated_by_header:
|
||||
@ -186,7 +211,12 @@ class PRDescription:
|
||||
else:
|
||||
ai_header = ""
|
||||
|
||||
ai_summary = self.data.get('PR Description')
|
||||
ai_type = self.data.get('type')
|
||||
if ai_type and not re.search(r'<!--\s*pr_agent:type\s*-->', body):
|
||||
pr_type = f"{ai_header}{ai_type}"
|
||||
body = body.replace('pr_agent:type', pr_type)
|
||||
|
||||
ai_summary = self.data.get('description')
|
||||
if ai_summary and not re.search(r'<!--\s*pr_agent:summary\s*-->', body):
|
||||
summary = f"{ai_header}{ai_summary}"
|
||||
body = body.replace('pr_agent:summary', summary)
|
||||
@ -215,12 +245,17 @@ class PRDescription:
|
||||
|
||||
# Iterate over the dictionary items and append the key and value to 'markdown_text' in a markdown format
|
||||
markdown_text = ""
|
||||
# Don't display 'PR Labels'
|
||||
if 'labels' in self.data and self.git_provider.is_supported("get_labels"):
|
||||
self.data.pop('labels')
|
||||
if not get_settings().pr_description.enable_pr_type:
|
||||
self.data.pop('type')
|
||||
for key, value in self.data.items():
|
||||
markdown_text += f"## {key}\n\n"
|
||||
markdown_text += f"{value}\n\n"
|
||||
|
||||
# Remove the 'PR Title' key from the dictionary
|
||||
ai_title = self.data.pop('PR Title', self.vars["title"])
|
||||
ai_title = self.data.pop('title', self.vars["title"])
|
||||
if get_settings().pr_description.keep_original_user_title:
|
||||
# Assign the original PR title to the 'title' variable
|
||||
title = self.vars["title"]
|
||||
@ -232,26 +267,131 @@ class PRDescription:
|
||||
# except for the items containing the word 'walkthrough'
|
||||
pr_body = ""
|
||||
for idx, (key, value) in enumerate(self.data.items()):
|
||||
pr_body += f"## {key}:\n"
|
||||
if key == 'pr_files':
|
||||
value = self.file_label_dict
|
||||
key_publish = "PR changes walkthrough"
|
||||
else:
|
||||
key_publish = key.rstrip(':').replace("_", " ").capitalize()
|
||||
pr_body += f"## {key_publish}\n"
|
||||
if 'walkthrough' in key.lower():
|
||||
# for filename, description in value.items():
|
||||
if self.git_provider.is_supported("gfm_markdown"):
|
||||
pr_body += "<details> <summary>files:</summary>\n\n"
|
||||
for file in value:
|
||||
filename = file['filename'].replace("'", "`")
|
||||
description = file['changes in file']
|
||||
pr_body += f'`{filename}`: {description}\n'
|
||||
description = file['changes_in_file']
|
||||
pr_body += f'- `{filename}`: {description}\n'
|
||||
if self.git_provider.is_supported("gfm_markdown"):
|
||||
pr_body +="</details>\n"
|
||||
pr_body += "</details>\n"
|
||||
elif 'pr_files' in key.lower():
|
||||
pr_body = self.process_pr_files_prediction(pr_body, value)
|
||||
else:
|
||||
# if the value is a list, join its items by comma
|
||||
if type(value) == list:
|
||||
if isinstance(value, list):
|
||||
value = ', '.join(v for v in value)
|
||||
pr_body += f"{value}\n"
|
||||
if idx < len(self.data) - 1:
|
||||
pr_body += "\n___\n"
|
||||
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
logging.info(f"title:\n{title}\n{pr_body}")
|
||||
get_logger().info(f"title:\n{title}\n{pr_body}")
|
||||
|
||||
return title, pr_body
|
||||
return title, pr_body
|
||||
|
||||
def _prepare_file_labels(self):
    """Group the entries of ``self.data['pr_files']`` by label into ``self.file_label_dict``.

    The result maps each label to a list of ``(filename, changes_summary)`` tuples, with single
    and double quotes in the filename replaced by backticks. An entry that cannot be processed
    is logged as an error and skipped.
    """
    grouped = {}
    self.file_label_dict = grouped
    for entry in self.data['pr_files']:
        try:
            name = entry['filename'].replace("'", "`").replace('"', '`')
            summary = entry['changes_summary']
            grouped.setdefault(entry['label'], []).append((name, summary))
        except Exception as e:
            get_logger().error(f"Error preparing file label dict {self.pr_id}: {e}")
|
||||
|
||||
def process_pr_files_prediction(self, pr_body, value):
    """
    Append an HTML table of the changed files, grouped by semantic label, to the PR body.

    Args:
        pr_body (str): The PR description text built so far.
        value (dict): Maps a semantic label to a list of (filename, change description) tuples.

    Returns:
        str: `pr_body` followed by the rendered table. `pr_body` is returned unchanged when the
        git provider does not support gfm_markdown, or when rendering fails (the error is logged),
        so a half-built table with unclosed tags is never published.
    """
    if not self.git_provider.is_supported("gfm_markdown"):
        get_logger().info(f"Disabling semantic files types for {self.pr_id} since gfm_markdown is not supported")
        return pr_body

    try:
        delta = 65
        # Ordinary spaces collapse when HTML is rendered, so the column is widened with
        # non-breaking spaces.
        padding_unit = "&nbsp; "
        header = "Relevant files" + padding_unit * delta

        # Build the table separately and attach it to pr_body only once it is complete.
        parts = [
            "<table>",
            f"""<thead><tr><th></th><th>{header}</th></tr></thead>""",
            """<tbody>""",
        ]
        for semantic_label, list_tuples in value.items():
            s_label = semantic_label.strip("'").strip('"')
            parts.append(f"""<tr><td><strong>{s_label.capitalize()}</strong></td>""")
            parts.append(f"""<td><details><summary>{len(list_tuples)} files</summary><table>""")
            for filename, file_change_description in list_tuples:
                filename = filename.replace("'", "`")
                # Only the last path component is shown in the collapsed summary line.
                filename_publish = filename.split("/")[-1]
                if len(filename_publish) < (delta - 5):
                    filename_publish += padding_unit * ((delta - 5) - len(filename_publish))

                # Added/removed line counts, taken from the provider's diff for this file (if any).
                diff_plus_minus = ""
                for f in self.git_provider.diff_files:
                    if f.filename.lower() == filename.lower():
                        diff_plus_minus += f"+{f.num_plus_lines}/-{f.num_minus_lines}"
                        break

                # Link to the file when the provider can build one.
                link = ""
                if hasattr(self.git_provider, 'get_line_link'):
                    filename = filename.strip()
                    link = self.git_provider.get_line_link(filename, relevant_line_start=-1)

                file_change_description = self._insert_br_after_x_chars(file_change_description, x=(delta - 5))
                parts.append(f"""
<tr>
  <td>
    <details>
      <summary><strong>{filename_publish}</strong></summary>
      <ul>
        {filename}<br><br>
        <strong>{file_change_description}</strong>
      </ul>
    </details>
  </td>
  <td><a href="{link}"> {diff_plus_minus}</a></td>

</tr>
""")
            parts.append("""</table></details></td></tr>""")
        # Every label row is already closed above; only the body and the table remain open here.
        parts.append("""</tbody></table>""")
    except Exception as e:
        get_logger().error(f"Error processing pr files to markdown {self.pr_id}: {e}")
        return pr_body

    return pr_body + "".join(parts)
|
||||
|
||||
def _insert_br_after_x_chars(self, text, x=70):
|
||||
"""
|
||||
Insert <br> into a string after a word that increases its length above x characters.
|
||||
"""
|
||||
if len(text) < x:
|
||||
return text
|
||||
|
||||
words = text.split(' ')
|
||||
new_text = ""
|
||||
current_length = 0
|
||||
|
||||
for word in words:
|
||||
# Check if adding this word exceeds x characters
|
||||
if current_length + len(word) > x:
|
||||
new_text += "<br>" # Insert line break
|
||||
current_length = 0 # Reset counter
|
||||
|
||||
# Add the word to the new text
|
||||
new_text += word + " "
|
||||
current_length += len(word) + 1 # Add 1 for the space
|
||||
|
||||
return new_text.strip() # Remove trailing space
|
||||
|
171
pr_agent/tools/pr_generate_labels.py
Normal file
171
pr_agent/tools/pr_generate_labels.py
Normal file
@ -0,0 +1,171 @@
|
||||
import copy
|
||||
import re
|
||||
from typing import List, Tuple
|
||||
|
||||
from jinja2 import Environment, StrictUndefined
|
||||
|
||||
from pr_agent.algo.ai_handler import AiHandler
|
||||
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
|
||||
from pr_agent.algo.token_handler import TokenHandler
|
||||
from pr_agent.algo.utils import load_yaml, set_custom_labels, get_user_labels
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.git_providers import get_git_provider
|
||||
from pr_agent.git_providers.git_provider import get_main_pr_language
|
||||
from pr_agent.log import get_logger
|
||||
|
||||
|
||||
class PRGenerateLabels:
    """Generate labels for a pull request with an AI model and publish them through the git provider."""

    def __init__(self, pr_url: str, args: list = None):
        """
        Initialize the PRGenerateLabels object with the necessary attributes and objects for generating labels
        corresponding to the PR using an AI model.

        Args:
            pr_url (str): The URL of the pull request.
            args (list, optional): List of arguments passed to the PRGenerateLabels class. Defaults to None.
                Not used by this class.
        """
        # Initialize the git provider and main PR language
        self.git_provider = get_git_provider()(pr_url)
        self.main_pr_language = get_main_pr_language(
            self.git_provider.get_languages(), self.git_provider.get_files()
        )
        self.pr_id = self.git_provider.get_pr_id()

        # Initialize the AI handler
        self.ai_handler = AiHandler()

        # Initialize the variables dictionary used to render the prompts
        self.vars = {
            "title": self.git_provider.pr.title,
            "branch": self.git_provider.get_pr_branch(),
            "description": self.git_provider.get_pr_description(full=False),
            "language": self.main_pr_language,
            "diff": "",  # empty diff for initial calculation
            "use_bullet_points": get_settings().pr_description.use_bullet_points,
            "extra_instructions": get_settings().pr_description.extra_instructions,
            "commit_messages_str": self.git_provider.get_commit_messages(),
            "enable_custom_labels": get_settings().config.enable_custom_labels,
            "custom_labels_class": "",  # will be filled if necessary in 'set_custom_labels' function
        }

        # Initialize the token handler
        self.token_handler = TokenHandler(
            self.git_provider.pr,
            self.vars,
            get_settings().pr_custom_labels_prompt.system,
            get_settings().pr_custom_labels_prompt.user,
        )

        # Initialize patches_diff and prediction attributes
        self.patches_diff = None
        self.prediction = None

    async def run(self):
        """
        Generate PR labels using an AI model and publish them to the PR.

        Returns:
            str | None: None when no prediction was produced; otherwise an empty string.
            Any error is logged rather than raised.
        """
        try:
            get_logger().info(f"Generating a PR labels {self.pr_id}")
            if get_settings().config.publish_output:
                self.git_provider.publish_comment("Preparing PR labels...", is_temporary=True)

            await retry_with_fallback_models(self._prepare_prediction)

            get_logger().info(f"Preparing answer {self.pr_id}")
            if self.prediction:
                self._prepare_data()
            else:
                return None

            pr_labels = self._prepare_labels()

            if get_settings().config.publish_output:
                get_logger().info(f"Pushing labels {self.pr_id}")

                # Keep the labels that get_user_labels selects from the PR's current labels
                current_labels = self.git_provider.get_labels()
                user_labels = get_user_labels(current_labels)
                pr_labels = pr_labels + user_labels

                if self.git_provider.is_supported("get_labels"):
                    self.git_provider.publish_labels(pr_labels)
                elif pr_labels:
                    # Provider cannot set labels: publish them as a regular comment instead
                    value = ', '.join(pr_labels)
                    pr_labels_text = f"## PR Labels:\n{value}\n"
                    self.git_provider.publish_comment(pr_labels_text, is_temporary=False)
                self.git_provider.remove_initial_comment()
        except Exception as e:
            get_logger().error(f"Error generating PR labels {self.pr_id}: {e}")

        return ""

    async def _prepare_prediction(self, model: str) -> None:
        """
        Prepare the AI prediction for the PR labels based on the provided model.

        Args:
            model (str): The name of the model to be used for generating the prediction.

        Returns:
            None

        Raises:
            Any exceptions raised by the 'get_pr_diff' and '_get_prediction' functions.
        """
        get_logger().info(f"Getting PR diff {self.pr_id}")
        self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)
        get_logger().info(f"Getting AI prediction {self.pr_id}")
        self.prediction = await self._get_prediction(model)

    async def _get_prediction(self, model: str) -> str:
        """
        Generate an AI prediction for the PR labels based on the provided model.

        Args:
            model (str): The name of the model to be used for generating the prediction.

        Returns:
            str: The generated AI prediction.
        """
        variables = copy.deepcopy(self.vars)
        variables["diff"] = self.patches_diff  # update diff

        environment = Environment(undefined=StrictUndefined)
        set_custom_labels(variables)
        system_prompt = environment.from_string(get_settings().pr_custom_labels_prompt.system).render(variables)
        user_prompt = environment.from_string(get_settings().pr_custom_labels_prompt.user).render(variables)

        if get_settings().config.verbosity_level >= 2:
            get_logger().info(f"\nSystem prompt:\n{system_prompt}")
            get_logger().info(f"\nUser prompt:\n{user_prompt}")

        response, finish_reason = await self.ai_handler.chat_completion(
            model=model,
            temperature=0.2,
            system=system_prompt,
            user=user_prompt
        )

        if get_settings().config.verbosity_level >= 2:
            get_logger().info(f"\nAI response:\n{response}")

        return response

    def _prepare_data(self):
        """Load the AI prediction (YAML text) into `self.data`."""
        self.data = load_yaml(self.prediction.strip())

    def _prepare_labels(self) -> List[str]:
        """
        Extract the predicted labels from `self.data`.

        The 'labels' entry may be a list or a comma-separated string. Surrounding whitespace is
        removed from every label and empty entries are dropped, so "Bug fix, Tests" yields
        ["Bug fix", "Tests"] rather than ["Bug fix", " Tests"].

        Returns:
            List[str]: The labels, or an empty list when the prediction has no usable 'labels' entry.
        """
        # The parsed prediction may not be a mapping at all (e.g. None)
        if not isinstance(self.data, dict):
            return []

        labels = self.data.get('labels')
        if isinstance(labels, str):
            candidates = labels.split(',')
        elif isinstance(labels, list):
            candidates = labels
        else:
            candidates = []

        cleaned = (str(label).strip() for label in candidates if label is not None)
        return [label for label in cleaned if label]
|
@ -1,5 +1,4 @@
|
||||
import copy
|
||||
import logging
|
||||
|
||||
from jinja2 import Environment, StrictUndefined
|
||||
|
||||
@ -9,6 +8,7 @@ from pr_agent.algo.token_handler import TokenHandler
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.git_providers import get_git_provider
|
||||
from pr_agent.git_providers.git_provider import get_main_pr_language
|
||||
from pr_agent.log import get_logger
|
||||
|
||||
|
||||
class PRInformationFromUser:
|
||||
@ -34,22 +34,22 @@ class PRInformationFromUser:
|
||||
self.prediction = None
|
||||
|
||||
async def run(self):
|
||||
logging.info('Generating question to the user...')
|
||||
get_logger().info('Generating question to the user...')
|
||||
if get_settings().config.publish_output:
|
||||
self.git_provider.publish_comment("Preparing questions...", is_temporary=True)
|
||||
await retry_with_fallback_models(self._prepare_prediction)
|
||||
logging.info('Preparing questions...')
|
||||
get_logger().info('Preparing questions...')
|
||||
pr_comment = self._prepare_pr_answer()
|
||||
if get_settings().config.publish_output:
|
||||
logging.info('Pushing questions...')
|
||||
get_logger().info('Pushing questions...')
|
||||
self.git_provider.publish_comment(pr_comment)
|
||||
self.git_provider.remove_initial_comment()
|
||||
return ""
|
||||
|
||||
async def _prepare_prediction(self, model):
|
||||
logging.info('Getting PR diff...')
|
||||
get_logger().info('Getting PR diff...')
|
||||
self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)
|
||||
logging.info('Getting AI prediction...')
|
||||
get_logger().info('Getting AI prediction...')
|
||||
self.prediction = await self._get_prediction(model)
|
||||
|
||||
async def _get_prediction(self, model: str):
|
||||
@ -59,8 +59,8 @@ class PRInformationFromUser:
|
||||
system_prompt = environment.from_string(get_settings().pr_information_from_user_prompt.system).render(variables)
|
||||
user_prompt = environment.from_string(get_settings().pr_information_from_user_prompt.user).render(variables)
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
logging.info(f"\nSystem prompt:\n{system_prompt}")
|
||||
logging.info(f"\nUser prompt:\n{user_prompt}")
|
||||
get_logger().info(f"\nSystem prompt:\n{system_prompt}")
|
||||
get_logger().info(f"\nUser prompt:\n{user_prompt}")
|
||||
response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
|
||||
system=system_prompt, user=user_prompt)
|
||||
return response
|
||||
@ -68,7 +68,7 @@ class PRInformationFromUser:
|
||||
def _prepare_pr_answer(self) -> str:
|
||||
model_output = self.prediction.strip()
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
logging.info(f"answer_str:\n{model_output}")
|
||||
get_logger().info(f"answer_str:\n{model_output}")
|
||||
answer_str = f"{model_output}\n\n Please respond to the questions above in the following format:\n\n" +\
|
||||
"\n>/answer\n>1) ...\n>2) ...\n>...\n"
|
||||
return answer_str
|
||||
|
@ -1,5 +1,4 @@
|
||||
import copy
|
||||
import logging
|
||||
|
||||
from jinja2 import Environment, StrictUndefined
|
||||
|
||||
@ -9,6 +8,7 @@ from pr_agent.algo.token_handler import TokenHandler
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.git_providers import get_git_provider
|
||||
from pr_agent.git_providers.git_provider import get_main_pr_language
|
||||
from pr_agent.log import get_logger
|
||||
|
||||
|
||||
class PRQuestions:
|
||||
@ -44,22 +44,22 @@ class PRQuestions:
|
||||
return question_str
|
||||
|
||||
async def run(self):
|
||||
logging.info('Answering a PR question...')
|
||||
get_logger().info('Answering a PR question...')
|
||||
if get_settings().config.publish_output:
|
||||
self.git_provider.publish_comment("Preparing answer...", is_temporary=True)
|
||||
await retry_with_fallback_models(self._prepare_prediction)
|
||||
logging.info('Preparing answer...')
|
||||
get_logger().info('Preparing answer...')
|
||||
pr_comment = self._prepare_pr_answer()
|
||||
if get_settings().config.publish_output:
|
||||
logging.info('Pushing answer...')
|
||||
get_logger().info('Pushing answer...')
|
||||
self.git_provider.publish_comment(pr_comment)
|
||||
self.git_provider.remove_initial_comment()
|
||||
return ""
|
||||
|
||||
async def _prepare_prediction(self, model: str):
|
||||
logging.info('Getting PR diff...')
|
||||
get_logger().info('Getting PR diff...')
|
||||
self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)
|
||||
logging.info('Getting AI prediction...')
|
||||
get_logger().info('Getting AI prediction...')
|
||||
self.prediction = await self._get_prediction(model)
|
||||
|
||||
async def _get_prediction(self, model: str):
|
||||
@ -69,8 +69,8 @@ class PRQuestions:
|
||||
system_prompt = environment.from_string(get_settings().pr_questions_prompt.system).render(variables)
|
||||
user_prompt = environment.from_string(get_settings().pr_questions_prompt.user).render(variables)
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
logging.info(f"\nSystem prompt:\n{system_prompt}")
|
||||
logging.info(f"\nUser prompt:\n{user_prompt}")
|
||||
get_logger().info(f"\nSystem prompt:\n{system_prompt}")
|
||||
get_logger().info(f"\nUser prompt:\n{user_prompt}")
|
||||
response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
|
||||
system=system_prompt, user=user_prompt)
|
||||
return response
|
||||
@ -79,5 +79,5 @@ class PRQuestions:
|
||||
answer_str = f"Question: {self.question_str}\n\n"
|
||||
answer_str += f"Answer:\n{self.prediction.strip()}\n\n"
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
logging.info(f"answer_str:\n{answer_str}")
|
||||
get_logger().info(f"answer_str:\n{answer_str}")
|
||||
return answer_str
|
||||
|
@ -1,6 +1,5 @@
|
||||
import copy
|
||||
import json
|
||||
import logging
|
||||
import datetime
|
||||
from collections import OrderedDict
|
||||
from typing import List, Tuple
|
||||
|
||||
@ -11,10 +10,11 @@ from yaml import SafeLoader
|
||||
from pr_agent.algo.ai_handler import BaseAiHandler, AiHandler
|
||||
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
|
||||
from pr_agent.algo.token_handler import TokenHandler
|
||||
from pr_agent.algo.utils import convert_to_markdown, try_fix_json, try_fix_yaml, load_yaml
|
||||
from pr_agent.algo.utils import convert_to_markdown, load_yaml, try_fix_yaml, set_custom_labels, get_user_labels
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.git_providers import get_git_provider
|
||||
from pr_agent.git_providers.git_provider import IncrementalPR, get_main_pr_language
|
||||
from pr_agent.log import get_logger
|
||||
from pr_agent.servers.help import actions_help_text, bot_help_text
|
||||
|
||||
|
||||
@ -64,6 +64,8 @@ class PRReviewer:
|
||||
'answer_str': answer_str,
|
||||
"extra_instructions": get_settings().pr_reviewer.extra_instructions,
|
||||
"commit_messages_str": self.git_provider.get_commit_messages(),
|
||||
"custom_labels": "",
|
||||
"enable_custom_labels": get_settings().config.enable_custom_labels,
|
||||
}
|
||||
|
||||
self.token_handler = TokenHandler(
|
||||
@ -97,29 +99,41 @@ class PRReviewer:
|
||||
|
||||
try:
|
||||
if self.is_auto and not get_settings().pr_reviewer.automatic_review:
|
||||
logging.info(f'Automatic review is disabled {self.pr_url}')
|
||||
get_logger().info(f'Automatic review is disabled {self.pr_url}')
|
||||
return None
|
||||
if self.incremental.is_incremental and not self._can_run_incremental_review():
|
||||
return None
|
||||
|
||||
logging.info(f'Reviewing PR: {self.pr_url} ...')
|
||||
get_logger().info(f'Reviewing PR: {self.pr_url} ...')
|
||||
|
||||
if get_settings().config.publish_output:
|
||||
self.git_provider.publish_comment("Preparing review...", is_temporary=True)
|
||||
|
||||
await retry_with_fallback_models(self._prepare_prediction)
|
||||
|
||||
logging.info('Preparing PR review...')
|
||||
get_logger().info('Preparing PR review...')
|
||||
pr_comment = self._prepare_pr_review()
|
||||
|
||||
if get_settings().config.publish_output:
|
||||
logging.info('Pushing PR review...')
|
||||
self.git_provider.publish_comment(pr_comment)
|
||||
self.git_provider.remove_initial_comment()
|
||||
get_logger().info('Pushing PR review...')
|
||||
previous_review_comment = self._get_previous_review_comment()
|
||||
|
||||
# publish the review
|
||||
if get_settings().pr_reviewer.persistent_comment and not self.incremental.is_incremental:
|
||||
self.git_provider.publish_persistent_comment(pr_comment,
|
||||
initial_header="## PR Analysis",
|
||||
update_header=True)
|
||||
else:
|
||||
self.git_provider.publish_comment(pr_comment)
|
||||
|
||||
self.git_provider.remove_initial_comment()
|
||||
if previous_review_comment:
|
||||
self._remove_previous_review_comment(previous_review_comment)
|
||||
if get_settings().pr_reviewer.inline_code_comments:
|
||||
logging.info('Pushing inline code comments...')
|
||||
get_logger().info('Pushing inline code comments...')
|
||||
self._publish_inline_code_comments()
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to review PR: {e}")
|
||||
get_logger().error(f"Failed to review PR: {e}")
|
||||
|
||||
async def _prepare_prediction(self, model: str) -> None:
|
||||
"""
|
||||
@ -131,9 +145,9 @@ class PRReviewer:
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
logging.info('Getting PR diff...')
|
||||
get_logger().info('Getting PR diff...')
|
||||
self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)
|
||||
logging.info('Getting AI prediction...')
|
||||
get_logger().info('Getting AI prediction...')
|
||||
self.prediction = await self._get_prediction(model)
|
||||
|
||||
async def _get_prediction(self, model: str) -> str:
|
||||
@ -154,8 +168,8 @@ class PRReviewer:
|
||||
user_prompt = environment.from_string(get_settings().pr_review_prompt.user).render(variables)
|
||||
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
logging.info(f"\nSystem prompt:\n{system_prompt}")
|
||||
logging.info(f"\nUser prompt:\n{user_prompt}")
|
||||
get_logger().info(f"\nSystem prompt:\n{system_prompt}")
|
||||
get_logger().info(f"\nUser prompt:\n{user_prompt}")
|
||||
|
||||
response, finish_reason = await self.ai_handler.chat_completion(
|
||||
model=model,
|
||||
@ -164,6 +178,9 @@ class PRReviewer:
|
||||
user=user_prompt
|
||||
)
|
||||
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
get_logger().info(f"\nAI response:\n{response}")
|
||||
|
||||
return response
|
||||
|
||||
def _prepare_pr_review(self) -> str:
|
||||
@ -208,14 +225,22 @@ class PRReviewer:
|
||||
link = self.git_provider.generate_link_to_relevant_line_number(suggestion)
|
||||
if link:
|
||||
suggestion['relevant line'] = f"[{suggestion['relevant line']}]({link})"
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
# Add incremental review section
|
||||
if self.incremental.is_incremental:
|
||||
last_commit_url = f"{self.git_provider.get_pr_url()}/commits/" \
|
||||
f"{self.git_provider.incremental.first_new_commit_sha}"
|
||||
last_commit_msg = self.incremental.commits_range[0].commit.message if self.incremental.commits_range else ""
|
||||
incremental_review_markdown_text = f"Starting from commit {last_commit_url}"
|
||||
if last_commit_msg:
|
||||
replacement = last_commit_msg.splitlines(keepends=False)[0].replace('_', r'\_')
|
||||
incremental_review_markdown_text += f" \n_({replacement})_"
|
||||
data = OrderedDict(data)
|
||||
data.update({'Incremental PR Review': {
|
||||
"⏮️ Review for commits since previous PR-Agent review": f"Starting from commit {last_commit_url}"}})
|
||||
"⏮️ Review for commits since previous PR-Agent review": incremental_review_markdown_text}})
|
||||
data.move_to_end('Incremental PR Review', last=False)
|
||||
|
||||
markdown_text = convert_to_markdown(data, self.git_provider.is_supported("gfm_markdown"))
|
||||
@ -224,14 +249,22 @@ class PRReviewer:
|
||||
# Add help text if not in CLI mode
|
||||
if not get_settings().get("CONFIG.CLI_MODE", False):
|
||||
markdown_text += "\n### How to use\n"
|
||||
if user and '[bot]' not in user:
|
||||
if self.git_provider.is_supported("gfm_markdown"):
|
||||
markdown_text += "\n <details> <summary> Instructions</summary>\n\n"
|
||||
bot_user = "[bot]" if get_settings().github_app.override_deployment_type else get_settings().github_app.bot_user
|
||||
if user and bot_user not in user:
|
||||
markdown_text += bot_help_text(user)
|
||||
else:
|
||||
markdown_text += actions_help_text
|
||||
if self.git_provider.is_supported("gfm_markdown"):
|
||||
markdown_text += "\n</details>\n"
|
||||
|
||||
# Add custom labels from the review prediction (effort, security)
|
||||
self.set_review_labels(data)
|
||||
|
||||
# Log markdown response if verbosity level is high
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
logging.info(f"Markdown response:\n{markdown_text}")
|
||||
get_logger().info(f"Markdown response:\n{markdown_text}")
|
||||
|
||||
if markdown_text == None or len(markdown_text) == 0:
|
||||
markdown_text = ""
|
||||
@ -245,21 +278,14 @@ class PRReviewer:
|
||||
if get_settings().pr_reviewer.num_code_suggestions == 0:
|
||||
return
|
||||
|
||||
review_text = self.prediction.strip()
|
||||
review_text = review_text.removeprefix('```yaml').rstrip('`')
|
||||
try:
|
||||
data = yaml.load(review_text, Loader=SafeLoader)
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to parse AI prediction: {e}")
|
||||
data = try_fix_yaml(review_text)
|
||||
|
||||
data = load_yaml(self.prediction.strip())
|
||||
comments: List[str] = []
|
||||
for suggestion in data.get('PR Feedback', {}).get('Code feedback', []):
|
||||
relevant_file = suggestion.get('relevant file', '').strip()
|
||||
relevant_line_in_file = suggestion.get('relevant line', '').strip()
|
||||
content = suggestion.get('suggestion', '')
|
||||
if not relevant_file or not relevant_line_in_file or not content:
|
||||
logging.info("Skipping inline comment with missing file/line/content")
|
||||
get_logger().info("Skipping inline comment with missing file/line/content")
|
||||
continue
|
||||
|
||||
if self.git_provider.is_supported("create_inline_comment"):
|
||||
@ -295,3 +321,83 @@ class PRReviewer:
|
||||
break
|
||||
|
||||
return question_str, answer_str
|
||||
|
||||
def _get_previous_review_comment(self):
    """
    Look up the previous review comment through the git provider.

    Returns the provider's `get_previous_review(...)` result when the
    `pr_reviewer.remove_previous_review_comment` setting is on and the provider exposes
    `get_previous_review`; otherwise None. Failures are logged and also yield None.
    """
    try:
        removal_enabled = get_settings().pr_reviewer.remove_previous_review_comment
        if not removal_enabled or not hasattr(self.git_provider, "get_previous_review"):
            return None
        # Ask for the full review or the incremental one, matching the current run mode
        return self.git_provider.get_previous_review(
            full=not self.incremental.is_incremental,
            incremental=self.incremental.is_incremental,
        )
    except Exception as e:
        get_logger().exception(f"Failed to get previous review comment, error: {e}")
    return None
|
||||
|
||||
def _remove_previous_review_comment(self, comment):
    """
    Delete the given previous review comment through the git provider.

    Nothing happens when `comment` is falsy or the `pr_reviewer.remove_previous_review_comment`
    setting is off. Failures are logged, never raised.
    """
    try:
        removal_enabled = get_settings().pr_reviewer.remove_previous_review_comment
        if not removal_enabled or not comment:
            return
        self.git_provider.remove_comment(comment)
    except Exception as e:
        get_logger().exception(f"Failed to remove previous review comment, error: {e}")
|
||||
|
||||
def _can_run_incremental_review(self) -> bool:
    """
    Decide whether an incremental review may run, based on the configuration and the previous review.

    Returns:
        bool: False when running automatically with no new commits, or when the commit-count /
        commit-age thresholds are not satisfied; True otherwise.
    """
    # Automatic mode with nothing new since the last review: nothing to do
    if self.is_auto and not self.incremental.first_new_commit_sha:
        get_logger().info(f"Incremental review is enabled for {self.pr_url} but there are no new commits")
        return False

    # Threshold 1: enough new commits since the previous review
    num_new_commits = len(self.incremental.commits_range)
    num_commits_threshold = get_settings().pr_reviewer.minimal_commits_for_incremental_review
    too_few_commits = num_new_commits < num_commits_threshold

    # Threshold 2: the last seen commit must be older than the configured number of minutes
    # NOTE(review): datetime.now() is naive local time; confirm the commit date uses the same convention
    minutes_back = datetime.timedelta(minutes=get_settings().pr_reviewer.minimal_minutes_for_incremental_review)
    recent_commits_threshold = datetime.datetime.now() - minutes_back
    if self.incremental.last_seen_commit:
        last_seen_commit_date = self.incremental.last_seen_commit.commit.author.date
    else:
        last_seen_commit_date = None
    if self.incremental.last_seen_commit:
        commits_too_recent = last_seen_commit_date > recent_commits_threshold
    else:
        commits_too_recent = False

    # When all thresholds are required, a single failed check blocks the review;
    # otherwise the review is blocked only if both checks fail.
    if get_settings().pr_reviewer.require_all_thresholds_for_incremental_review:
        blocked = too_few_commits or commits_too_recent
    else:
        blocked = too_few_commits and commits_too_recent

    if blocked:
        get_logger().info(
            f"Incremental review is enabled for {self.pr_url} but didn't pass the threshold check to run:"
            f"\n* Number of new commits = {num_new_commits} (threshold is {num_commits_threshold})"
            f"\n* Last seen commit date = {last_seen_commit_date} (threshold is {recent_commits_threshold})"
        )
        return False
    return True
|
||||
|
||||
def set_review_labels(self, data):
    """
    Publish review labels (estimated effort, possible security concern) derived from the review data.

    Args:
        data (dict): Parsed review prediction; the 'PR Analysis' section is read for
            'Estimated effort to review [1-5]' and 'Security concerns'.

    Existing PR labels are preserved, except previous review-effort / security labels, which are
    replaced by the newly computed ones. Does nothing when both label settings are disabled.
    Any failure is logged, never raised.
    """
    if not (get_settings().pr_reviewer.enable_review_labels_security or
            get_settings().pr_reviewer.enable_review_labels_effort):
        return

    try:
        review_labels = []
        if get_settings().pr_reviewer.enable_review_labels_effort:
            estimated_effort = data['PR Analysis']['Estimated effort to review [1-5]']
            # Expected shape is "3, because ...", but YAML yields an int for a bare "3";
            # str() keeps both forms parseable.
            estimated_effort_number = int(str(estimated_effort).split(',')[0])
            if 1 <= estimated_effort_number <= 5:  # 1, because ...
                review_labels.append(f'Review effort [1-5]: {estimated_effort_number}')
        if get_settings().pr_reviewer.enable_review_labels_security:
            security_concerns = data['PR Analysis']['Security concerns']  # yes, because ...
            # A bare yes/no/true/false may be parsed by YAML as a bool; str(True) -> "true".
            security_text = str(security_concerns).lower()
            if 'yes' in security_text or 'true' in security_text:
                review_labels.append('Possible security concern')

        # Treat a missing label list as empty instead of failing in the filter below
        current_labels = self.git_provider.get_labels() or []
        review_label_prefixes = ('review effort [1-5]:', 'possible security concern')
        current_labels_filtered = [label for label in current_labels
                                   if not label.lower().startswith(review_label_prefixes)]
        if current_labels or review_labels:
            get_logger().info(f"Setting review labels: {review_labels + current_labels_filtered}")
            self.git_provider.publish_labels(review_labels + current_labels_filtered)
    except Exception as e:
        get_logger().error(f"Failed to set review labels, error: {e}")
|
||||
|
@ -1,18 +1,19 @@
|
||||
import copy
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from enum import Enum
|
||||
from typing import List, Tuple
|
||||
import pinecone
|
||||
from typing import List
|
||||
|
||||
import openai
|
||||
import pandas as pd
|
||||
import pinecone
|
||||
from pinecone_datasets import Dataset, DatasetMetadata
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from pr_agent.algo import MAX_TOKENS
|
||||
from pr_agent.algo.token_handler import TokenHandler
|
||||
from pr_agent.algo.utils import get_max_tokens
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.git_providers import get_git_provider
|
||||
from pinecone_datasets import Dataset, DatasetMetadata
|
||||
from pr_agent.log import get_logger
|
||||
|
||||
MODEL = "text-embedding-ada-002"
|
||||
|
||||
@ -47,6 +48,13 @@ class PRSimilarIssue:
|
||||
|
||||
# check if index exists, and if repo is already indexed
|
||||
run_from_scratch = False
|
||||
if run_from_scratch: # for debugging
|
||||
pinecone.init(api_key=api_key, environment=environment)
|
||||
if index_name in pinecone.list_indexes():
|
||||
get_logger().info('Removing index...')
|
||||
pinecone.delete_index(index_name)
|
||||
get_logger().info('Done')
|
||||
|
||||
upsert = True
|
||||
pinecone.init(api_key=api_key, environment=environment)
|
||||
if not index_name in pinecone.list_indexes():
|
||||
@ -62,11 +70,11 @@ class PRSimilarIssue:
|
||||
upsert = False
|
||||
|
||||
if run_from_scratch or upsert: # index the entire repo
|
||||
logging.info('Indexing the entire repo...')
|
||||
get_logger().info('Indexing the entire repo...')
|
||||
|
||||
logging.info('Getting issues...')
|
||||
get_logger().info('Getting issues...')
|
||||
issues = list(repo_obj.get_issues(state='all'))
|
||||
logging.info('Done')
|
||||
get_logger().info('Done')
|
||||
self._update_index_with_issues(issues, repo_name_for_index, upsert=upsert)
|
||||
else: # update index if needed
|
||||
pinecone_index = pinecone.Index(index_name=index_name)
|
||||
@ -92,20 +100,20 @@ class PRSimilarIssue:
|
||||
break
|
||||
|
||||
if issues_to_update:
|
||||
logging.info(f'Updating index with {counter} new issues...')
|
||||
get_logger().info(f'Updating index with {counter} new issues...')
|
||||
self._update_index_with_issues(issues_to_update, repo_name_for_index, upsert=True)
|
||||
else:
|
||||
logging.info('No new issues to update')
|
||||
get_logger().info('No new issues to update')
|
||||
|
||||
async def run(self):
|
||||
logging.info('Getting issue...')
|
||||
get_logger().info('Getting issue...')
|
||||
repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1])
|
||||
issue_main = self.git_provider.repo_obj.get_issue(original_issue_number)
|
||||
issue_str, comments, number = self._process_issue(issue_main)
|
||||
openai.api_key = get_settings().openai.key
|
||||
logging.info('Done')
|
||||
get_logger().info('Done')
|
||||
|
||||
logging.info('Querying...')
|
||||
get_logger().info('Querying...')
|
||||
res = openai.Embedding.create(input=[issue_str], engine=MODEL)
|
||||
embeds = [record['embedding'] for record in res['data']]
|
||||
pinecone_index = pinecone.Index(index_name=self.index_name)
|
||||
@ -117,7 +125,16 @@ class PRSimilarIssue:
|
||||
relevant_comment_number_list = []
|
||||
score_list = []
|
||||
for r in res['matches']:
|
||||
issue_number = int(r["id"].split('.')[0].split('_')[-1])
|
||||
# skip example issue
|
||||
if 'example_issue_' in r["id"]:
|
||||
continue
|
||||
|
||||
try:
|
||||
issue_number = int(r["id"].split('.')[0].split('_')[-1])
|
||||
except:
|
||||
get_logger().debug(f"Failed to parse issue number from {r['id']}")
|
||||
continue
|
||||
|
||||
if original_issue_number == issue_number:
|
||||
continue
|
||||
if issue_number not in relevant_issues_number_list:
|
||||
@ -127,9 +144,9 @@ class PRSimilarIssue:
|
||||
else:
|
||||
relevant_comment_number_list.append(-1)
|
||||
score_list.append(str("{:.2f}".format(r['score'])))
|
||||
logging.info('Done')
|
||||
get_logger().info('Done')
|
||||
|
||||
logging.info('Publishing response...')
|
||||
get_logger().info('Publishing response...')
|
||||
similar_issues_str = "### Similar Issues\n___\n\n"
|
||||
for i, issue_number_similar in enumerate(relevant_issues_number_list):
|
||||
issue = self.git_provider.repo_obj.get_issue(issue_number_similar)
|
||||
@ -140,8 +157,8 @@ class PRSimilarIssue:
|
||||
similar_issues_str += f"{i + 1}. **[{title}]({url})** (score={score_list[i]})\n\n"
|
||||
if get_settings().config.publish_output:
|
||||
response = issue_main.create_comment(similar_issues_str)
|
||||
logging.info(similar_issues_str)
|
||||
logging.info('Done')
|
||||
get_logger().info(similar_issues_str)
|
||||
get_logger().info('Done')
|
||||
|
||||
def _process_issue(self, issue):
|
||||
header = issue.title
|
||||
@ -155,7 +172,7 @@ class PRSimilarIssue:
|
||||
return issue_str, comments, number
|
||||
|
||||
def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=False):
|
||||
logging.info('Processing issues...')
|
||||
get_logger().info('Processing issues...')
|
||||
corpus = Corpus()
|
||||
example_issue_record = Record(
|
||||
id=f"example_issue_{repo_name_for_index}",
|
||||
@ -171,9 +188,9 @@ class PRSimilarIssue:
|
||||
|
||||
counter += 1
|
||||
if counter % 100 == 0:
|
||||
logging.info(f"Scanned {counter} issues")
|
||||
get_logger().info(f"Scanned {counter} issues")
|
||||
if counter >= self.max_issues_to_scan:
|
||||
logging.info(f"Scanned {self.max_issues_to_scan} issues, stopping")
|
||||
get_logger().info(f"Scanned {self.max_issues_to_scan} issues, stopping")
|
||||
break
|
||||
|
||||
issue_str, comments, number = self._process_issue(issue)
|
||||
@ -181,7 +198,7 @@ class PRSimilarIssue:
|
||||
username = issue.user.login
|
||||
created_at = str(issue.created_at)
|
||||
if len(issue_str) < 8000 or \
|
||||
self.token_handler.count_tokens(issue_str) < MAX_TOKENS[MODEL]: # fast reject first
|
||||
self.token_handler.count_tokens(issue_str) < get_max_tokens(MODEL): # fast reject first
|
||||
issue_record = Record(
|
||||
id=issue_key + "." + "issue",
|
||||
text=issue_str,
|
||||
@ -210,9 +227,9 @@ class PRSimilarIssue:
|
||||
)
|
||||
corpus.append(comment_record)
|
||||
df = pd.DataFrame(corpus.dict()["documents"])
|
||||
logging.info('Done')
|
||||
get_logger().info('Done')
|
||||
|
||||
logging.info('Embedding...')
|
||||
get_logger().info('Embedding...')
|
||||
openai.api_key = get_settings().openai.key
|
||||
list_to_encode = list(df["text"].values)
|
||||
try:
|
||||
@ -220,7 +237,7 @@ class PRSimilarIssue:
|
||||
embeds = [record['embedding'] for record in res['data']]
|
||||
except:
|
||||
embeds = []
|
||||
logging.error('Failed to embed entire list, embedding one by one...')
|
||||
get_logger().error('Failed to embed entire list, embedding one by one...')
|
||||
for i, text in enumerate(list_to_encode):
|
||||
try:
|
||||
res = openai.Embedding.create(input=[text], engine=MODEL)
|
||||
@ -231,21 +248,23 @@ class PRSimilarIssue:
|
||||
meta = DatasetMetadata.empty()
|
||||
meta.dense_model.dimension = len(embeds[0])
|
||||
ds = Dataset.from_pandas(df, meta)
|
||||
logging.info('Done')
|
||||
get_logger().info('Done')
|
||||
|
||||
api_key = get_settings().pinecone.api_key
|
||||
environment = get_settings().pinecone.environment
|
||||
if not upsert:
|
||||
logging.info('Creating index from scratch...')
|
||||
get_logger().info('Creating index from scratch...')
|
||||
ds.to_pinecone_index(self.index_name, api_key=api_key, environment=environment)
|
||||
time.sleep(15) # wait for pinecone to finalize indexing before querying
|
||||
else:
|
||||
logging.info('Upserting index...')
|
||||
get_logger().info('Upserting index...')
|
||||
namespace = ""
|
||||
batch_size: int = 100
|
||||
concurrency: int = 10
|
||||
pinecone.init(api_key=api_key, environment=environment)
|
||||
ds._upsert_to_index(self.index_name, namespace, batch_size, concurrency)
|
||||
logging.info('Done')
|
||||
time.sleep(5) # wait for pinecone to finalize upserting before querying
|
||||
get_logger().info('Done')
|
||||
|
||||
|
||||
class IssueLevel(str, Enum):
|
||||
|
@ -1,5 +1,4 @@
|
||||
import copy
|
||||
import logging
|
||||
from datetime import date
|
||||
from time import sleep
|
||||
from typing import Tuple
|
||||
@ -10,8 +9,9 @@ from pr_agent.algo.ai_handler import BaseAiHandler, AiHandler
|
||||
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
|
||||
from pr_agent.algo.token_handler import TokenHandler
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.git_providers import GithubProvider, get_git_provider
|
||||
from pr_agent.git_providers import get_git_provider
|
||||
from pr_agent.git_providers.git_provider import get_main_pr_language
|
||||
from pr_agent.log import get_logger
|
||||
|
||||
CHANGELOG_LINES = 50
|
||||
|
||||
@ -48,26 +48,26 @@ class PRUpdateChangelog:
|
||||
async def run(self):
|
||||
# assert type(self.git_provider) == GithubProvider, "Currently only Github is supported"
|
||||
|
||||
logging.info('Updating the changelog...')
|
||||
get_logger().info('Updating the changelog...')
|
||||
if get_settings().config.publish_output:
|
||||
self.git_provider.publish_comment("Preparing changelog updates...", is_temporary=True)
|
||||
await retry_with_fallback_models(self._prepare_prediction)
|
||||
logging.info('Preparing PR changelog updates...')
|
||||
get_logger().info('Preparing PR changelog updates...')
|
||||
new_file_content, answer = self._prepare_changelog_update()
|
||||
if get_settings().config.publish_output:
|
||||
self.git_provider.remove_initial_comment()
|
||||
logging.info('Publishing changelog updates...')
|
||||
get_logger().info('Publishing changelog updates...')
|
||||
if self.commit_changelog:
|
||||
logging.info('Pushing PR changelog updates to repo...')
|
||||
get_logger().info('Pushing PR changelog updates to repo...')
|
||||
self._push_changelog_update(new_file_content, answer)
|
||||
else:
|
||||
logging.info('Publishing PR changelog as comment...')
|
||||
get_logger().info('Publishing PR changelog as comment...')
|
||||
self.git_provider.publish_comment(f"**Changelog updates:**\n\n{answer}")
|
||||
|
||||
async def _prepare_prediction(self, model: str):
|
||||
logging.info('Getting PR diff...')
|
||||
get_logger().info('Getting PR diff...')
|
||||
self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)
|
||||
logging.info('Getting AI prediction...')
|
||||
get_logger().info('Getting AI prediction...')
|
||||
self.prediction = await self._get_prediction(model)
|
||||
|
||||
async def _get_prediction(self, model: str):
|
||||
@ -77,8 +77,8 @@ class PRUpdateChangelog:
|
||||
system_prompt = environment.from_string(get_settings().pr_update_changelog_prompt.system).render(variables)
|
||||
user_prompt = environment.from_string(get_settings().pr_update_changelog_prompt.user).render(variables)
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
logging.info(f"\nSystem prompt:\n{system_prompt}")
|
||||
logging.info(f"\nUser prompt:\n{user_prompt}")
|
||||
get_logger().info(f"\nSystem prompt:\n{system_prompt}")
|
||||
get_logger().info(f"\nUser prompt:\n{user_prompt}")
|
||||
response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
|
||||
system=system_prompt, user=user_prompt)
|
||||
|
||||
@ -100,7 +100,7 @@ class PRUpdateChangelog:
|
||||
"\n>'/update_changelog --pr_update_changelog.push_changelog_changes=true'\n"
|
||||
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
logging.info(f"answer:\n{answer}")
|
||||
get_logger().info(f"answer:\n{answer}")
|
||||
|
||||
return new_file_content, answer
|
||||
|
||||
@ -149,7 +149,7 @@ Example:
|
||||
except Exception:
|
||||
self.changelog_file_str = ""
|
||||
if self.commit_changelog:
|
||||
logging.info("No CHANGELOG.md file found in the repository. Creating one...")
|
||||
get_logger().info("No CHANGELOG.md file found in the repository. Creating one...")
|
||||
changelog_file = self.git_provider.repo_obj.create_file(path="CHANGELOG.md",
|
||||
message='add CHANGELOG.md',
|
||||
content="",
|
||||
|
Reference in New Issue
Block a user