Files
pr-agent/pr_agent/tools/pr_add_docs.py

181 lines
8.5 KiB
Python
Raw Normal View History

2023-09-27 16:48:17 +03:00
import copy
import textwrap
from functools import partial
from typing import Dict
2023-09-27 16:48:17 +03:00
from jinja2 import Environment, StrictUndefined
2023-12-12 23:03:38 +08:00
from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
2023-09-27 16:48:17 +03:00
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import load_yaml
2023-09-27 16:48:17 +03:00
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import get_git_provider
2023-09-27 16:48:17 +03:00
from pr_agent.git_providers.git_provider import get_main_pr_language
from pr_agent.log import get_logger
2023-09-27 16:48:17 +03:00
class PRAddDocs:
def __init__(self, pr_url: str, cli_mode=False, args: list = None,
ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
2023-09-27 16:48:17 +03:00
self.git_provider = get_git_provider()(pr_url)
self.main_language = get_main_pr_language(
self.git_provider.get_languages(), self.git_provider.get_files()
)
self.ai_handler = ai_handler()
self.ai_handler.main_pr_language = self.main_language
2023-09-27 16:48:17 +03:00
self.patches_diff = None
self.prediction = None
self.cli_mode = cli_mode
self.vars = {
"title": self.git_provider.pr.title,
"branch": self.git_provider.get_pr_branch(),
"description": self.git_provider.get_pr_description(),
"language": self.main_language,
"diff": "", # empty diff for initial calculation
2023-10-01 13:52:00 +03:00
"extra_instructions": get_settings().pr_add_docs.extra_instructions,
2023-09-27 16:48:17 +03:00
"commit_messages_str": self.git_provider.get_commit_messages(),
'docs_for_language': get_docs_for_language(self.main_language,
2023-10-01 13:52:00 +03:00
get_settings().pr_add_docs.docs_style),
2023-09-27 16:48:17 +03:00
}
self.token_handler = TokenHandler(self.git_provider.pr,
self.vars,
get_settings().pr_add_docs_prompt.system,
get_settings().pr_add_docs_prompt.user)
async def run(self):
try:
get_logger().info('Generating code Docs for PR...')
2023-09-27 16:48:17 +03:00
if get_settings().config.publish_output:
2023-10-01 19:51:15 +03:00
self.git_provider.publish_comment("Generating Documentation...", is_temporary=True)
get_logger().info('Preparing PR documentation...')
2023-10-01 19:51:15 +03:00
await retry_with_fallback_models(self._prepare_prediction)
data = self._prepare_pr_code_docs()
2023-09-28 20:11:18 +03:00
if (not data) or (not 'Code Documentation' in data):
get_logger().info('No code documentation found for PR.')
2023-09-27 16:48:17 +03:00
return
if get_settings().config.publish_output:
get_logger().info('Pushing PR documentation...')
2023-09-27 16:48:17 +03:00
self.git_provider.remove_initial_comment()
get_logger().info('Pushing inline code documentation...')
2023-10-01 13:07:08 +03:00
self.push_inline_docs(data)
2023-09-27 16:48:17 +03:00
except Exception as e:
get_logger().error(f"Failed to generate code documentation for PR, error: {e}")
2023-09-27 16:48:17 +03:00
async def _prepare_prediction(self, model: str):
get_logger().info('Getting PR diff...')
2023-09-27 16:48:17 +03:00
self.patches_diff = get_pr_diff(self.git_provider,
self.token_handler,
model,
add_line_numbers_to_hunks=True,
2023-10-05 08:38:43 +03:00
disable_extra_lines=False)
2023-09-27 16:48:17 +03:00
get_logger().info('Getting AI prediction...')
2023-09-27 16:48:17 +03:00
self.prediction = await self._get_prediction(model)
async def _get_prediction(self, model: str):
variables = copy.deepcopy(self.vars)
variables["diff"] = self.patches_diff # update diff
environment = Environment(undefined=StrictUndefined)
system_prompt = environment.from_string(get_settings().pr_add_docs_prompt.system).render(variables)
user_prompt = environment.from_string(get_settings().pr_add_docs_prompt.user).render(variables)
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"\nSystem prompt:\n{system_prompt}")
get_logger().info(f"\nUser prompt:\n{user_prompt}")
response, finish_reason = await self.ai_handler.chat_completion(
model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)
2023-09-27 16:48:17 +03:00
return response
2023-09-28 20:11:18 +03:00
def _prepare_pr_code_docs(self) -> Dict:
2023-10-01 19:51:15 +03:00
docs = self.prediction.strip()
data = load_yaml(docs)
2023-09-27 16:48:17 +03:00
if isinstance(data, list):
2023-09-28 20:11:18 +03:00
data = {'Code Documentation': data}
2023-09-27 16:48:17 +03:00
return data
2023-10-01 13:07:08 +03:00
def push_inline_docs(self, data):
docs = []
2023-09-27 16:48:17 +03:00
2023-09-28 20:11:18 +03:00
if not data['Code Documentation']:
return self.git_provider.publish_comment('No code documentation found to improve this PR.')
2023-09-27 16:48:17 +03:00
2023-09-28 20:11:18 +03:00
for d in data['Code Documentation']:
2023-09-27 16:48:17 +03:00
try:
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"add_docs: {d}")
2023-09-27 16:48:17 +03:00
relevant_file = d['relevant file'].strip()
2023-09-28 20:11:18 +03:00
relevant_line = int(d['relevant line']) # absolute position
documentation = d['documentation']
2023-09-28 22:44:15 +03:00
doc_placement = d['doc placement'].strip()
2023-09-28 20:11:18 +03:00
if documentation:
2023-09-28 22:44:15 +03:00
new_code_snippet = self.dedent_code(relevant_file, relevant_line, documentation, doc_placement,
2023-09-28 20:15:18 +03:00
add_original_line=True)
2023-09-28 20:11:18 +03:00
body = f"**Suggestion:** Proposed documentation\n```suggestion\n" + new_code_snippet + "\n```"
2023-10-01 13:07:08 +03:00
docs.append({'body': body, 'relevant_file': relevant_file,
2023-09-28 20:15:18 +03:00
'relevant_lines_start': relevant_line,
'relevant_lines_end': relevant_line})
2023-09-27 16:48:17 +03:00
except Exception:
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"Could not parse code docs: {d}")
2023-09-27 16:48:17 +03:00
2023-10-01 13:07:08 +03:00
is_successful = self.git_provider.publish_code_suggestions(docs)
2023-09-27 16:48:17 +03:00
if not is_successful:
get_logger().info("Failed to publish code docs, trying to publish each docs separately")
2023-10-01 13:52:00 +03:00
for doc_suggestion in docs:
self.git_provider.publish_code_suggestions([doc_suggestion])
2023-09-27 16:48:17 +03:00
def dedent_code(self, relevant_file, relevant_lines_start, new_code_snippet, doc_placement='after',
add_original_line=False):
2023-09-27 16:48:17 +03:00
try: # dedent code snippet
self.diff_files = self.git_provider.diff_files if self.git_provider.diff_files \
else self.git_provider.get_diff_files()
original_initial_line = None
for file in self.diff_files:
if file.filename.strip() == relevant_file:
original_initial_line = file.head_file.splitlines()[relevant_lines_start - 1]
break
if original_initial_line:
2023-09-28 22:44:15 +03:00
if doc_placement == 'after':
line = file.head_file.splitlines()[relevant_lines_start]
else:
line = original_initial_line
2023-09-27 16:48:17 +03:00
suggested_initial_line = new_code_snippet.splitlines()[0]
2023-09-28 22:44:15 +03:00
original_initial_spaces = len(line) - len(line.lstrip())
2023-09-27 16:48:17 +03:00
suggested_initial_spaces = len(suggested_initial_line) - len(suggested_initial_line.lstrip())
delta_spaces = original_initial_spaces - suggested_initial_spaces
if delta_spaces > 0:
new_code_snippet = textwrap.indent(new_code_snippet, delta_spaces * " ").rstrip('\n')
2023-09-28 20:11:18 +03:00
if add_original_line:
2023-09-28 22:44:15 +03:00
if doc_placement == 'after':
new_code_snippet = original_initial_line + "\n" + new_code_snippet
else:
2023-09-28 22:47:26 +03:00
new_code_snippet = new_code_snippet.rstrip() + "\n" + original_initial_line
2023-09-27 16:48:17 +03:00
except Exception as e:
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"Could not dedent code snippet for file {relevant_file}, error: {e}")
2023-09-27 16:48:17 +03:00
return new_code_snippet
2023-09-28 20:15:18 +03:00
2023-09-28 20:58:37 +03:00
def get_docs_for_language(language, style):
2023-09-28 20:15:18 +03:00
language = language.lower()
if language == 'java':
2023-09-28 20:58:37 +03:00
return "Javadocs"
2023-09-28 20:15:18 +03:00
elif language in ['python', 'lisp', 'clojure']:
2023-09-28 20:58:37 +03:00
return f"Docstring ({style})"
2023-09-28 20:15:18 +03:00
elif language in ['javascript', 'typescript']:
2023-09-28 20:58:37 +03:00
return "JSdocs"
2023-09-28 20:15:18 +03:00
elif language == 'c++':
2023-09-28 20:58:37 +03:00
return "Doxygen"
2023-09-28 20:15:18 +03:00
else:
2023-09-28 20:58:37 +03:00
return "Docs"