mirror of
https://github.com/qodo-ai/pr-agent.git
synced 2025-07-04 12:50:38 +08:00
Compare commits
10 Commits
hl/refacto
...
hl/gitlab_
Author | SHA1 | Date | |
---|---|---|---|
c58e1f90e7 | |||
d363f148f0 | |||
cbf96a2e67 | |||
4d87c3ec6a | |||
c13c52d733 | |||
fc309f69b9 | |||
7efb5cf74e | |||
8e200197c5 | |||
e638dc075c | |||
f4de3d2899 |
@ -62,9 +62,9 @@ CodiumAI `PR-Agent` is an open-source tool aiming to help developers review PRs
|
||||
|
||||
## Live demo
|
||||
|
||||
Experience GPT-4 powered PR review on your public GitHub repository with our hosted PR-Agent. To try it, just mention `@CodiumAI-Agent` in any PR comment! The agent will generate a PR review in response.
|
||||
Experience GPT-4 powered PR review on your public GitHub repository with our hosted PR-Agent. To try it, just mention `@CodiumAI-Agent` and add the desired command in any PR comment! The agent will generate a response based on your command.
|
||||
|
||||

|
||||

|
||||
|
||||
To set up your own PR-Agent, see the [Quickstart](#Quickstart) section
|
||||
|
||||
@ -77,7 +77,7 @@ To set up your own PR-Agent, see the [Quickstart](#Quickstart) section
|
||||
| | ⮑ Inline review | ✓ | ✓ | |
|
||||
| | Ask | ✓ | ✓ | |
|
||||
| | Auto-Description | ✓ | | |
|
||||
| | Improve Code | ✓ | | |
|
||||
| | Improve Code | ✓ | ✓ | |
|
||||
| | | | | |
|
||||
| USAGE | CLI | ✓ | ✓ | ✓ |
|
||||
| | Tagging bot | ✓ | ✓ | |
|
||||
|
BIN
pics/demo.gif
Normal file
BIN
pics/demo.gif
Normal file
Binary file not shown.
After Width: | Height: | Size: 13 MiB |
@ -61,18 +61,24 @@ def parse_code_suggestion(code_suggestions: dict) -> str:
|
||||
return markdown_text
|
||||
|
||||
|
||||
def try_fix_json(review, max_iter=10):
|
||||
def try_fix_json(review, max_iter=10, code_suggestions=False):
|
||||
if review.endswith("}"):
|
||||
return fix_json_escape_char(review)
|
||||
# Try to fix JSON if it is broken/incomplete: parse until the last valid code suggestion
|
||||
data = {}
|
||||
if code_suggestions:
|
||||
closing_bracket = "]}"
|
||||
else:
|
||||
closing_bracket = "]}}"
|
||||
if review.rfind("'Code suggestions': [") > 0 or review.rfind('"Code suggestions": [') > 0:
|
||||
last_code_suggestion_ind = [m.end() for m in re.finditer(r"\}\s*,", review)][-1] - 1
|
||||
valid_json = False
|
||||
iter_count = 0
|
||||
while last_code_suggestion_ind > 0 and not valid_json and iter_count < max_iter:
|
||||
try:
|
||||
data = json.loads(review[:last_code_suggestion_ind] + "]}}")
|
||||
data = json.loads(review[:last_code_suggestion_ind] + closing_bracket)
|
||||
valid_json = True
|
||||
review = review[:last_code_suggestion_ind].strip() + "]}}"
|
||||
review = review[:last_code_suggestion_ind].strip() + closing_bracket
|
||||
except json.decoder.JSONDecodeError:
|
||||
review = review[:last_code_suggestion_ind]
|
||||
# Use regular expression to find the last occurrence of "}," with any number of whitespaces or newlines
|
||||
@ -82,3 +88,17 @@ def try_fix_json(review, max_iter=10):
|
||||
logging.error("Unable to decode JSON response from AI")
|
||||
data = {}
|
||||
return data
|
||||
|
||||
def fix_json_escape_char(json_message=None):
    """
    Parse a JSON string, blanking out the characters the decoder rejects.

    Every time ``json.loads`` fails, the character at the position reported by
    the decoder is replaced with a single space and parsing is retried.

    Args:
        json_message: The JSON text to parse.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If the reported position is past the end of the
            message or already holds a space, i.e. blanking it cannot help.
        TypeError: Propagated from ``json.loads`` when ``json_message`` is not
            a string-like object (for example the default ``None``).
    """
    chars = None
    while True:
        try:
            return json.loads(json_message)
        except json.JSONDecodeError as e:
            # The decoder exposes the failing offset directly; no need to parse
            # it back out of the exception text.
            idx_to_replace = e.pos
            if chars is None:
                chars = list(json_message)
            # Stop when no progress is possible, otherwise the loop would
            # either index out of range or spin forever on the same offset.
            if idx_to_replace >= len(chars) or chars[idx_to_replace] == ' ':
                raise
            # Remove the offending character:
            chars[idx_to_replace] = ' '
            json_message = ''.join(chars)
|
@ -5,6 +5,7 @@ import os
|
||||
|
||||
from pr_agent.tools.pr_code_suggestions import PRCodeSuggestions
|
||||
from pr_agent.tools.pr_description import PRDescription
|
||||
from pr_agent.tools.pr_information_from_user import PRInformationFromUser
|
||||
from pr_agent.tools.pr_questions import PRQuestions
|
||||
from pr_agent.tools.pr_reviewer import PRReviewer
|
||||
|
||||
@ -28,7 +29,8 @@ improve / improve_code - Suggest improvements to the code in the PR as pull requ
|
||||
parser.add_argument('command', type=str, help='The', choices=['review', 'review_pr',
|
||||
'ask', 'ask_question',
|
||||
'describe', 'describe_pr',
|
||||
'improve', 'improve_code'], default='review')
|
||||
'improve', 'improve_code',
|
||||
'user_questions'], default='review')
|
||||
parser.add_argument('rest', nargs=argparse.REMAINDER, default=[])
|
||||
args = parser.parse_args()
|
||||
logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))
|
||||
@ -54,6 +56,10 @@ improve / improve_code - Suggest improvements to the code in the PR as pull requ
|
||||
print(f"Reviewing PR: {args.pr_url}")
|
||||
reviewer = PRReviewer(args.pr_url, cli_mode=True)
|
||||
asyncio.run(reviewer.review())
|
||||
elif command in ['user_questions']:
|
||||
print(f"Asking the PR author questions: {args.pr_url}")
|
||||
reviewer = PRInformationFromUser(args.pr_url)
|
||||
asyncio.run(reviewer.generate_questions())
|
||||
else:
|
||||
print(f"Unknown command: {command}")
|
||||
parser.print_help()
|
||||
|
@ -13,6 +13,7 @@ settings = Dynaconf(
|
||||
"settings/pr_questions_prompts.toml",
|
||||
"settings/pr_description_prompts.toml",
|
||||
"settings/pr_code_suggestions_prompts.toml",
|
||||
"settings/pr_information_from_user_prompts.toml",
|
||||
"settings_prod/.secrets.toml"
|
||||
]]
|
||||
)
|
||||
|
@ -61,9 +61,6 @@ class BitbucketProvider:
|
||||
def get_title(self):
|
||||
return self.pr.title
|
||||
|
||||
def get_description(self):
|
||||
return self.pr.body
|
||||
|
||||
def get_languages(self):
|
||||
languages = {self._get_repo().get_data('language'): 0}
|
||||
return languages
|
||||
|
@ -134,9 +134,6 @@ class GithubProvider(GitProvider):
|
||||
def get_title(self):
|
||||
return self.pr.title
|
||||
|
||||
def get_description(self):
|
||||
return self.pr.body
|
||||
|
||||
def get_languages(self):
|
||||
languages = self._get_repo().get_languages()
|
||||
return languages
|
||||
|
@ -28,6 +28,8 @@ class GitLabProvider(GitProvider):
|
||||
self.diff_files = None
|
||||
self.temp_comments = []
|
||||
self._set_merge_request(merge_request_url)
|
||||
self.RE_HUNK_HEADER = re.compile(
|
||||
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
|
||||
|
||||
@property
|
||||
def pr(self):
|
||||
@ -84,25 +86,26 @@ class GitLabProvider(GitProvider):
|
||||
self.diff_files = self.diff_files if self.diff_files else self.get_diff_files()
|
||||
edit_type, found, source_line_no, target_file, target_line_no = self.search_line(relevant_file,
|
||||
relevant_line_in_file)
|
||||
self.send_inline_comment(body, edit_type, found, relevant_file, relevant_line_in_file, source_line_no,
|
||||
target_file, target_line_no)
|
||||
|
||||
def send_inline_comment(self, body, edit_type, found, relevant_file, relevant_line_in_file, source_line_no,
|
||||
target_file, target_line_no):
|
||||
if not found:
|
||||
logging.info(f"Could not find position for {relevant_file} {relevant_line_in_file}")
|
||||
else:
|
||||
if edit_type == 'addition':
|
||||
position = target_line_no - 1
|
||||
else:
|
||||
position = source_line_no - 1
|
||||
d = self.last_diff
|
||||
pos_obj = {'position_type': 'text',
|
||||
'new_path': target_file.filename,
|
||||
'old_path': target_file.old_filename if target_file.old_filename else target_file.filename,
|
||||
'base_sha': d.base_commit_sha, 'start_sha': d.start_commit_sha, 'head_sha': d.head_commit_sha}
|
||||
'new_path': target_file.filename,
|
||||
'old_path': target_file.old_filename if target_file.old_filename else target_file.filename,
|
||||
'base_sha': d.base_commit_sha, 'start_sha': d.start_commit_sha, 'head_sha': d.head_commit_sha}
|
||||
if edit_type == 'deletion':
|
||||
pos_obj['old_line'] = position
|
||||
pos_obj['old_line'] = source_line_no - 1
|
||||
elif edit_type == 'addition':
|
||||
pos_obj['new_line'] = position
|
||||
pos_obj['new_line'] = target_line_no - 1
|
||||
else:
|
||||
pos_obj['new_line'] = position
|
||||
pos_obj['old_line'] = position
|
||||
pos_obj['new_line'] = target_line_no - 1
|
||||
pos_obj['old_line'] = source_line_no - 1
|
||||
self.mr.discussions.create({'body': body,
|
||||
'position': pos_obj})
|
||||
|
||||
@ -110,47 +113,81 @@ class GitLabProvider(GitProvider):
|
||||
relevant_file: str,
|
||||
relevant_lines_start: int,
|
||||
relevant_lines_end: int):
|
||||
raise "not implemented yet for gitlab"
|
||||
self.diff_files = self.diff_files if self.diff_files else self.get_diff_files()
|
||||
target_file = None
|
||||
for file in self.diff_files:
|
||||
if file.filename == relevant_file:
|
||||
if file.filename == relevant_file:
|
||||
target_file = file
|
||||
break
|
||||
range = relevant_lines_end - relevant_lines_start + 1
|
||||
body = body.replace('```suggestion', f'```suggestion:-0+{range}')
|
||||
|
||||
d = self.last_diff
|
||||
#
|
||||
# pos_obj = {'position_type': 'text',
|
||||
# 'new_path': target_file.filename,
|
||||
# 'old_path': target_file.old_filename if target_file.old_filename else target_file.filename,
|
||||
# 'base_sha': d.base_commit_sha, 'start_sha': d.start_commit_sha, 'head_sha': d.head_commit_sha}
|
||||
lines = target_file.head_file.splitlines()
|
||||
relevant_line_in_file = lines[relevant_lines_start - 1]
|
||||
edit_type, found, source_line_no, target_file, target_line_no = self.find_in_file(target_file, relevant_line_in_file)
|
||||
self.send_inline_comment(body, edit_type, found, relevant_file, relevant_line_in_file, source_line_no,
|
||||
target_file, target_line_no)
|
||||
# if lines[relevant_lines_start][0] == '-':
|
||||
# pos_obj['old_line'] = relevant_lines_start
|
||||
# elif lines[relevant_lines_start][0] == '+':
|
||||
# pos_obj['new_line'] = relevant_lines_start
|
||||
# else:
|
||||
# pos_obj['new_line'] = relevant_lines_start
|
||||
# pos_obj['old_line'] = relevant_lines_start
|
||||
# self.mr.discussions.create({'body': body,
|
||||
# 'position': pos_obj})
|
||||
|
||||
def search_line(self, relevant_file, relevant_line_in_file):
|
||||
RE_HUNK_HEADER = re.compile(
|
||||
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
|
||||
target_file = None
|
||||
source_line_no = 0
|
||||
target_line_no = 0
|
||||
found = False
|
||||
|
||||
edit_type = self.get_edit_type(relevant_line_in_file)
|
||||
for file in self.diff_files:
|
||||
if file.filename == relevant_file:
|
||||
target_file = file
|
||||
patch = file.patch
|
||||
patch_lines = patch.splitlines()
|
||||
for i, line in enumerate(patch_lines):
|
||||
if line.startswith('@@'):
|
||||
match = RE_HUNK_HEADER.match(line)
|
||||
if not match:
|
||||
continue
|
||||
start_old, size_old, start_new, size_new, _ = match.groups()
|
||||
source_line_no = int(start_old)
|
||||
target_line_no = int(start_new)
|
||||
continue
|
||||
if line.startswith('-'):
|
||||
source_line_no += 1
|
||||
elif line.startswith('+'):
|
||||
target_line_no += 1
|
||||
elif line.startswith(' '):
|
||||
source_line_no += 1
|
||||
target_line_no += 1
|
||||
if relevant_line_in_file in line:
|
||||
found = True
|
||||
edit_type = self.get_edit_type(line)
|
||||
break
|
||||
elif relevant_line_in_file[0] == '+' and relevant_line_in_file[1:] in line:
|
||||
# The model often adds a '+' to the beginning of the relevant_line_in_file even if originally
|
||||
# it's a context line
|
||||
found = True
|
||||
edit_type = self.get_edit_type(line)
|
||||
break
|
||||
edit_type, found, source_line_no, target_file, target_line_no = self.find_in_file(file,
|
||||
relevant_line_in_file)
|
||||
return edit_type, found, source_line_no, target_file, target_line_no
|
||||
|
||||
def find_in_file(self, file, relevant_line_in_file):
    """
    Locate ``relevant_line_in_file`` inside the unified-diff patch of ``file``.

    The patch is walked line by line while two counters track the line numbers
    on the old (source) and new (target) side. Hunk headers reset both counters
    to the header's start values; every body line advances the counter(s) for
    the side(s) it belongs to *before* it is compared, so the returned numbers
    are one past the header-relative position of the matching line.

    Args:
        file: Object exposing the diff text through its ``patch`` attribute.
        relevant_line_in_file: Text to search for. A line counts as a match when
            it contains this text, or contains it without a leading '+'.

    Returns:
        A tuple ``(edit_type, found, source_line_no, target_file, target_line_no)``.
        ``edit_type`` is ``'context'`` unless a match was found, in which case
        it is whatever ``self.get_edit_type`` returns for the matching patch
        line. ``target_file`` is the ``file`` argument itself.
    """
    old_side_no = 0
    new_side_no = 0
    kind = 'context'
    hit = False

    for patch_line in file.patch.splitlines():
        if patch_line.startswith('@@'):
            header = self.RE_HUNK_HEADER.match(patch_line)
            if header:
                # Groups 1 and 3 are the old-side and new-side start lines.
                old_side_no = int(header.group(1))
                new_side_no = int(header.group(3))
            # Header lines (parsable or not) are never compared to the target.
            continue

        marker = patch_line[:1]
        if marker in ('-', ' '):
            old_side_no += 1
        if marker in ('+', ' '):
            new_side_no += 1

        # The model often adds a '+' to the beginning of the relevant_line_in_file
        # even if originally it's a context line, so retry without that prefix.
        if relevant_line_in_file in patch_line or (
                relevant_line_in_file[0] == '+' and relevant_line_in_file[1:] in patch_line):
            hit = True
            kind = self.get_edit_type(patch_line)
            break

    return kind, hit, old_side_no, file, new_side_no
|
||||
|
||||
def get_edit_type(self, relevant_line_in_file):
|
||||
@ -171,9 +208,6 @@ class GitLabProvider(GitProvider):
|
||||
def get_title(self):
|
||||
return self.mr.title
|
||||
|
||||
def get_description(self):
|
||||
return self.mr.description
|
||||
|
||||
def get_languages(self):
|
||||
languages = self.gl.projects.get(self.id_project).languages()
|
||||
return languages
|
||||
|
@ -2,7 +2,7 @@
|
||||
model="gpt-4-0613"
|
||||
git_provider="github"
|
||||
publish_review=true
|
||||
verbosity_level=0 # 0,1,2
|
||||
verbosity_level=2 # 0,1,2
|
||||
|
||||
[pr_reviewer]
|
||||
require_focused_review=true
|
||||
|
33
pr_agent/settings/pr_information_from_user_prompts.toml
Normal file
33
pr_agent/settings/pr_information_from_user_prompts.toml
Normal file
@ -0,0 +1,33 @@
|
||||
[pr_information_from_user_prompt]
|
||||
system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests.
|
||||
Given the PR Info and the PR Git Diff, generate 4 questions about the PR for the PR author.
|
||||
The goal of the questions is to help the language model understand the PR better, so the questions should be insightful, informative, non-trivial, and relevant to the PR.
|
||||
Prefer yes/no or multiple-choice questions. If you have to ask open-ended questions, make sure they are not too difficult and can be answered in a sentence or two.
|
||||
|
||||
|
||||
Example output:
|
||||
'
|
||||
Questions to better understand the PR:
|
||||
1. ...
|
||||
2. ...
|
||||
...
'
|
||||
"""
|
||||
|
||||
user="""PR Info:
|
||||
Title: '{{title}}'
|
||||
Branch: '{{branch}}'
|
||||
Description: '{{description}}'
|
||||
{%- if language %}
|
||||
Main language: {{language}}
|
||||
{%- endif %}
|
||||
|
||||
|
||||
The PR Git Diff:
|
||||
```
|
||||
{{diff}}
|
||||
```
|
||||
Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines
|
||||
|
||||
|
||||
Response:
|
||||
"""
|
@ -10,7 +10,7 @@ from pr_agent.algo.pr_processing import get_pr_diff
|
||||
from pr_agent.algo.token_handler import TokenHandler
|
||||
from pr_agent.algo.utils import convert_to_markdown, try_fix_json
|
||||
from pr_agent.config_loader import settings
|
||||
from pr_agent.git_providers import get_git_provider, GithubProvider
|
||||
from pr_agent.git_providers import get_git_provider, BitbucketProvider
|
||||
from pr_agent.git_providers.git_provider import get_main_pr_language
|
||||
|
||||
|
||||
@ -39,7 +39,7 @@ class PRCodeSuggestions:
|
||||
settings.pr_code_suggestions_prompt.user)
|
||||
|
||||
async def suggest(self):
|
||||
assert type(self.git_provider) == GithubProvider, "Only Github is supported for now"
|
||||
assert type(self.git_provider) != BitbucketProvider, "Bitbucket is not supported for now"
|
||||
|
||||
logging.info('Generating code suggestions for PR...')
|
||||
if settings.config.publish_review:
|
||||
@ -86,7 +86,7 @@ class PRCodeSuggestions:
|
||||
except json.decoder.JSONDecodeError:
|
||||
if settings.config.verbosity_level >= 2:
|
||||
logging.info(f"Could not parse json response: {review}")
|
||||
data = try_fix_json(review)
|
||||
data = try_fix_json(review, code_suggestions=True)
|
||||
return data
|
||||
|
||||
def push_inline_code_suggestions(self, data):
|
||||
|
@ -23,7 +23,7 @@ class PRDescription:
|
||||
self.vars = {
|
||||
"title": self.git_provider.pr.title,
|
||||
"branch": self.git_provider.get_pr_branch(),
|
||||
"description": self.git_provider.get_description(),
|
||||
"description": self.git_provider.get_pr_description(),
|
||||
"language": self.main_pr_language,
|
||||
"diff": "", # empty diff for initial calculation
|
||||
}
|
||||
|
71
pr_agent/tools/pr_information_from_user.py
Normal file
71
pr_agent/tools/pr_information_from_user.py
Normal file
@ -0,0 +1,71 @@
|
||||
import copy
|
||||
import logging
|
||||
|
||||
from jinja2 import Environment, StrictUndefined
|
||||
|
||||
from pr_agent.algo.ai_handler import AiHandler
|
||||
from pr_agent.algo.pr_processing import get_pr_diff
|
||||
from pr_agent.algo.token_handler import TokenHandler
|
||||
from pr_agent.config_loader import settings
|
||||
from pr_agent.git_providers import get_git_provider
|
||||
from pr_agent.git_providers.git_provider import get_main_pr_language
|
||||
|
||||
|
||||
class PRInformationFromUser:
    """
    Tool that asks the PR author clarifying questions generated by the model.

    The prompts are read from ``settings.pr_information_from_user_prompt`` and
    rendered with Jinja2 using the PR title, branch, description, main language
    and diff.
    """

    def __init__(self, pr_url: str):
        """
        Collect the PR metadata used to render the prompts.

        Args:
            pr_url: URL of the pull request, handed to the configured git provider.
        """
        self.git_provider = get_git_provider()(pr_url)
        self.main_pr_language = get_main_pr_language(
            self.git_provider.get_languages(), self.git_provider.get_files()
        )
        self.ai_handler = AiHandler()
        self.vars = {
            "title": self.git_provider.pr.title,
            "branch": self.git_provider.get_pr_branch(),
            # The providers expose the description as get_pr_description();
            # the other tools (PRDescription, PRQuestions) use the same accessor.
            "description": self.git_provider.get_pr_description(),
            "language": self.main_pr_language,
            "diff": "",  # empty diff for initial calculation
        }
        self.token_handler = TokenHandler(self.git_provider.pr,
                                          self.vars,
                                          settings.pr_information_from_user_prompt.system,
                                          settings.pr_information_from_user_prompt.user)
        self.patches_diff = None
        self.prediction = None

    async def generate_questions(self):
        """
        Fetch the PR diff, query the model and publish the resulting questions.

        Comments are only published (and the temporary one removed) when
        ``settings.config.publish_review`` is enabled.

        Returns:
            An empty string.
        """
        logging.info('Generating question to the user...')
        if settings.config.publish_review:
            self.git_provider.publish_comment("Preparing answer...", is_temporary=True)
        logging.info('Getting PR diff...')
        self.patches_diff = get_pr_diff(self.git_provider, self.token_handler)
        logging.info('Getting AI prediction...')
        self.prediction = await self._get_prediction()
        logging.info('Preparing questions...')
        pr_comment = self._prepare_pr_answer()
        if settings.config.publish_review:
            logging.info('Pushing questions...')
            self.git_provider.publish_comment(pr_comment)
            # The temporary comment only exists when publishing is enabled.
            self.git_provider.remove_initial_comment()
        return ""

    async def _get_prediction(self):
        """
        Render the system and user prompts and request a chat completion.

        Returns:
            The response returned by ``self.ai_handler.chat_completion``.
        """
        variables = copy.deepcopy(self.vars)
        variables["diff"] = self.patches_diff  # update diff
        # StrictUndefined makes a missing template variable fail loudly instead
        # of rendering as an empty string.
        environment = Environment(undefined=StrictUndefined)
        system_prompt = environment.from_string(settings.pr_information_from_user_prompt.system).render(variables)
        user_prompt = environment.from_string(settings.pr_information_from_user_prompt.user).render(variables)
        if settings.config.verbosity_level >= 2:
            logging.info(f"\nSystem prompt:\n{system_prompt}")
            logging.info(f"\nUser prompt:\n{user_prompt}")
        model = settings.config.model
        # The finish reason is not used here.
        response, _ = await self.ai_handler.chat_completion(model=model, temperature=0.2,
                                                            system=system_prompt, user=user_prompt)
        return response

    def _prepare_pr_answer(self) -> str:
        """
        Build the comment body: the model output followed by reply instructions.

        Returns:
            The stripped prediction with the ``/answer`` usage text appended.
        """
        model_output = self.prediction.strip()
        if settings.config.verbosity_level >= 2:
            logging.info(f"answer_str:\n{model_output}")
        answer_str = (
            f"{model_output}\n\n Please respond to the question above in the following format:\n\n"
            "/answer <question_id> <answer>\n\n"
            "Example:\n'\n/answer\n1. Yes, because ...\n2. No, because ...\n'"
        )
        return answer_str
|
@ -22,7 +22,7 @@ class PRQuestions:
|
||||
self.vars = {
|
||||
"title": self.git_provider.pr.title,
|
||||
"branch": self.git_provider.get_pr_branch(),
|
||||
"description": self.git_provider.get_description(),
|
||||
"description": self.git_provider.get_pr_description(),
|
||||
"language": self.main_pr_language,
|
||||
"diff": "", # empty diff for initial calculation
|
||||
"questions": self.question_str,
|
||||
|
Reference in New Issue
Block a user