Compare commits

..

10 Commits

Author SHA1 Message Date
c58e1f90e7 Merge branch 'main' into hl/gitlab_code_suggestion 2023-07-17 02:10:15 +03:00
d363f148f0 Merge pull request #65 from Codium-ai/tr/agent_logic
pr_information_from_user_prompts
2023-07-17 02:08:56 +03:00
cbf96a2e67 Merge pull request #68 from ilchemla/feature/remove-duplicate-function
Removal of Duplicate Function Across Git Providers
2023-07-17 02:07:09 +03:00
4d87c3ec6a Merge pull request #69 from Codium-ai/readme-update-demo
Update README with new demo instructions and gif
2023-07-17 02:04:50 +03:00
c13c52d733 Merge pull request #66 from Codium-ai/hl/refactor_install_md
Refactor Installation Instructions
2023-07-17 02:02:42 +03:00
fc309f69b9 Support Code Suggestion in Gitlab 2023-07-17 01:44:40 +03:00
7efb5cf74e add demo gif 2023-07-16 22:06:54 +03:00
8e200197c5 update demo description 2023-07-16 22:05:09 +03:00
e638dc075c Remove git_provider.get_description() which is a duplicate of git_provider.get_pr_description() 2023-07-16 21:47:48 +03:00
f4de3d2899 pr_information_from_user_prompts 2023-07-16 19:36:20 +03:00
14 changed files with 227 additions and 68 deletions

View File

@ -62,9 +62,9 @@ CodiumAI `PR-Agent` is an open-source tool aiming to help developers review PRs
## Live demo
Experience GPT-4 powered PR review on your public GitHub repository with our hosted PR-Agent. To try it, just mention `@CodiumAI-Agent` in any PR comment! The agent will generate a PR review in response.
Experience GPT-4 powered PR review on your public GitHub repository with our hosted PR-Agent. To try it, just mention `@CodiumAI-Agent` and add the desired command in any PR comment! The agent will generate a response based on your command.
![Review generation process](./pics/pr-agent-review-process1.gif)
![Review generation process](./pics/demo.gif)
To set up your own PR-Agent, see the [Quickstart](#Quickstart) section
@ -77,7 +77,7 @@ To set up your own PR-Agent, see the [Quickstart](#Quickstart) section
| | ⮑ Inline review | ✓ | ✓ | |
| | Ask | ✓ | ✓ | |
| | Auto-Description | ✓ | | |
| | Improve Code | ✓ | | |
| | Improve Code | ✓ | | |
| | | | | |
| USAGE | CLI | ✓ | ✓ | ✓ |
| | Tagging bot | ✓ | ✓ | |

BIN
pics/demo.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 MiB

View File

@ -61,18 +61,24 @@ def parse_code_suggestion(code_suggestions: dict) -> str:
return markdown_text
def try_fix_json(review, max_iter=10):
def try_fix_json(review, max_iter=10, code_suggestions=False):
if review.endswith("}"):
return fix_json_escape_char(review)
# Try to fix JSON if it is broken/incomplete: parse until the last valid code suggestion
data = {}
if code_suggestions:
closing_bracket = "]}"
else:
closing_bracket = "]}}"
if review.rfind("'Code suggestions': [") > 0 or review.rfind('"Code suggestions": [') > 0:
last_code_suggestion_ind = [m.end() for m in re.finditer(r"\}\s*,", review)][-1] - 1
valid_json = False
iter_count = 0
while last_code_suggestion_ind > 0 and not valid_json and iter_count < max_iter:
try:
data = json.loads(review[:last_code_suggestion_ind] + "]}}")
data = json.loads(review[:last_code_suggestion_ind] + closing_bracket)
valid_json = True
review = review[:last_code_suggestion_ind].strip() + "]}}"
review = review[:last_code_suggestion_ind].strip() + closing_bracket
except json.decoder.JSONDecodeError:
review = review[:last_code_suggestion_ind]
# Use regular expression to find the last occurrence of "}," with any number of whitespaces or newlines
@ -82,3 +88,17 @@ def try_fix_json(review, max_iter=10):
logging.error("Unable to decode JSON response from AI")
data = {}
return data
def fix_json_escape_char(json_message=None):
    """Decode ``json_message``, blanking out characters the JSON decoder rejects.

    The text is parsed with ``json.loads``. Each time decoding fails, the
    character at the position reported by the decoder is replaced with a
    single space and parsing is retried. This lets a response containing
    stray control or escape characters still be decoded.

    Args:
        json_message: JSON text to decode.

    Returns:
        The decoded Python object.

    Raises:
        json.JSONDecodeError: If the reported error position is outside the
            text or already holds a space, i.e. blanking it cannot help.
    """
    message = json_message
    while True:
        try:
            return json.loads(message)
        except json.JSONDecodeError as err:
            # Use the structured error position instead of parsing it out of
            # the exception text.
            bad_idx = err.pos
            # Every retry turns one non-space character into a space, so the
            # loop is bounded by the length of the message. Stop as soon as a
            # retry could not make progress (e.g. truncated input).
            if not 0 <= bad_idx < len(message) or message[bad_idx] == ' ':
                raise
            # Remove the offending character:
            message = message[:bad_idx] + ' ' + message[bad_idx + 1:]

View File

@ -5,6 +5,7 @@ import os
from pr_agent.tools.pr_code_suggestions import PRCodeSuggestions
from pr_agent.tools.pr_description import PRDescription
from pr_agent.tools.pr_information_from_user import PRInformationFromUser
from pr_agent.tools.pr_questions import PRQuestions
from pr_agent.tools.pr_reviewer import PRReviewer
@ -28,7 +29,8 @@ improve / improve_code - Suggest improvements to the code in the PR as pull requ
parser.add_argument('command', type=str, help='The', choices=['review', 'review_pr',
'ask', 'ask_question',
'describe', 'describe_pr',
'improve', 'improve_code'], default='review')
'improve', 'improve_code',
'user_questions'], default='review')
parser.add_argument('rest', nargs=argparse.REMAINDER, default=[])
args = parser.parse_args()
logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))
@ -54,6 +56,10 @@ improve / improve_code - Suggest improvements to the code in the PR as pull requ
print(f"Reviewing PR: {args.pr_url}")
reviewer = PRReviewer(args.pr_url, cli_mode=True)
asyncio.run(reviewer.review())
elif command in ['user_questions']:
print(f"Asking the PR author questions: {args.pr_url}")
reviewer = PRInformationFromUser(args.pr_url)
asyncio.run(reviewer.generate_questions())
else:
print(f"Unknown command: {command}")
parser.print_help()

View File

@ -13,6 +13,7 @@ settings = Dynaconf(
"settings/pr_questions_prompts.toml",
"settings/pr_description_prompts.toml",
"settings/pr_code_suggestions_prompts.toml",
"settings/pr_information_from_user_prompts.toml",
"settings_prod/.secrets.toml"
]]
)

View File

@ -61,9 +61,6 @@ class BitbucketProvider:
def get_title(self):
return self.pr.title
def get_description(self):
return self.pr.body
def get_languages(self):
languages = {self._get_repo().get_data('language'): 0}
return languages

View File

@ -134,9 +134,6 @@ class GithubProvider(GitProvider):
def get_title(self):
return self.pr.title
def get_description(self):
return self.pr.body
def get_languages(self):
languages = self._get_repo().get_languages()
return languages

View File

@ -28,6 +28,8 @@ class GitLabProvider(GitProvider):
self.diff_files = None
self.temp_comments = []
self._set_merge_request(merge_request_url)
self.RE_HUNK_HEADER = re.compile(
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
@property
def pr(self):
@ -84,25 +86,26 @@ class GitLabProvider(GitProvider):
self.diff_files = self.diff_files if self.diff_files else self.get_diff_files()
edit_type, found, source_line_no, target_file, target_line_no = self.search_line(relevant_file,
relevant_line_in_file)
self.send_inline_comment(body, edit_type, found, relevant_file, relevant_line_in_file, source_line_no,
target_file, target_line_no)
def send_inline_comment(self, body, edit_type, found, relevant_file, relevant_line_in_file, source_line_no,
target_file, target_line_no):
if not found:
logging.info(f"Could not find position for {relevant_file} {relevant_line_in_file}")
else:
if edit_type == 'addition':
position = target_line_no - 1
else:
position = source_line_no - 1
d = self.last_diff
pos_obj = {'position_type': 'text',
'new_path': target_file.filename,
'old_path': target_file.old_filename if target_file.old_filename else target_file.filename,
'base_sha': d.base_commit_sha, 'start_sha': d.start_commit_sha, 'head_sha': d.head_commit_sha}
'new_path': target_file.filename,
'old_path': target_file.old_filename if target_file.old_filename else target_file.filename,
'base_sha': d.base_commit_sha, 'start_sha': d.start_commit_sha, 'head_sha': d.head_commit_sha}
if edit_type == 'deletion':
pos_obj['old_line'] = position
pos_obj['old_line'] = source_line_no - 1
elif edit_type == 'addition':
pos_obj['new_line'] = position
pos_obj['new_line'] = target_line_no - 1
else:
pos_obj['new_line'] = position
pos_obj['old_line'] = position
pos_obj['new_line'] = target_line_no - 1
pos_obj['old_line'] = source_line_no - 1
self.mr.discussions.create({'body': body,
'position': pos_obj})
@ -110,47 +113,81 @@ class GitLabProvider(GitProvider):
relevant_file: str,
relevant_lines_start: int,
relevant_lines_end: int):
raise "not implemented yet for gitlab"
self.diff_files = self.diff_files if self.diff_files else self.get_diff_files()
target_file = None
for file in self.diff_files:
if file.filename == relevant_file:
if file.filename == relevant_file:
target_file = file
break
range = relevant_lines_end - relevant_lines_start + 1
body = body.replace('```suggestion', f'```suggestion:-0+{range}')
d = self.last_diff
#
# pos_obj = {'position_type': 'text',
# 'new_path': target_file.filename,
# 'old_path': target_file.old_filename if target_file.old_filename else target_file.filename,
# 'base_sha': d.base_commit_sha, 'start_sha': d.start_commit_sha, 'head_sha': d.head_commit_sha}
lines = target_file.head_file.splitlines()
relevant_line_in_file = lines[relevant_lines_start - 1]
edit_type, found, source_line_no, target_file, target_line_no = self.find_in_file(target_file, relevant_line_in_file)
self.send_inline_comment(body, edit_type, found, relevant_file, relevant_line_in_file, source_line_no,
target_file, target_line_no)
# if lines[relevant_lines_start][0] == '-':
# pos_obj['old_line'] = relevant_lines_start
# elif lines[relevant_lines_start][0] == '+':
# pos_obj['new_line'] = relevant_lines_start
# else:
# pos_obj['new_line'] = relevant_lines_start
# pos_obj['old_line'] = relevant_lines_start
# self.mr.discussions.create({'body': body,
# 'position': pos_obj})
def search_line(self, relevant_file, relevant_line_in_file):
RE_HUNK_HEADER = re.compile(
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
target_file = None
source_line_no = 0
target_line_no = 0
found = False
edit_type = self.get_edit_type(relevant_line_in_file)
for file in self.diff_files:
if file.filename == relevant_file:
target_file = file
patch = file.patch
patch_lines = patch.splitlines()
for i, line in enumerate(patch_lines):
if line.startswith('@@'):
match = RE_HUNK_HEADER.match(line)
if not match:
continue
start_old, size_old, start_new, size_new, _ = match.groups()
source_line_no = int(start_old)
target_line_no = int(start_new)
continue
if line.startswith('-'):
source_line_no += 1
elif line.startswith('+'):
target_line_no += 1
elif line.startswith(' '):
source_line_no += 1
target_line_no += 1
if relevant_line_in_file in line:
found = True
edit_type = self.get_edit_type(line)
break
elif relevant_line_in_file[0] == '+' and relevant_line_in_file[1:] in line:
# The model often adds a '+' to the beginning of the relevant_line_in_file even if originally
# it's a context line
found = True
edit_type = self.get_edit_type(line)
break
edit_type, found, source_line_no, target_file, target_line_no = self.find_in_file(file,
relevant_line_in_file)
return edit_type, found, source_line_no, target_file, target_line_no
def find_in_file(self, file, relevant_line_in_file):
    """Locate ``relevant_line_in_file`` inside the patch of ``file``.

    Walks ``file.patch`` line by line. At every hunk header matched by
    ``self.RE_HUNK_HEADER`` the old-side and new-side counters are reset to
    the hunk's start values; every other line advances them according to its
    leading ``-``, ``+`` or space. The walk stops at the first patch line
    that contains the searched text.

    Returns:
        A tuple ``(edit_type, found, source_line_no, target_file,
        target_line_no)``. ``edit_type`` is ``'context'`` unless a line
        matched, in which case it is ``self.get_edit_type`` of that patch
        line. ``target_file`` is the ``file`` argument, and the two counters
        hold the values reached when the walk ended.
    """
    kind = 'context'
    old_no = 0
    new_no = 0
    hit = False
    for patch_line in file.patch.splitlines():
        if patch_line.startswith('@@'):
            header = self.RE_HUNK_HEADER.match(patch_line)
            if header:
                # Groups 1 and 3 are the start lines of the old and new side.
                old_no = int(header.group(1))
                new_no = int(header.group(3))
            continue

        marker = patch_line[:1]
        if marker in ('-', ' '):
            old_no += 1
        if marker in ('+', ' '):
            new_no += 1

        # The model often adds a '+' to the beginning of the
        # relevant_line_in_file even if originally it's a context line, so a
        # second attempt is made without that first character.
        if (relevant_line_in_file in patch_line
                or (relevant_line_in_file[0] == '+'
                    and relevant_line_in_file[1:] in patch_line)):
            hit = True
            kind = self.get_edit_type(patch_line)
            break
    return kind, hit, old_no, file, new_no
def get_edit_type(self, relevant_line_in_file):
@ -171,9 +208,6 @@ class GitLabProvider(GitProvider):
def get_title(self):
return self.mr.title
def get_description(self):
return self.mr.description
def get_languages(self):
languages = self.gl.projects.get(self.id_project).languages()
return languages

View File

@ -2,7 +2,7 @@
model="gpt-4-0613"
git_provider="github"
publish_review=true
verbosity_level=0 # 0,1,2
verbosity_level=2 # 0,1,2
[pr_reviewer]
require_focused_review=true

View File

@ -0,0 +1,33 @@
[pr_information_from_user_prompt]
system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests.
Given the PR Info and the PR Git Diff, generate 4 questions about the PR for the PR author.
The goal of the questions is to help the language model understand the PR better, so the questions should be insightful, informative, non-trivial, and relevant to the PR.
Prefer yes\\no or multiple choice questions. If you have to ask open-ended questions, make sure they are not too difficult, and can be answered in a sentence or two.
Example output:
'
Questions to better understand the PR:
1. ...
2. ...
...
"""
user="""PR Info:
Title: '{{title}}'
Branch: '{{branch}}'
Description: '{{description}}'
{%- if language %}
Main language: {{language}}
{%- endif %}
The PR Git Diff:
```
{{diff}}
```
Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines
Response:
"""

View File

@ -10,7 +10,7 @@ from pr_agent.algo.pr_processing import get_pr_diff
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import convert_to_markdown, try_fix_json
from pr_agent.config_loader import settings
from pr_agent.git_providers import get_git_provider, GithubProvider
from pr_agent.git_providers import get_git_provider, BitbucketProvider
from pr_agent.git_providers.git_provider import get_main_pr_language
@ -39,7 +39,7 @@ class PRCodeSuggestions:
settings.pr_code_suggestions_prompt.user)
async def suggest(self):
assert type(self.git_provider) == GithubProvider, "Only Github is supported for now"
assert type(self.git_provider) != BitbucketProvider, "Bitbucket is not supported for now"
logging.info('Generating code suggestions for PR...')
if settings.config.publish_review:
@ -86,7 +86,7 @@ class PRCodeSuggestions:
except json.decoder.JSONDecodeError:
if settings.config.verbosity_level >= 2:
logging.info(f"Could not parse json response: {review}")
data = try_fix_json(review)
data = try_fix_json(review, code_suggestions=True)
return data
def push_inline_code_suggestions(self, data):

View File

@ -23,7 +23,7 @@ class PRDescription:
self.vars = {
"title": self.git_provider.pr.title,
"branch": self.git_provider.get_pr_branch(),
"description": self.git_provider.get_description(),
"description": self.git_provider.get_pr_description(),
"language": self.main_pr_language,
"diff": "", # empty diff for initial calculation
}

View File

@ -0,0 +1,71 @@
import copy
import logging
from jinja2 import Environment, StrictUndefined
from pr_agent.algo.ai_handler import AiHandler
from pr_agent.algo.pr_processing import get_pr_diff
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.config_loader import settings
from pr_agent.git_providers import get_git_provider
from pr_agent.git_providers.git_provider import get_main_pr_language
class PRInformationFromUser:
    """Generate questions about a PR for its author and publish them as a comment."""

    def __init__(self, pr_url: str):
        """Collect the PR metadata needed to render the prompts.

        Args:
            pr_url: URL of the pull request, passed to the configured git
                provider.
        """
        self.git_provider = get_git_provider()(pr_url)
        self.main_pr_language = get_main_pr_language(
            self.git_provider.get_languages(), self.git_provider.get_files()
        )
        self.ai_handler = AiHandler()
        self.vars = {
            "title": self.git_provider.pr.title,
            "branch": self.git_provider.get_pr_branch(),
            # The providers expose get_pr_description(); the duplicate
            # get_description() accessor is no longer available on them.
            "description": self.git_provider.get_pr_description(),
            "language": self.main_pr_language,
            "diff": "",  # empty diff for initial calculation
        }
        self.token_handler = TokenHandler(self.git_provider.pr,
                                          self.vars,
                                          settings.pr_information_from_user_prompt.system,
                                          settings.pr_information_from_user_prompt.user)
        self.patches_diff = None
        self.prediction = None

    async def generate_questions(self):
        """Build the questions for the PR author and, if enabled, publish them.

        When ``settings.config.publish_review`` is set, a temporary
        "Preparing answer..." comment is posted first; after the final
        comment is published, ``remove_initial_comment()`` is called on the
        provider.

        Returns:
            An empty string.
        """
        logging.info('Generating question to the user...')
        if settings.config.publish_review:
            self.git_provider.publish_comment("Preparing answer...", is_temporary=True)
        logging.info('Getting PR diff...')
        self.patches_diff = get_pr_diff(self.git_provider, self.token_handler)
        logging.info('Getting AI prediction...')
        self.prediction = await self._get_prediction()
        logging.info('Preparing questions...')
        pr_comment = self._prepare_pr_answer()
        if settings.config.publish_review:
            logging.info('Pushing questions...')
            self.git_provider.publish_comment(pr_comment)
            self.git_provider.remove_initial_comment()
        return ""

    async def _get_prediction(self):
        """Render the system and user prompts and return the model's response."""
        # Work on a copy so the template variables stored on the instance
        # keep their empty "diff" placeholder.
        variables = copy.deepcopy(self.vars)
        variables["diff"] = self.patches_diff  # update diff
        # StrictUndefined makes rendering fail on a missing template variable
        # instead of silently inserting an empty string.
        environment = Environment(undefined=StrictUndefined)
        system_prompt = environment.from_string(settings.pr_information_from_user_prompt.system).render(variables)
        user_prompt = environment.from_string(settings.pr_information_from_user_prompt.user).render(variables)
        if settings.config.verbosity_level >= 2:
            logging.info(f"\nSystem prompt:\n{system_prompt}")
            logging.info(f"\nUser prompt:\n{user_prompt}")
        model = settings.config.model
        response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
                                                                        system=system_prompt, user=user_prompt)
        return response

    def _prepare_pr_answer(self) -> str:
        """Return the comment text: the model output followed by reply instructions."""
        model_output = self.prediction.strip()
        if settings.config.verbosity_level >= 2:
            logging.info(f"answer_str:\n{model_output}")
        answer_str = (
            f"{model_output}\n\n Please respond to the question above in the following format:\n\n"
            "/answer <question_id> <answer>\n\n"
            "Example:\n'\n/answer\n1. Yes, because ...\n2. No, because ...\n'"
        )
        return answer_str

View File

@ -22,7 +22,7 @@ class PRQuestions:
self.vars = {
"title": self.git_provider.pr.title,
"branch": self.git_provider.get_pr_branch(),
"description": self.git_provider.get_description(),
"description": self.git_provider.get_pr_description(),
"language": self.main_pr_language,
"diff": "", # empty diff for initial calculation
"questions": self.question_str,