Compare commits

..

11 Commits

Author SHA1 Message Date
cd1e62ec96 Add Azure OpenAI support 2023-07-12 11:53:46 +03:00
7767cae181 Merge pull request #39 from Codium-ai/bugfix/cli
Remove installation_id from cli
2023-07-12 11:31:43 +03:00
1bc206e7b2 Remove installation_id from cli 2023-07-12 11:31:06 +03:00
52a438b3c8 Merge pull request #38 from Codium-ai/hl/try_fix_when_broken_output
Try to fix json output when it's broken or incomplete
2023-07-11 22:23:07 +03:00
b8a71b369d add max_iter 2023-07-11 22:22:08 +03:00
72af2a1f9c Add tests 2023-07-11 22:11:55 +03:00
fd4a2bf7ff refactor try_fix_json, generalize finding the ending of a json item (support new lines, spaces tab) 2023-07-11 22:11:42 +03:00
a3211d4958 Merge commit '210d94f2aa6ebf872b9b85051d1842c32d4fc34e' into hl/try_fix_when_broken_output 2023-07-11 17:33:02 +03:00
86d7ed5f82 Try to fix broken json output 2023-07-11 17:32:48 +03:00
210d94f2aa Merge pull request #24 from Xyand/feature/gitlab_provider
Feature/gitlab provider
2023-07-11 16:56:44 +03:00
b2d952cafa 1. Move deployment_type to configuration.toml
2. Lint
3. Inject GitHub app installation ID into GitHub provider using the settings mechanism.
2023-07-11 16:55:09 +03:00
20 changed files with 188 additions and 50 deletions

View File

@ -1,5 +1,4 @@
import re
from typing import Optional
from pr_agent.tools.pr_questions import PRQuestions
from pr_agent.tools.pr_reviewer import PRReviewer

View File

@ -14,6 +14,13 @@ class AiHandler:
openai.api_key = settings.openai.key
if settings.get("OPENAI.ORG", None):
openai.organization = settings.openai.org
self.deployment_id = settings.get("OPENAI.DEPLOYMENT_ID", None)
if settings.get("OPENAI.API_TYPE", None):
openai.api_type = settings.openai.api_type
if settings.get("OPENAI.API_VERSION", None):
openai.engine = settings.openai.api_version
if settings.get("OPENAI.API_BASE", None):
openai.api_base = settings.openai.api_base
except AttributeError as e:
raise ValueError("OpenAI key is required") from e
@ -23,6 +30,7 @@ class AiHandler:
try:
response = await openai.ChatCompletion.acreate(
model=model,
deployment_id=self.deployment_id,
messages=[
{"role": "system", "content": system},
{"role": "user", "content": user}

View File

@ -93,7 +93,7 @@ def sort_files_by_main_languages(languages: Dict, files: list):
for ext in main_extensions:
main_extensions_flat.extend(ext)
for extensions, lang in zip(main_extensions, languages_sorted_list):
for extensions, lang in zip(main_extensions, languages_sorted_list): # noqa: B905
tmp = []
for file in files_filtered:
extension_str = f".{file.filename.split('.')[-1]}"

View File

@ -1,5 +1,8 @@
from __future__ import annotations
import json
import logging
import re
import textwrap
@ -61,3 +64,25 @@ def parse_code_suggestion(code_suggestions: dict) -> str:
markdown_text += "\n"
return markdown_text
def try_fix_json(review, max_iter=10):
# Try to fix JSON if it is broken/incomplete: parse until the last valid code suggestion
data = {}
if review.rfind("'Code suggestions': [") > 0 or review.rfind('"Code suggestions": [') > 0:
last_code_suggestion_ind = [m.end() for m in re.finditer(r"\}\s*,", review)][-1] - 1
valid_json = False
iter_count = 0
while last_code_suggestion_ind > 0 and not valid_json and iter_count < max_iter:
try:
data = json.loads(review[:last_code_suggestion_ind] + "]}}")
valid_json = True
review = review[:last_code_suggestion_ind].strip() + "]}}"
except json.decoder.JSONDecodeError:
review = review[:last_code_suggestion_ind]
# Use regular expression to find the last occurrence of "}," with any number of whitespaces or newlines
last_code_suggestion_ind = [m.end() for m in re.finditer(r"\}\s*,", review)][-1] - 1
iter_count += 1
if not valid_json:
logging.error("Unable to decode JSON response from AI")
data = {}
return data

View File

@ -15,11 +15,11 @@ def run():
logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))
if args.question:
print(f"Question: {args.question} about PR {args.pr_url}")
reviewer = PRQuestions(args.pr_url, args.question, installation_id=None)
reviewer = PRQuestions(args.pr_url, args.question)
asyncio.run(reviewer.answer())
else:
print(f"Reviewing PR: {args.pr_url}")
reviewer = PRReviewer(args.pr_url, installation_id=None, cli_mode=True)
reviewer = PRReviewer(args.pr_url, cli_mode=True)
asyncio.run(reviewer.review())

View File

@ -1,5 +1,4 @@
from abc import ABC
from abc import ABC, abstractmethod
from dataclasses import dataclass
@ -13,27 +12,35 @@ class FilePatchInfo:
class GitProvider(ABC):
@abstractmethod
def get_diff_files(self) -> list[FilePatchInfo]:
pass
@abstractmethod
def publish_comment(self, pr_comment: str, is_temporary: bool = False):
pass
@abstractmethod
def remove_initial_comment(self):
pass
@abstractmethod
def get_languages(self):
pass
@abstractmethod
def get_pr_branch(self):
pass
@abstractmethod
def get_user_id(self):
pass
@abstractmethod
def get_pr_description(self):
pass
def get_main_pr_language(languages, files) -> str:
"""
Get the main language of the commit. Return an empty string if cannot determine.
@ -72,4 +79,4 @@ def get_main_pr_language(languages, files) -> str:
except Exception:
pass
return main_language_str
return main_language_str

View File

@ -6,6 +6,7 @@ from urllib.parse import urlparse
from github import AppAuthentication, Github
from pr_agent.config_loader import settings
from .git_provider import FilePatchInfo

View File

@ -1,6 +1,8 @@
from urllib.parse import urlparse
import gitlab
import logging
from typing import Optional, Tuple
from urllib.parse import urlparse
import gitlab
from pr_agent.config_loader import settings
@ -9,24 +11,28 @@ from .git_provider import FilePatchInfo, GitProvider
class GitLabProvider(GitProvider):
def __init__(self, merge_request_url: Optional[str] = None):
gitlab_url = settings.get("GITLAB.URL", None)
if not gitlab_url:
raise ValueError("GitLab URL is not set in the config file")
gitlab_access_token = settings.get("GITLAB.PERSONAL_ACCESS_TOKEN", None)
if not gitlab_access_token:
raise ValueError("GitLab personal access token is not set in the config file")
self.gl = gitlab.Gitlab(
settings.get("GITLAB.URL"),
private_token=settings.get("GITLAB.PERSONAL_ACCESS_TOKEN")
gitlab_url,
gitlab_access_token
)
self.id_project = None
self.id_mr = None
self.mr = None
self.temp_comments = []
self.set_merge_request(merge_request_url)
self._set_merge_request(merge_request_url)
@property
def pr(self):
'''The GitLab terminology is merge request (MR) instead of pull request (PR)'''
return self.mr
def set_merge_request(self, merge_request_url: str):
def _set_merge_request(self, merge_request_url: str):
self.id_project, self.id_mr = self._parse_merge_request_url(merge_request_url)
self.mr = self._get_merge_request()

View File

@ -35,7 +35,8 @@ async def handle_github_webhooks(request: Request, response: Response):
async def handle_request(body):
action = body.get("action", None)
installation_id = body.get("installation", {}).get("id", None)
agent = PRAgent(installation_id)
settings.set("GITHUB.INSTALLATION_ID", installation_id)
agent = PRAgent()
if action == 'created':
if "comment" not in body:
return {}
@ -66,8 +67,8 @@ async def root():
def start():
if settings.get("GITHUB.DEPLOYMENT_TYPE", "user") != "app":
raise Exception("Please set deployment type to app in .secrets.toml file")
# Override the deployment type to app
settings.set("GITHUB.DEPLOYMENT_TYPE", "app")
app = FastAPI()
app.include_router(router)

View File

@ -76,7 +76,8 @@ async def polling_loop():
if comment['user']['login'] == user_id:
continue
comment_body = comment['body'] if 'body' in comment else ''
commenter_github_user = comment['user']['login'] if 'user' in comment else ''
commenter_github_user = comment['user']['login'] \
if 'user' in comment else ''
logging.info(f"Commenter: {commenter_github_user}\nComment: {comment_body}")
user_tag = "@" + user_id
if user_tag not in comment_body:

View File

@ -1,12 +1,11 @@
import asyncio
import time
from urllib.parse import urlparse
import gitlab
from pr_agent.agent.pr_agent import PRAgent
from pr_agent.config_loader import settings
gl = gitlab.Gitlab(
settings.get("GITLAB.URL"),
private_token=settings.get("GITLAB.PERSONAL_ACCESS_TOKEN")

View File

@ -9,11 +9,13 @@
[openai]
key = "<API_KEY>" # Acquire through https://platform.openai.com
org = "<ORGANIZATION>" # Optional, may be commented out.
# Uncomment the following for Azure OpenAI
#api_type = "azure"
#api_version = '2023-05-15' # Check Azure documentation for the current API version
#api_base = "<API_BASE>" # The base URL for your Azure OpenAI resource. e.g. "https://<your resource name>.openai.azure.com"
#deployment_id = "<DEPLOYMENT_ID>" # The deployment name you chose when you deployed the engine
[github]
# The type of deployment to create. Valid values are 'app' or 'user'.
deployment_type = "user"
# ---- Set the following only for deployment type == "user"
user_token = "<TOKEN>" # A GitHub personal access token with 'repo' scope.
@ -30,5 +32,3 @@ webhook_secret = "<WEBHOOK SECRET>" # Optional, may be commented out.
# Gitlab personal access token
personal_access_token = ""
# URL to the gitlab service
gitlab_url = "https://gitlab.com"

View File

@ -11,18 +11,21 @@ require_security_review=true
extended_code_suggestions=false
num_code_suggestions=4
[pr_questions]
[github]
# The type of deployment to create. Valid values are 'app' or 'user'.
deployment_type = "user"
[gitlab]
# URL to the gitlab service
gitlab_url = "https://gitlab.com"
# Polling (either proheheject id or namespace/project_name) syntax can be used
projects_to_monitor = ['nuclai/algo', 'nuclai/pr-agent-test']
# Polling (either project id or namespace/project_name) syntax can be used
projects_to_monitor = ['org_name/repo_name']
# Polling trigger
magic_word = "AutoReview"
# Polling interval
polling_interval_seconds = 300
polling_interval_seconds = 30

View File

@ -1,6 +1,5 @@
import copy
import logging
from typing import Optional
from jinja2 import Environment, StrictUndefined

View File

@ -1,14 +1,13 @@
import copy
import json
import logging
from typing import Optional
from jinja2 import Environment, StrictUndefined
from pr_agent.algo.ai_handler import AiHandler
from pr_agent.algo.pr_processing import get_pr_diff
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import convert_to_markdown
from pr_agent.algo.utils import convert_to_markdown, try_fix_json
from pr_agent.config_loader import settings
from pr_agent.git_providers import get_git_provider
from pr_agent.git_providers.git_provider import get_main_pr_language
@ -70,11 +69,7 @@ class PRReviewer:
model = settings.config.model
response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
system=system_prompt, user=user_prompt)
try:
json.loads(response)
except json.decoder.JSONDecodeError:
logging.warning("Could not decode JSON")
response = {}
return response
def _prepare_pr_review(self) -> str:
@ -82,8 +77,7 @@ class PRReviewer:
try:
data = json.loads(review)
except json.decoder.JSONDecodeError:
logging.error("Unable to decode JSON response from AI")
data = {}
data = try_fix_json(review)
# reordering for nicer display
if 'PR Feedback' in data:
@ -109,4 +103,4 @@ class PRReviewer:
if settings.config.verbosity_level >= 2:
logging.info(f"Markdown response:\n{markdown_text}")
return markdown_text
return markdown_text

View File

@ -7,3 +7,5 @@ Jinja2==3.1.2
tiktoken==0.4.0
uvicorn==0.22.0
python-gitlab==3.15.0
pytest~=7.4.0
aiohttp~=3.8.4

View File

@ -1,6 +1,6 @@
# Generated by CodiumAI
from pr_agent.algo.utils import convert_to_markdown
import pytest
"""
Code Analysis

View File

@ -0,0 +1,91 @@
# Generated by CodiumAI
from pr_agent.algo.utils import try_fix_json
import pytest
class TestTryFixJson:
# Tests that JSON with complete 'Code suggestions' section returns expected output
def test_incomplete_code_suggestions(self):
review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...'
expected_output = {
'PR Analysis': {
'Main theme': 'xxx',
'Description and title': 'Yes',
'Type of PR': 'Bug fix'
},
'PR Feedback': {
'General PR suggestions': '..., `xxx`...',
'Code suggestions': [
{
'suggestion number': 1,
'relevant file': 'xxx.py',
'suggestion content': 'xxx [important]'
}
]
}
}
assert try_fix_json(review) == expected_output
def test_incomplete_code_suggestions_new_line(self):
review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n\t, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...'
expected_output = {
'PR Analysis': {
'Main theme': 'xxx',
'Description and title': 'Yes',
'Type of PR': 'Bug fix'
},
'PR Feedback': {
'General PR suggestions': '..., `xxx`...',
'Code suggestions': [
{
'suggestion number': 1,
'relevant file': 'xxx.py',
'suggestion content': 'xxx [important]'
}
]
}
}
assert try_fix_json(review) == expected_output
def test_incomplete_code_suggestions_many_close_brackets(self):
review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy }, [}\n ,incomp.} ,..'
expected_output = {
'PR Analysis': {
'Main theme': 'xxx',
'Description and title': 'Yes',
'Type of PR': 'Bug fix'
},
'PR Feedback': {
'General PR suggestions': '..., `xxx`...',
'Code suggestions': [
{
'suggestion number': 1,
'relevant file': 'xxx.py',
'suggestion content': 'xxx [important]'
}
]
}
}
assert try_fix_json(review) == expected_output
def test_incomplete_code_suggestions_relevant_file(self):
review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.p'
expected_output = {
'PR Analysis': {
'Main theme': 'xxx',
'Description and title': 'Yes',
'Type of PR': 'Bug fix'
},
'PR Feedback': {
'General PR suggestions': '..., `xxx`...',
'Code suggestions': [
{
'suggestion number': 1,
'relevant file': 'xxx.py',
'suggestion content': 'xxx [important]'
}
]
}
}
assert try_fix_json(review) == expected_output

View File

@ -1,15 +1,15 @@
# Generated by CodiumAI
from pr_agent.algo.language_handler import sort_files_by_main_languages
import pytest
"""
Code Analysis
Objective:
The objective of the function is to sort a list of files by their main language, putting the files that are in the main language first and the rest of the files after. It takes in a dictionary of languages and their sizes, and a list of files.
The objective of the function is to sort a list of files by their main language, putting the files that are in the main
language first and the rest of the files after. It takes in a dictionary of languages and their sizes, and a list of
files.
Inputs:
- languages: a dictionary containing the languages and their sizes
@ -33,6 +33,8 @@ Additional aspects:
- The function uses the filter_bad_extensions function to filter out files with bad extensions
- The function uses a rest_files dictionary to store the files that do not belong to any of the main extensions
"""
class TestSortFilesByMainLanguages:
# Tests that files are sorted by main language, with files in main language first and the rest after
def test_happy_path_sort_files_by_main_languages(self):
@ -118,4 +120,4 @@ class TestSortFilesByMainLanguages:
{'language': 'C++', 'files': [files[2], files[7]]},
{'language': 'Other', 'files': []}
]
assert sort_files_by_main_languages(languages, files) == expected_output
assert sort_files_by_main_languages(languages, files) == expected_output

View File

@ -70,7 +70,7 @@ class TestParseCodeSuggestion:
'before': 'Before 1',
'after': 'After 1'
}
expected_output = " **suggestion:** Suggestion 1\n **description:** Description 1\n **before:** Before 1\n **after:** After 1\n\n"
expected_output = " **suggestion:** Suggestion 1\n **description:** Description 1\n **before:** Before 1\n **after:** After 1\n\n" # noqa: E501
assert parse_code_suggestion(code_suggestions) == expected_output
# Tests that function returns correct output when input dictionary has 'code example' key
@ -84,5 +84,5 @@ class TestParseCodeSuggestion:
'after': 'After 2'
}
}
expected_output = " **suggestion:** Suggestion 2\n **description:** Description 2\n - **code example:**\n - **before:**\n ```\n Before 2\n ```\n - **after:**\n ```\n After 2\n ```\n\n"
expected_output = " **suggestion:** Suggestion 2\n **description:** Description 2\n - **code example:**\n - **before:**\n ```\n Before 2\n ```\n - **after:**\n ```\n After 2\n ```\n\n" # noqa: E501
assert parse_code_suggestion(code_suggestions) == expected_output