Add Azure OpenAI support

Merge pull request #39 from Codium-ai/bugfix/cli
Remove installation_id from cli
2025-07-21 04:50:39 +08:00 · 2023-07-12 11:53:46 +03:00 · 2023-07-12 11:31:43 +03:00 · 2023-07-12 11:31:06 +03:00 · 2023-07-11 22:23:07 +03:00 · 2023-07-11 22:22:08 +03:00
20 changed files with 188 additions and 50 deletions
--- a/pr_agent/agent/pr_agent.py
+++ b/pr_agent/agent/pr_agent.py
@ -1,5 +1,4 @@
 import re
-from typing import Optional

 from pr_agent.tools.pr_questions import PRQuestions
 from pr_agent.tools.pr_reviewer import PRReviewer
--- a/pr_agent/algo/ai_handler.py
+++ b/pr_agent/algo/ai_handler.py
@ -14,6 +14,13 @@ class AiHandler:
            openai.api_key = settings.openai.key
            if settings.get("OPENAI.ORG", None):
                openai.organization = settings.openai.org
+            # Optional Azure OpenAI settings: each one is applied only when present in the config.
+            self.deployment_id = settings.get("OPENAI.DEPLOYMENT_ID", None)
+            if settings.get("OPENAI.API_TYPE", None):
+                openai.api_type = settings.openai.api_type
+            if settings.get("OPENAI.API_VERSION", None):
+                # The configured version belongs in openai.api_version; assigning it to
+                # openai.engine leaves the API version unset.
+                openai.api_version = settings.openai.api_version
+            if settings.get("OPENAI.API_BASE", None):
+                openai.api_base = settings.openai.api_base
        except AttributeError as e:
            raise ValueError("OpenAI key is required") from e

@ -23,6 +30,7 @@ class AiHandler:
        try:
            response = await openai.ChatCompletion.acreate(
                            model=model,
+                            deployment_id=self.deployment_id,
                            messages=[
                                {"role": "system", "content": system},
                                {"role": "user", "content": user}
--- a/pr_agent/algo/language_handler.py
+++ b/pr_agent/algo/language_handler.py
@ -93,7 +93,7 @@ def sort_files_by_main_languages(languages: Dict, files: list):
    for ext in main_extensions:
        main_extensions_flat.extend(ext)

-    for extensions, lang in zip(main_extensions, languages_sorted_list):
+    for extensions, lang in zip(main_extensions, languages_sorted_list):  # noqa: B905
        tmp = []
        for file in files_filtered:
            extension_str = f".{file.filename.split('.')[-1]}"
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@ -1,5 +1,8 @@
 from __future__ import annotations

+import json
+import logging
+import re
 import textwrap


@ -61,3 +64,25 @@ def parse_code_suggestion(code_suggestions: dict) -> str:
    markdown_text += "\n"
    return markdown_text

+
+def try_fix_json(review, max_iter=10):
+    """
+    Try to recover data from a broken/incomplete JSON review by cutting it back to the last
+    code suggestion that still parses.
+
+    The text is truncated just before the comma of the last "}," (any whitespace may separate the
+    two characters), "]}}" is appended to close the suggestions list and the two enclosing objects,
+    and the result is parsed. On failure the same step is repeated on the shortened text.
+    An error is logged whenever nothing could be recovered.
+
+    Args:
+        review: Raw response text that json.loads() could not parse.
+        max_iter: Maximum number of truncate-and-parse attempts.
+
+    Returns:
+        The parsed data, or an empty dict if nothing could be recovered.
+    """
+    has_code_suggestions = "'Code suggestions': [" in review or '"Code suggestions": [' in review
+    if has_code_suggestions:
+        item_end = re.compile(r"\}\s*,")
+        for _ in range(max_iter):
+            # Index of the comma that ends the last "}," match; stays -1 when no match is left.
+            # Indexing a list of matches with [-1] would raise IndexError once it is empty, which
+            # happens whenever the first code suggestion is itself truncated.
+            cut = -1
+            for match in item_end.finditer(review):
+                cut = match.end() - 1
+            if cut <= 0:
+                break
+            review = review[:cut]
+            try:
+                return json.loads(review + "]}}")
+            except json.decoder.JSONDecodeError:
+                continue
+    logging.error("Unable to decode JSON response from AI")
+    return {}
--- a/pr_agent/cli.py
+++ b/pr_agent/cli.py
@ -15,11 +15,11 @@ def run():
    logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))
    if args.question:
        print(f"Question: {args.question} about PR {args.pr_url}")
-        reviewer = PRQuestions(args.pr_url, args.question, installation_id=None)
+        reviewer = PRQuestions(args.pr_url, args.question)
        asyncio.run(reviewer.answer())
    else:
        print(f"Reviewing PR: {args.pr_url}")
-        reviewer = PRReviewer(args.pr_url, installation_id=None, cli_mode=True)
+        reviewer = PRReviewer(args.pr_url, cli_mode=True)
        asyncio.run(reviewer.review())


--- a/pr_agent/git_providers/git_provider.py
+++ b/pr_agent/git_providers/git_provider.py
@ -1,5 +1,4 @@
-
-from abc import ABC
+from abc import ABC, abstractmethod
 from dataclasses import dataclass


@ -13,27 +12,35 @@ class FilePatchInfo:


 class GitProvider(ABC):
+    """
+    Abstract base class for git providers.
+
+    Every method below is declared with @abstractmethod and has an empty body, so a subclass
+    has to override all of them before it can be instantiated.
+    """
+
+    @abstractmethod
     def get_diff_files(self) -> list[FilePatchInfo]:
         pass

+    @abstractmethod
     def publish_comment(self, pr_comment: str, is_temporary: bool = False):
         pass

+    @abstractmethod
     def remove_initial_comment(self):
         pass

+    @abstractmethod
     def get_languages(self):
         pass

+    @abstractmethod
     def get_pr_branch(self):
         pass

+    @abstractmethod
     def get_user_id(self):
         pass

+    @abstractmethod
     def get_pr_description(self):
         pass

+
 def get_main_pr_language(languages, files) -> str:
    """
    Get the main language of the commit. Return an empty string if cannot determine.
@ -72,4 +79,4 @@ def get_main_pr_language(languages, files) -> str:
    except Exception:
        pass

-    return main_language_str
+    return main_language_str
--- a/pr_agent/git_providers/github_provider.py
+++ b/pr_agent/git_providers/github_provider.py
@ -6,6 +6,7 @@ from urllib.parse import urlparse
 from github import AppAuthentication, Github

 from pr_agent.config_loader import settings
+
 from .git_provider import FilePatchInfo


--- a/pr_agent/git_providers/gitlab_provider.py
+++ b/pr_agent/git_providers/gitlab_provider.py
@ -1,6 +1,8 @@
-from urllib.parse import urlparse
-import gitlab
+import logging
 from typing import Optional, Tuple
+from urllib.parse import urlparse
+
+import gitlab

 from pr_agent.config_loader import settings

@ -9,24 +11,28 @@ from .git_provider import FilePatchInfo, GitProvider

 class GitLabProvider(GitProvider):
    def __init__(self, merge_request_url: Optional[str] = None):
+        gitlab_url = settings.get("GITLAB.URL", None)
+        if not gitlab_url:
+            raise ValueError("GitLab URL is not set in the config file")
+        gitlab_access_token = settings.get("GITLAB.PERSONAL_ACCESS_TOKEN", None)
+        if not gitlab_access_token:
+            raise ValueError("GitLab personal access token is not set in the config file")
        self.gl = gitlab.Gitlab(
-            settings.get("GITLAB.URL"),
-            private_token=settings.get("GITLAB.PERSONAL_ACCESS_TOKEN")
+            gitlab_url,
+            gitlab_access_token
        )
-
        self.id_project = None
        self.id_mr = None
        self.mr = None
        self.temp_comments = []
-
-        self.set_merge_request(merge_request_url)
+        self._set_merge_request(merge_request_url)

    @property
    def pr(self):
        '''The GitLab terminology is merge request (MR) instead of pull request (PR)'''
        return self.mr

-    def set_merge_request(self, merge_request_url: str):
+    def _set_merge_request(self, merge_request_url: str):
        self.id_project, self.id_mr = self._parse_merge_request_url(merge_request_url)
        self.mr = self._get_merge_request()

--- a/pr_agent/servers/github_app.py
+++ b/pr_agent/servers/github_app.py
@ -35,7 +35,8 @@ async def handle_github_webhooks(request: Request, response: Response):
 async def handle_request(body):
    action = body.get("action", None)
    installation_id = body.get("installation", {}).get("id", None)
-    agent = PRAgent(installation_id)
+    settings.set("GITHUB.INSTALLATION_ID", installation_id)
+    agent = PRAgent()
    if action == 'created':
        if "comment" not in body:
            return {}
@ -66,8 +67,8 @@ async def root():


 def start():
-    if settings.get("GITHUB.DEPLOYMENT_TYPE", "user") != "app":
-        raise Exception("Please set deployment type to app in .secrets.toml file")
+    # Override the deployment type to app
+    settings.set("GITHUB.DEPLOYMENT_TYPE", "app")
    app = FastAPI()
    app.include_router(router)

--- a/pr_agent/servers/github_polling.py
+++ b/pr_agent/servers/github_polling.py
@ -76,7 +76,8 @@ async def polling_loop():
                                                if comment['user']['login'] == user_id:
                                                    continue
                                            comment_body = comment['body'] if 'body' in comment else ''
-                                            commenter_github_user = comment['user']['login'] if 'user' in comment else ''
+                                            commenter_github_user = comment['user']['login'] \
+                                                if 'user' in comment else ''
                                            logging.info(f"Commenter: {commenter_github_user}\nComment: {comment_body}")
                                            user_tag = "@" + user_id
                                            if user_tag not in comment_body:
--- a/pr_agent/servers/gitlab_polling.py
+++ b/pr_agent/servers/gitlab_polling.py
@ -1,12 +1,11 @@
 import asyncio
 import time
-from urllib.parse import urlparse
+
 import gitlab
+
 from pr_agent.agent.pr_agent import PRAgent
-
 from pr_agent.config_loader import settings

-
 gl = gitlab.Gitlab(
    settings.get("GITLAB.URL"),
    private_token=settings.get("GITLAB.PERSONAL_ACCESS_TOKEN")
--- a/pr_agent/settings/.secrets_template.toml
+++ b/pr_agent/settings/.secrets_template.toml
@ -9,11 +9,13 @@
 [openai]
 key = "<API_KEY>"  # Acquire through https://platform.openai.com
 org = "<ORGANIZATION>"  # Optional, may be commented out.
+# Uncomment the following for Azure OpenAI
+#api_type = "azure"
+#api_version = '2023-05-15'  # Check Azure documentation for the current API version
+#api_base = "<API_BASE>"  # The base URL for your Azure OpenAI resource. e.g. "https://<your resource name>.openai.azure.com"
+#deployment_id = "<DEPLOYMENT_ID>"  # The deployment name you chose when you deployed the engine

 [github]
-# The type of deployment to create. Valid values are 'app' or 'user'.
-deployment_type = "user"
-
 # ---- Set the following only for deployment type == "user"
 user_token = "<TOKEN>"  # A GitHub personal access token with 'repo' scope.

@ -30,5 +32,3 @@ webhook_secret = "<WEBHOOK SECRET>"  # Optional, may be commented out.
 # Gitlab personal access token
 personal_access_token = ""

-# URL to the gitlab service
-gitlab_url = "https://gitlab.com"
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@ -11,18 +11,21 @@ require_security_review=true
 extended_code_suggestions=false
 num_code_suggestions=4

-
 [pr_questions]

+[github]
+# The type of deployment to create. Valid values are 'app' or 'user'.
+deployment_type = "user"
+
 [gitlab]
 # URL to the gitlab service
 gitlab_url = "https://gitlab.com"

-# Polling (either proheheject id or namespace/project_name) syntax can be used
-projects_to_monitor = ['nuclai/algo', 'nuclai/pr-agent-test']
+# Polling (either project id or namespace/project_name) syntax can be used
+projects_to_monitor = ['org_name/repo_name']

 # Polling trigger
 magic_word = "AutoReview"

 # Polling interval
-polling_interval_seconds = 300
+polling_interval_seconds = 30
--- a/pr_agent/tools/pr_questions.py
+++ b/pr_agent/tools/pr_questions.py
@ -1,6 +1,5 @@
 import copy
 import logging
-from typing import Optional

 from jinja2 import Environment, StrictUndefined

--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@ -1,14 +1,13 @@
 import copy
 import json
 import logging
-from typing import Optional

 from jinja2 import Environment, StrictUndefined

 from pr_agent.algo.ai_handler import AiHandler
 from pr_agent.algo.pr_processing import get_pr_diff
 from pr_agent.algo.token_handler import TokenHandler
-from pr_agent.algo.utils import convert_to_markdown
+from pr_agent.algo.utils import convert_to_markdown, try_fix_json
 from pr_agent.config_loader import settings
 from pr_agent.git_providers import get_git_provider
 from pr_agent.git_providers.git_provider import get_main_pr_language
@ -70,11 +69,7 @@ class PRReviewer:
        model = settings.config.model
        response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
                                                                        system=system_prompt, user=user_prompt)
-        try:
-            json.loads(response)
-        except json.decoder.JSONDecodeError:
-            logging.warning("Could not decode JSON")
-            response = {}
+
        return response

    def _prepare_pr_review(self) -> str:
@ -82,8 +77,7 @@ class PRReviewer:
        try:
            data = json.loads(review)
        except json.decoder.JSONDecodeError:
-            logging.error("Unable to decode JSON response from AI")
-            data = {}
+            data = try_fix_json(review)

        # reordering for nicer display
        if 'PR Feedback' in data:
@ -109,4 +103,4 @@ class PRReviewer:

        if settings.config.verbosity_level >= 2:
            logging.info(f"Markdown response:\n{markdown_text}")
-        return markdown_text
+        return markdown_text
--- a/requirements.txt
+++ b/requirements.txt
@ -7,3 +7,5 @@ Jinja2==3.1.2
 tiktoken==0.4.0
 uvicorn==0.22.0
 python-gitlab==3.15.0
+pytest~=7.4.0
+aiohttp~=3.8.4
--- a/tests/unit/test_convert_to_markdown.py
+++ b/tests/unit/test_convert_to_markdown.py
@ -1,6 +1,6 @@
 # Generated by CodiumAI
 from pr_agent.algo.utils import convert_to_markdown
-import pytest
+
 """
 Code Analysis

--- a/tests/unit/test_fix_output.py
+++ b/tests/unit/test_fix_output.py
@ -0,0 +1,91 @@
+# Generated by CodiumAI
+from pr_agent.algo.utils import try_fix_json
+
+
+import pytest
+
+class TestTryFixJson:
+    # Tests that JSON with an incomplete 'Code suggestions' section is cut back to the last complete suggestion
+    def test_incomplete_code_suggestions(self):
+        review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...'
+        expected_output = {
+            'PR Analysis': {
+                'Main theme': 'xxx',
+                'Description and title': 'Yes',
+                'Type of PR': 'Bug fix'
+            },
+            'PR Feedback': {
+                'General PR suggestions': '..., `xxx`...',
+                'Code suggestions': [
+                    {
+                        'suggestion number': 1,
+                        'relevant file': 'xxx.py',
+                        'suggestion content': 'xxx [important]'
+                    }
+                ]
+            }
+        }
+        assert try_fix_json(review) == expected_output
+
+    def test_incomplete_code_suggestions_new_line(self):
+        review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n\t, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...'
+        expected_output = {
+            'PR Analysis': {
+                'Main theme': 'xxx',
+                'Description and title': 'Yes',
+                'Type of PR': 'Bug fix'
+            },
+            'PR Feedback': {
+                'General PR suggestions': '..., `xxx`...',
+                'Code suggestions': [
+                    {
+                        'suggestion number': 1,
+                        'relevant file': 'xxx.py',
+                        'suggestion content': 'xxx [important]'
+                    }
+                ]
+            }
+        }
+        assert try_fix_json(review) == expected_output
+
+    def test_incomplete_code_suggestions_many_close_brackets(self):
+        review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy }, [}\n ,incomp.}  ,..'
+        expected_output = {
+            'PR Analysis': {
+                'Main theme': 'xxx',
+                'Description and title': 'Yes',
+                'Type of PR': 'Bug fix'
+            },
+            'PR Feedback': {
+                'General PR suggestions': '..., `xxx`...',
+                'Code suggestions': [
+                    {
+                        'suggestion number': 1,
+                        'relevant file': 'xxx.py',
+                        'suggestion content': 'xxx [important]'
+                    }
+                ]
+            }
+        }
+        assert try_fix_json(review) == expected_output
+
+    def test_incomplete_code_suggestions_relevant_file(self):
+        review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.p'
+        expected_output = {
+            'PR Analysis': {
+                'Main theme': 'xxx',
+                'Description and title': 'Yes',
+                'Type of PR': 'Bug fix'
+            },
+            'PR Feedback': {
+                'General PR suggestions': '..., `xxx`...',
+                'Code suggestions': [
+                    {
+                        'suggestion number': 1,
+                        'relevant file': 'xxx.py',
+                        'suggestion content': 'xxx [important]'
+                    }
+                ]
+            }
+        }
+        assert try_fix_json(review) == expected_output
--- a/tests/unit/test_language_handler.py
+++ b/tests/unit/test_language_handler.py
@ -1,15 +1,15 @@

 # Generated by CodiumAI
+
 from pr_agent.algo.language_handler import sort_files_by_main_languages

-
-import pytest
-
 """
 Code Analysis

 Objective:
-The objective of the function is to sort a list of files by their main language, putting the files that are in the main language first and the rest of the files after. It takes in a dictionary of languages and their sizes, and a list of files.
+The objective of the function is to sort a list of files by their main language, putting the files that are in the main 
+language first and the rest of the files after. It takes in a dictionary of languages and their sizes, and a list of 
+files.

 Inputs:
 - languages: a dictionary containing the languages and their sizes
@ -33,6 +33,8 @@ Additional aspects:
 - The function uses the filter_bad_extensions function to filter out files with bad extensions
 - The function uses a rest_files dictionary to store the files that do not belong to any of the main extensions
 """
+
+
 class TestSortFilesByMainLanguages:
    # Tests that files are sorted by main language, with files in main language first and the rest after
    def test_happy_path_sort_files_by_main_languages(self):
@ -118,4 +120,4 @@ class TestSortFilesByMainLanguages:
            {'language': 'C++', 'files': [files[2], files[7]]},
            {'language': 'Other', 'files': []}
        ]
-        assert sort_files_by_main_languages(languages, files) == expected_output
+        assert sort_files_by_main_languages(languages, files) == expected_output
--- a/tests/unit/test_parse_code_suggestion.py
+++ b/tests/unit/test_parse_code_suggestion.py
@ -70,7 +70,7 @@ class TestParseCodeSuggestion:
            'before': 'Before 1',
            'after': 'After 1'
        }
-        expected_output = "   **suggestion:** Suggestion 1\n   **description:** Description 1\n   **before:** Before 1\n   **after:** After 1\n\n"
+        expected_output = "   **suggestion:** Suggestion 1\n   **description:** Description 1\n   **before:** Before 1\n   **after:** After 1\n\n"  # noqa: E501
        assert parse_code_suggestion(code_suggestions) == expected_output

    # Tests that function returns correct output when input dictionary has 'code example' key
@ -84,5 +84,5 @@ class TestParseCodeSuggestion:
                'after': 'After 2'
            }
        }
-        expected_output = "   **suggestion:** Suggestion 2\n   **description:** Description 2\n  - **code example:**\n    - **before:**\n        ```\n        Before 2\n        ```\n    - **after:**\n        ```\n        After 2\n        ```\n\n"
+        expected_output = "   **suggestion:** Suggestion 2\n   **description:** Description 2\n  - **code example:**\n    - **before:**\n        ```\n        Before 2\n        ```\n    - **after:**\n        ```\n        After 2\n        ```\n\n"  # noqa: E501
        assert parse_code_suggestion(code_suggestions) == expected_output
Author	SHA1	Message	Date
Ori Kotek	cd1e62ec96	Add Azure OpenAI support	2023-07-12 11:53:46 +03:00
Ori Kotek	7767cae181	Merge pull request #39 from Codium-ai/bugfix/cli Remove installation_id from cli	2023-07-12 11:31:43 +03:00
Ori Kotek	1bc206e7b2	Remove installation_id from cli	2023-07-12 11:31:06 +03:00
Hussam Lawen	52a438b3c8	Merge pull request #38 from Codium-ai/hl/try_fix_when_broken_output Try to fix json output when it's broken or incomplete	2023-07-11 22:23:07 +03:00
Hussam.lawen	b8a71b369d	add max_iter	2023-07-11 22:22:08 +03:00
Hussam.lawen	72af2a1f9c	Add tests	2023-07-11 22:11:55 +03:00
Hussam.lawen	fd4a2bf7ff	refactor try_fix_json, generalize finding the ending of a json item (support new lines, spaces tab)	2023-07-11 22:11:42 +03:00
Hussam.lawen	a3211d4958	Merge commit '210d94f2aa6ebf872b9b85051d1842c32d4fc34e' into hl/try_fix_when_broken_output	2023-07-11 17:33:02 +03:00
Hussam.lawen	86d7ed5f82	Try to fix broken json output	2023-07-11 17:32:48 +03:00
Ori Kotek	210d94f2aa	Merge pull request #24 from Xyand/feature/gitlab_provider Feature/gitlab provider	2023-07-11 16:56:44 +03:00
Ori Kotek	b2d952cafa	1. Move deployment_type to configuration.toml 2. Lint 3. Inject GitHub app installation ID into GitHub provider using the settings mechanism.	2023-07-11 16:55:09 +03:00