Merge pull request #43 from Codium-ai/tr/inline_code_suggestions

Tr/inline code suggestions
2025-07-21 04:50:39 +08:00 · 2023-07-13 10:48:42 +03:00
parent 25bc69f70e 77a451ada0
commit d1b6b3bc95
10 changed files with 56 additions and 108 deletions
--- a/README.md
+++ b/README.md
@ -250,45 +250,6 @@ require_tests_review=true
 require_security_review=true
 ```
 #### Code Suggestions configuration:
 There are also configuration options to control different aspects of the `code suggestions` feature.
 The number of suggestions provided can be controlled by adjusting the following parameter:
 ```
 num_code_suggestions=4
 ```
 You can also enable more verbose and informative mode of code suggestions:
 ```
 extended_code_suggestions=false
 ```
 This is a comparison of the regular and extended code suggestions modes:
 - **relevant file:** sql.py
 - **suggestion content:** Remove hardcoded sensitive information like username and password. Use environment variables or a secure method to store these values. [important]
 Example for extended suggestion:
 - **relevant file:** sql.py
 - **suggestion content:** Remove hardcoded sensitive information (username and password) [important]
 - **why:** Hardcoding sensitive information is a security risk. It's better to use environment variables or a secure way to store these values.
 - **code example:**
  - **before code:**
    ```
    user = "root",
    password = "Mysql@123",
    ```
  - **after code:**
    ```
    user = os.getenv('DB_USER'),
    password = os.getenv('DB_PASSWORD'),
    ```
 ---
 ## How it works
 ![PR-Agent Tools](./pics/pr_agent_overview.png)
--- a/pr_agent/algo/pr_processing.py
+++ b/pr_agent/algo/pr_processing.py
@ -24,10 +24,10 @@ def get_pr_diff(git_provider: Union[GithubProvider, Any], token_handler: TokenHa
    Returns a string with the diff of the PR.
    If needed, apply diff minimization techniques to reduce the number of tokens
    """
-    files = list(git_provider.get_diff_files())
+    git_provider.pr.files = list(git_provider.get_diff_files())
    # get pr languages
-    pr_languages = sort_files_by_main_languages(git_provider.get_languages(), files)
+    pr_languages = sort_files_by_main_languages(git_provider.get_languages(), git_provider.pr.files)
    # generate a standard diff string, with patch extension
    patches_extended, total_tokens = pr_generate_extended_diff(pr_languages, token_handler)
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@ -53,10 +53,7 @@ def parse_code_suggestion(code_suggestions: dict) -> str:
                code_str_indented = textwrap.indent(code_str, '        ')
                markdown_text += f"    - **{code_key}:**\n{code_str_indented}\n"
        else:
-            if "suggestion number" in sub_key.lower():
+            if "relevant file" in sub_key.lower():
                # markdown_text += f"- **suggestion {sub_value}:**\n"  # prettier formatting
                pass
            elif "relevant file" in sub_key.lower():
                markdown_text += f"\n  - **{sub_key}:** {sub_value}\n"
            else:
                markdown_text += f"   **{sub_key}:** {sub_value}\n"
--- a/pr_agent/git_providers/github_provider.py
+++ b/pr_agent/git_providers/github_provider.py
@ -26,6 +26,8 @@ class GithubProvider:
        self.pr = self._get_pr()
    def get_files(self):
        if hasattr(self.pr, 'files'):
            return self.pr.files
        return self.pr.get_files()
    def get_diff_files(self) -> list[FilePatchInfo]:
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@ -2,14 +2,14 @@
 model="gpt-4-0613"
 git_provider="github"
 publish_review=true
-verbosity_level=0  # 0,1,2
+verbosity_level=0 # 0,1,2
 [pr_reviewer]
 require_focused_review=true
 require_tests_review=true
 require_security_review=true
 extended_code_suggestions=false
 num_code_suggestions=4
 inline_code_comments = true
 [pr_questions]
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@ -3,9 +3,6 @@ system="""You are CodiumAI-PR-Reviewer, a language model designed to review git
 Your task is to provide constructive and concise feedback for the PR, and also provide meaningfull code suggestions to improve the new PR code (the '+' lines).
 - Provide up to {{ num_code_suggestions }} code suggestions.
 - Try to focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningfull code improvements, like performance, vulnerability, modularity, and best practices.
 {%- if extended_code_suggestions %}
 - For each suggestion, provide a short and concise code snippet to illustrate the existing code, and the improved code.
 {%- endif %}
 - Make sure not to provide suggestion repeating modifications already implemented in the new PR code (the '+' lines).
 You must use the following JSON schema to format your answer:
@ -47,41 +44,18 @@ You must use the following JSON schema to format your answer:
      "maxItems": {{ num_code_suggestions }},
      "uniqueItems": true,
      "items": {
        "suggestion number": {
          "type": "int",
          "description": "suggestion number, starting from 1"
        },
        "relevant file": {
          "type": "string",
-          "description": "the relevant file name"
+          "description": "the relevant file full path"
        },
        "suggestion content": {
          "type": "string",
 {%- if extended_code_suggestions %}
          "description": "a concrete suggestion for meaningfully improving the new PR code. Don't repeat previous suggestions. Add tags with importance measure that matches each suggestion ('important' or 'medium'). Do not make suggestions for updating or adding docstrings, renaming PR title and description, or linter like.
 {%- else %}
          "description": "a concrete suggestion for meaningfully improving the new PR code. Also describe how, specifically, the suggestion can be applied to new PR code. Add tags with importance measure that matches each suggestion ('important' or 'medium'). Do not make suggestions for updating or adding docstrings, renaming PR title and description, or linter like.
 {%- endif %}
        },
-{%- if extended_code_suggestions %}
+        "relevant line in file": {
        "why": {
          "type": "string",
-          "description": "shortly explain why this suggestion is important"
+          "description": "an authentic single code line from the PR git diff section, to which the suggestion applies."
        },
        "code example": {
          "type": "object",
          "properties": {
            "before code": {
              "type": "string",
              "description": "Short and concise code snippet, to illustrate the existing code"
            },
            "after code": {
              "type": "string",
              "description": "Short and concise code snippet, to illustrate the improved code"
            }
          }
        }
 {%- endif %}
      }
    },
 {%- if require_security %}
@ -115,17 +89,9 @@ Example output:
        "General PR suggestions": "..., `xxx`...",
        "Code suggestions": [
            {
-                "suggestion number": 1,
+                "relevant file": "directory/xxx.py",
                "relevant file": "xxx.py",
                "suggestion content": "xxx [important]",
-{%- if extended_code_suggestions %}
+                "relevant line in file": "xxx",
                "why": "xxx",
                "code example":
                {
                    "before code": "xxx",
                    "after code": "xxx"
                }
 {%- endif %}
            },
            ...
        ]
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@ -33,7 +33,6 @@ class PRReviewer:
            "require_tests": settings.pr_reviewer.require_tests_review,
            "require_security": settings.pr_reviewer.require_security_review,
            "require_focused": settings.pr_reviewer.require_focused_review,
            'extended_code_suggestions': settings.pr_reviewer.extended_code_suggestions,
            'num_code_suggestions': settings.pr_reviewer.num_code_suggestions,
        }
        self.token_handler = TokenHandler(self.git_provider.pr,
@ -55,6 +54,9 @@ class PRReviewer:
            logging.info('Pushing PR review...')
            self.git_provider.publish_comment(pr_comment)
            self.git_provider.remove_initial_comment()
            if settings.pr_reviewer.inline_code_comments:
                logging.info('Pushing inline code comments...')
                self._publish_inline_code_comments()
        return ""
    async def _get_prediction(self):
@ -86,6 +88,9 @@ class PRReviewer:
                del data['PR Feedback']['Security concerns']
                data['PR Analysis']['Security concerns'] = val
        if settings.config.git_provider == 'github' and settings.pr_reviewer.inline_code_comments:
            del data['PR Feedback']['Code suggestions']
        markdown_text = convert_to_markdown(data)
        user = self.git_provider.get_user_id()
@ -103,4 +108,37 @@ class PRReviewer:
        if settings.config.verbosity_level >= 2:
            logging.info(f"Markdown response:\n{markdown_text}")
-        return markdown_text
+        return markdown_text
    def _publish_inline_code_comments(self):
        if settings.config.git_provider != 'github': # inline comments are currently only supported for github
            return
        review = self.prediction.strip()
        try:
            data = json.loads(review)
        except json.decoder.JSONDecodeError:
            data = try_fix_json(review)
        pr = self.git_provider.pr
        last_commit_id = list(pr.get_commits())[-1]
        files = list(self.git_provider.get_diff_files())
        for d in data['PR Feedback']['Code suggestions']:
            relevant_file = d['relevant file'].strip()
            relevant_line_in_file = d['relevant line in file'].strip()
            content = d['suggestion content']
            position = -1
            for file in files:
                if file.filename.strip() == relevant_file:
                    patch = file.patch
                    patch_lines = patch.splitlines()
                    for i, line in enumerate(patch_lines):
                        if relevant_line_in_file in line:
                            position = i
            if position == -1:
                logging.info(f"Could not find position for {relevant_file} {relevant_line_in_file}")
            else:
                body = content
                path = relevant_file.strip()
                pr.create_review_comment(body=body, commit_id=last_commit_id, path=path, position=position)
--- a/tests/unit/test_convert_to_markdown.py
+++ b/tests/unit/test_convert_to_markdown.py
@ -54,14 +54,12 @@ class TestConvertToMarkdown:
            'General PR suggestions': 'general suggestion...',
            'Code suggestions': [
                {
                    'Suggestion number': 1,
                    'Code example': {
                        'Before': 'Code before',
                        'After': 'Code after'
                    }
                },
                {
                    'Suggestion number': 2,
                    'Code example': {
                        'Before': 'Code before 2',
                        'After': 'Code after 2'
--- a/tests/unit/test_fix_output.py
+++ b/tests/unit/test_fix_output.py
@ -7,7 +7,7 @@ import pytest
 class TestTryFixJson:
    # Tests that JSON with complete 'Code suggestions' section returns expected output
    def test_incomplete_code_suggestions(self):
-        review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...'
+        review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...'
        expected_output = {
            'PR Analysis': {
                'Main theme': 'xxx',
@ -18,7 +18,6 @@ class TestTryFixJson:
                'General PR suggestions': '..., `xxx`...',
                'Code suggestions': [
                    {
                        'suggestion number': 1,
                        'relevant file': 'xxx.py',
                        'suggestion content': 'xxx [important]'
                    }
@ -28,7 +27,7 @@ class TestTryFixJson:
        assert try_fix_json(review) == expected_output
    def test_incomplete_code_suggestions_new_line(self):
-        review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n\t, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...'
+        review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n\t, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...'
        expected_output = {
            'PR Analysis': {
                'Main theme': 'xxx',
@ -39,7 +38,6 @@ class TestTryFixJson:
                'General PR suggestions': '..., `xxx`...',
                'Code suggestions': [
                    {
                        'suggestion number': 1,
                        'relevant file': 'xxx.py',
                        'suggestion content': 'xxx [important]'
                    }
@ -49,7 +47,7 @@ class TestTryFixJson:
        assert try_fix_json(review) == expected_output
    def test_incomplete_code_suggestions_many_close_brackets(self):
-        review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy }, [}\n ,incomp.}  ,..'
+        review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy }, [}\n ,incomp.}  ,..'
        expected_output = {
            'PR Analysis': {
                'Main theme': 'xxx',
@ -60,7 +58,6 @@ class TestTryFixJson:
                'General PR suggestions': '..., `xxx`...',
                'Code suggestions': [
                    {
                        'suggestion number': 1,
                        'relevant file': 'xxx.py',
                        'suggestion content': 'xxx [important]'
                    }
@ -70,7 +67,7 @@ class TestTryFixJson:
        assert try_fix_json(review) == expected_output
    def test_incomplete_code_suggestions_relevant_file(self):
-        review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.p'
+        review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.p'
        expected_output = {
            'PR Analysis': {
                'Main theme': 'xxx',
@ -81,7 +78,6 @@ class TestTryFixJson:
                'General PR suggestions': '..., `xxx`...',
                'Code suggestions': [
                    {
                        'suggestion number': 1,
                        'relevant file': 'xxx.py',
                        'suggestion content': 'xxx [important]'
                    }
--- a/tests/unit/test_parse_code_suggestion.py
+++ b/tests/unit/test_parse_code_suggestion.py
@ -41,14 +41,6 @@ class TestParseCodeSuggestion:
        expected_output = "\n"  # modified to expect a newline character
        assert parse_code_suggestion(input_data) == expected_output
    # Tests that function returns correct output when 'suggestion number' key has a non-integer value
    def test_non_integer_suggestion_number(self):
        input_data = {
            "Suggestion number": "one",
            "Description": "This is a suggestion"
        }
        expected_output = "   **Description:** This is a suggestion\n\n"
        assert parse_code_suggestion(input_data) == expected_output
    # Tests that function returns correct output when 'before' or 'after' key has a non-string value
    def test_non_string_before_or_after(self):
@ -64,7 +56,6 @@ class TestParseCodeSuggestion:
    # Tests that function returns correct output when input dictionary does not have 'code example' key
    def test_no_code_example_key(self):
        code_suggestions = {
            'suggestion number': 1,
            'suggestion': 'Suggestion 1',
            'description': 'Description 1',
            'before': 'Before 1',
@ -76,7 +67,6 @@ class TestParseCodeSuggestion:
    # Tests that function returns correct output when input dictionary has 'code example' key
    def test_with_code_example_key(self):
        code_suggestions = {
            'suggestion number': 2,
            'suggestion': 'Suggestion 2',
            'description': 'Description 2',
            'code example': {