inline_code_comments

2025-07-21 04:50:39 +08:00 · 2023-07-13 09:44:33 +03:00
parent 4b8420aa16
commit 77a451ada0
6 changed files with 51 additions and 79 deletions
--- a/README.md
+++ b/README.md
@ -250,45 +250,6 @@ require_tests_review=true
 require_security_review=true
 ```
 #### Code Suggestions configuration:
 There are also configuration options to control different aspects of the `code suggestions` feature.
 The number of suggestions provided can be controlled by adjusting the following parameter:
 ```
 num_code_suggestions=4
 ```
 You can also enable a more verbose and informative mode of code suggestions:
 ```
 extended_code_suggestions=false
 ```
 This is a comparison of the regular and extended code suggestions modes:
 Example for regular suggestion:
 - **relevant file:** sql.py
 - **suggestion content:** Remove hardcoded sensitive information like username and password. Use environment variables or a secure method to store these values. [important]
 Example for extended suggestion:
 - **relevant file:** sql.py
 - **suggestion content:** Remove hardcoded sensitive information (username and password) [important]
 - **why:** Hardcoding sensitive information is a security risk. It's better to use environment variables or a secure way to store these values.
 - **code example:**
  - **before code:**
    ```
    user = "root",
    password = "Mysql@123",
    ```
  - **after code:**
    ```
    user = os.getenv('DB_USER'),
    password = os.getenv('DB_PASSWORD'),
    ```
 ---
 ## How it works
 ![PR-Agent Tools](./pics/pr_agent_overview.png)
--- a/pr_agent/algo/pr_processing.py
+++ b/pr_agent/algo/pr_processing.py
@ -24,10 +24,10 @@ def get_pr_diff(git_provider: Union[GithubProvider, Any], token_handler: TokenHa
    Returns a string with the diff of the PR.
    If needed, apply diff minimization techniques to reduce the number of tokens
    """
-    files = list(git_provider.get_diff_files())
+    git_provider.pr.files = list(git_provider.get_diff_files())
    # get pr languages
-    pr_languages = sort_files_by_main_languages(git_provider.get_languages(), files)
+    pr_languages = sort_files_by_main_languages(git_provider.get_languages(), git_provider.pr.files)
    # generate a standard diff string, with patch extension
    patches_extended, total_tokens = pr_generate_extended_diff(pr_languages, token_handler)
--- a/pr_agent/git_providers/github_provider.py
+++ b/pr_agent/git_providers/github_provider.py
@ -26,6 +26,8 @@ class GithubProvider:
        self.pr = self._get_pr()
    def get_files(self):
        """
        Return the files of the pull request.

        When the PR object already carries a `files` attribute, that value is
        returned as-is; otherwise the result of `self.pr.get_files()` is returned.
        """
        pr = self.pr
        if not hasattr(pr, 'files'):
            # Nothing stored on the PR object - ask the PR object for its files
            return pr.get_files()
        return pr.files
    def get_diff_files(self) -> list[FilePatchInfo]:
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@ -8,8 +8,8 @@ verbosity_level=0  # 0,1,2
 require_focused_review=true
 require_tests_review=true
 require_security_review=true
 extended_code_suggestions=false
 num_code_suggestions=4
 inline_code_comments = true
 [pr_questions]
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@ -3,9 +3,6 @@ system="""You are CodiumAI-PR-Reviewer, a language model designed to review git
 Your task is to provide constructive and concise feedback for the PR, and also provide meaningfull code suggestions to improve the new PR code (the '+' lines).
 - Provide up to {{ num_code_suggestions }} code suggestions.
 - Try to focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningfull code improvements, like performance, vulnerability, modularity, and best practices.
 {%- if extended_code_suggestions %}
 - For each suggestion, provide a short and concise code snippet to illustrate the existing code, and the improved code.
 {%- endif %}
 - Make sure not to provide suggestion repeating modifications already implemented in the new PR code (the '+' lines).
 You must use the following JSON schema to format your answer:
@ -49,36 +46,17 @@ You must use the following JSON schema to format your answer:
      "items": {
        "relevant file": {
          "type": "string",
-          "description": "the relevant file name"
+          "description": "the relevant file full path"
        },
        "suggestion content": {
          "type": "string",
 {%- if extended_code_suggestions %}
          "description": "a concrete suggestion for meaningfully improving the new PR code. Don't repeat previous suggestions. Add tags with importance measure that matches each suggestion ('important' or 'medium'). Do not make suggestions for updating or adding docstrings, renaming PR title and description, or linter like.
 {%- else %}
          "description": "a concrete suggestion for meaningfully improving the new PR code. Also describe how, specifically, the suggestion can be applied to new PR code. Add tags with importance measure that matches each suggestion ('important' or 'medium'). Do not make suggestions for updating or adding docstrings, renaming PR title and description, or linter like.
 {%- endif %}
        },
-{%- if extended_code_suggestions %}
+        "relevant line in file": {
        "why": {
          "type": "string",
-          "description": "shortly explain why this suggestion is important"
+          "description": "an authentic single code line from the PR git diff section, to which the suggestion applies."
        },
        "code example": {
          "type": "object",
          "properties": {
            "before code": {
              "type": "string",
              "description": "Short and concise code snippet, to illustrate the existing code"
            },
            "after code": {
              "type": "string",
              "description": "Short and concise code snippet, to illustrate the improved code"
        }
      }
        }
 {%- endif %}
      }
    },
 {%- if require_security %}
    "Security concerns": {
@ -111,16 +89,9 @@ Example output:
        "General PR suggestions": "..., `xxx`...",
        "Code suggestions": [
            {
-                "relevant file": "xxx.py",
+                "relevant file": "directory/xxx.py",
                "suggestion content": "xxx [important]",
-{%- if extended_code_suggestions %}
+                "relevant line in file": "xxx",
                "why": "xxx",
                "code example":
                {
                    "before code": "xxx",
                    "after code": "xxx"
                }
 {%- endif %}
            },
            ...
        ]
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@ -33,7 +33,6 @@ class PRReviewer:
            "require_tests": settings.pr_reviewer.require_tests_review,
            "require_security": settings.pr_reviewer.require_security_review,
            "require_focused": settings.pr_reviewer.require_focused_review,
            'extended_code_suggestions': settings.pr_reviewer.extended_code_suggestions,
            'num_code_suggestions': settings.pr_reviewer.num_code_suggestions,
        }
        self.token_handler = TokenHandler(self.git_provider.pr,
@ -55,6 +54,9 @@ class PRReviewer:
            logging.info('Pushing PR review...')
            self.git_provider.publish_comment(pr_comment)
            self.git_provider.remove_initial_comment()
            if settings.pr_reviewer.inline_code_comments:
                logging.info('Pushing inline code comments...')
                self._publish_inline_code_comments()
        return ""
    async def _get_prediction(self):
@ -86,6 +88,9 @@ class PRReviewer:
                del data['PR Feedback']['Security concerns']
                data['PR Analysis']['Security concerns'] = val
        if settings.config.git_provider == 'github' and settings.pr_reviewer.inline_code_comments:
            del data['PR Feedback']['Code suggestions']
        markdown_text = convert_to_markdown(data)
        user = self.git_provider.get_user_id()
@ -104,3 +109,36 @@ class PRReviewer:
        if settings.config.verbosity_level >= 2:
            logging.info(f"Markdown response:\n{markdown_text}")
        return markdown_text
    def _publish_inline_code_comments(self):
        """
        Publish every code suggestion of the prediction as an inline PR review comment.

        The prediction (`self.prediction`) is parsed as JSON, falling back to
        `try_fix_json` when it is not valid JSON. For each entry under
        ['PR Feedback']['Code suggestions'], the 'relevant line in file' text is
        searched in the patch of the diff file whose name equals 'relevant file'.
        The index of the last matching patch line is used as the comment position.
        Suggestions that are incomplete or cannot be located are logged and skipped.

        Does nothing when the configured git provider is not 'github'.
        """
        if settings.config.git_provider != 'github': # inline comments are currently only supported for github
            return

        review = self.prediction.strip()
        try:
            data = json.loads(review)
        except json.decoder.JSONDecodeError:
            data = try_fix_json(review)

        # The model output is not guaranteed to follow the schema; tolerate missing sections
        # instead of raising after the main review comment has already been published.
        feedback = data.get('PR Feedback') if isinstance(data, dict) else None
        suggestions = feedback.get('Code suggestions') if isinstance(feedback, dict) else None
        if not suggestions:
            logging.info("No code suggestions found in the prediction, skipping inline code comments")
            return

        pr = self.git_provider.pr
        last_commit_id = list(pr.get_commits())[-1]
        files = list(self.git_provider.get_diff_files())

        for d in suggestions:
            if not isinstance(d, dict):
                continue
            relevant_file = str(d.get('relevant file', '')).strip()
            relevant_line_in_file = str(d.get('relevant line in file', '')).strip()
            content = d.get('suggestion content', '')
            # An empty search string is contained in every line, which would attach the
            # comment to an arbitrary (last) patch line - treat it as "not found" instead.
            if not relevant_file or not relevant_line_in_file or not content:
                logging.info(f"Could not find position for {relevant_file} {relevant_line_in_file}")
                continue

            position = -1
            for file in files:
                if file.filename.strip() != relevant_file:
                    continue
                patch = file.patch
                if not patch:  # no textual patch available for this file
                    continue
                for i, line in enumerate(patch.splitlines()):
                    # Hunk headers are not code lines and cannot carry a comment.
                    # NOTE(review): assumes the patch begins with its first '@@' header, so that
                    # the line index equals the diff position expected by GitHub - confirm.
                    if line.startswith('@@'):
                        continue
                    if relevant_line_in_file in line:
                        position = i  # keep the last match

            if position == -1:
                logging.info(f"Could not find position for {relevant_file} {relevant_line_in_file}")
            else:
                pr.create_review_comment(body=content, commit_id=last_commit_id, path=relevant_file,
                                         position=position)