Merge remote-tracking branch 'origin/main' into feature/gitlab_provider

This commit is contained in:
Ori Kotek
2023-07-11 15:49:06 +03:00
13 changed files with 32 additions and 33 deletions

View File

@@ -86,8 +86,8 @@ pip install -r requirements.txt
3. Copy the secrets template file and fill in your OpenAI key and your GitHub user token:
```
cp pr_agent/settings/.secrets_template.toml pr_agent/settings/.secrets
# Edit .secrets file
cp pr_agent/settings/.secrets_template.toml pr_agent/settings/.secrets.toml
# Edit .secrets.toml file
```
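A quick way to confirm the renamed file is filled in correctly (a hedged sketch, not part of this diff): parse the copied `.secrets.toml` and check the keys called out in the settings template's quickstart comments, `openai.key`, `github.deployment_type`, and `github.user_token`. Assumes Python 3.11+ for the standard-library `tomllib` module.
```
# Hedged sketch (not from the repo): verify the renamed .secrets.toml defines
# the keys called out in the settings template. Requires Python 3.11+ (tomllib).
import tomllib

with open("pr_agent/settings/.secrets.toml", "rb") as f:
    secrets = tomllib.load(f)

assert secrets["openai"]["key"], "openai.key is missing or empty"
assert secrets["github"]["deployment_type"] == "user", "set github.deployment_type to 'user' for the CLI scripts"
assert secrets["github"]["user_token"], "github.user_token is missing or empty"
print(".secrets.toml looks complete")
```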
4. Run the appropriate Python scripts from the scripts folder:
@@ -147,8 +147,8 @@ git clone https://github.com/Codium-ai/pr-agent.git
- Copy your app's webhook secret to the webhook_secret field.
```
cp pr_agent/settings/.secrets_template.toml pr_agent/settings/.secrets
# Edit .secrets file
cp pr_agent/settings/.secrets_template.toml pr_agent/settings/.secrets.toml
# Edit .secrets.toml file
```
6. Build a Docker image for the app and optionally push it to a Docker repository. We'll use Dockerhub as an example:
@@ -186,7 +186,7 @@ Here is a quick overview of the different sub-tools of PR Reviewer:
- PR description and title
- PR type classification
- Is the PR covered by relevant tests
- Is the PR minimal and focused
- Is this a focused PR
- Are there security concerns
- PR Feedback
- General PR suggestions
@@ -202,7 +202,7 @@ This is how a typical output of the PR Reviewer looks like:
- 🔍 **Description and title:** Yes
- 📌 **Type of PR:** Enhancement
- 🧪 **Relevant tests added:** No
- ✨ **Minimal and focused:** Yes, the PR is focused on adding two new handlers for language extension and token counting.
- ✨ **Focused PR:** Yes, the PR is focused on adding two new handlers for language extension and token counting.
- 🔒 **Security concerns:** No, the PR does not introduce possible security concerns or issues.
#### PR Feedback
@@ -245,7 +245,7 @@ The different tools and sub-tools used by CodiumAI pr-agent are easily configura
You can enable/disable the different PR Reviewer sub-sections with the following flags:
```
require_minimal_and_focused_review=true
require_focused_review=true
require_tests_review=true
require_security_review=true
```
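These flags live under the `[pr_reviewer]` section of `pr_agent/settings/configuration.toml` (its hunk appears later in this diff), and `pr_reviewer.py` forwards them to the review prompt as the Jinja variables `require_focused`, `require_tests`, and `require_security`. A minimal, illustrative sketch of that mapping (plain dicts, no repo imports):
```
# Illustrative only: how the renamed flag is expected to reach the prompt
# template, mirroring the pr_reviewer.py hunk later in this diff.
pr_reviewer_settings = {
    "require_focused_review": True,
    "require_tests_review": True,
    "require_security_review": True,
}

prompt_variables = {
    "require_focused": pr_reviewer_settings["require_focused_review"],
    "require_tests": pr_reviewer_settings["require_tests_review"],
    "require_security": pr_reviewer_settings["require_security_review"],
}
print(prompt_variables)
```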

Binary image file changed (26 KiB → 19 KiB).

Binary image file changed (27 KiB → 20 KiB).

View File

@@ -2,7 +2,7 @@ from __future__ import annotations
import difflib
import logging
from typing import Any, Dict, Tuple, Union
from typing import Any, Tuple, Union
from pr_agent.algo.git_patch_processing import extend_patch, handle_patch_deletions
from pr_agent.algo.language_handler import sort_files_by_main_languages
@@ -14,7 +14,8 @@ DELETED_FILES_ = "Deleted files:\n"
MORE_MODIFIED_FILES_ = "More modified files:\n"
OUTPUT_BUFFER_TOKENS = 800
OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD = 1000
OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600
PATCH_EXTRA_LINES = 3
@@ -32,11 +33,12 @@ def get_pr_diff(git_provider: Union[GithubProvider, Any], token_handler: TokenHa
patches_extended, total_tokens = pr_generate_extended_diff(pr_languages, token_handler)
# if we are under the limit, return the full diff
if total_tokens + OUTPUT_BUFFER_TOKENS < token_handler.limit:
if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < token_handler.limit:
return "\n".join(patches_extended)
# if we are over the limit, start pruning
patches_compressed, modified_file_names, deleted_file_names = pr_generate_compressed_diff(pr_languages, token_handler)
patches_compressed, modified_file_names, deleted_file_names = pr_generate_compressed_diff(pr_languages,
token_handler)
final_diff = "\n".join(patches_compressed)
if modified_file_names:
modified_list_str = MORE_MODIFIED_FILES_ + "\n".join(modified_file_names)
@@ -115,12 +117,12 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler) ->
new_patch_tokens = token_handler.count_tokens(patch)
# Hard Stop, no more tokens
if total_tokens > token_handler.limit - OUTPUT_BUFFER_TOKENS // 2:
if total_tokens > token_handler.limit - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD:
logging.warning(f"File was fully skipped, no more tokens: {file.filename}.")
continue
# If the patch is too large, just show the file name
if total_tokens + new_patch_tokens > token_handler.limit - OUTPUT_BUFFER_TOKENS:
if total_tokens + new_patch_tokens > token_handler.limit - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
# Current logic is to skip the patch if it's too large
# TODO: Option for alternative logic to remove hunks from the patch to reduce the number of tokens
# until we meet the requirements
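
The hunks above split the old single `OUTPUT_BUFFER_TOKENS` constant into a soft threshold (1000 tokens), which decides whether the full extended diff fits, and a hard threshold (600 tokens), which stops per-file pruning. A hedged, self-contained sketch of that budgeting logic follows; `count_tokens` and `token_limit` are stand-ins, not the repo's `TokenHandler` API.
```
# Hedged sketch of the budgeting rules introduced above; `count_tokens` and
# `token_limit` are stand-ins, not the repo's API.
OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD = 1000
OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600


def count_tokens(text: str) -> int:
    # Crude stand-in for the real tokenizer.
    return len(text.split())


def build_diff(patches: list[str], token_limit: int) -> str:
    total = sum(count_tokens(p) for p in patches)
    # Under the soft threshold: return the full (extended) diff.
    if total + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < token_limit:
        return "\n".join(patches)

    # Otherwise prune patch by patch, mirroring pr_generate_compressed_diff.
    kept, used = [], 0
    for patch in patches:
        if used > token_limit - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD:
            continue  # hard stop: no room left for further patches
        if used + count_tokens(patch) > token_limit - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
            continue  # patch too large: skipped (the real code lists only its file name)
        kept.append(patch)
        used += count_tokens(patch)
    return "\n".join(kept)
```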

View File

@@ -12,7 +12,7 @@ def convert_to_markdown(output_data: dict) -> str:
"Type of PR": "📌",
"Relevant tests added": "🧪",
"Unrelated changes": "⚠️",
"Minimal and focused": "",
"Focused PR": "",
"Security concerns": "🔒",
"General PR suggestions": "💡",
"Code suggestions": "🤖"

View File

@@ -38,6 +38,7 @@ async def polling_loop():
async with aiohttp.ClientSession() as session:
while True:
try:
await asyncio.sleep(5)
headers = {
"Accept": "application/vnd.github.v3+json",
"Authorization": f"Bearer {token}"
@@ -86,10 +87,8 @@ async def polling_loop():
elif response.status != 304:
print(f"Failed to fetch notifications. Status code: {response.status}")
await asyncio.sleep(5)
except Exception as e:
logging.error(f"Exception during processing of a notification: {e}")
await asyncio.sleep(5)
if __name__ == '__main__':
asyncio.run(polling_loop())
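
In the polling server, the change above moves the 5-second sleep to the top of the loop, so the success path and the exception handler both fall through to the same throttle instead of sleeping separately. A hedged, generic sketch of that shape (the endpoint URL and token handling are illustrative stand-ins, not the repo's code):
```
# Hedged sketch of the polling shape shown above; URL and token handling are
# illustrative, not the repo's implementation.
import asyncio
import logging

import aiohttp

NOTIFICATIONS_URL = "https://api.github.com/notifications"  # assumed endpoint


async def polling_loop(token: str) -> None:
    async with aiohttp.ClientSession() as session:
        while True:
            try:
                await asyncio.sleep(5)  # single sleep at the top of each iteration
                headers = {
                    "Accept": "application/vnd.github.v3+json",
                    "Authorization": f"Bearer {token}",
                }
                async with session.get(NOTIFICATIONS_URL, headers=headers) as response:
                    if response.status == 200:
                        notifications = await response.json()
                        print(f"Fetched {len(notifications)} notifications")
                    elif response.status != 304:  # 304 = nothing new since the last poll
                        print(f"Failed to fetch notifications. Status code: {response.status}")
            except Exception as e:
                logging.error(f"Exception during processing of a notification: {e}")
                # no extra sleep here; the sleep at the top already throttles retries


if __name__ == "__main__":
    asyncio.run(polling_loop("<github token>"))
```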

View File

@@ -1,5 +1,5 @@
# QUICKSTART:
# Copy this file to .secrets in the same folder.
# Copy this file to .secrets.toml in the same folder.
# The minimum workable settings - set openai.key to your API key.
# Set github.deployment_type to "user" and github.user_token to your GitHub personal access token.
# This will allow you to run the CLI scripts in the scripts/ folder and the github_polling server.

View File

@@ -5,7 +5,7 @@ publish_review=true
verbosity_level=0 # 0,1,2
[pr_reviewer]
require_minimal_and_focused_review=true
require_focused_review=true
require_tests_review=true
require_security_review=true
extended_code_suggestions=false

View File

@@ -30,10 +30,10 @@ You must use the following JSON schema to format your answer:
"description": "yes\\no question: does this PR have relevant tests ?"
},
{%- endif %}
{%- if require_minimal_and_focused %}
"Minimal and focused": {
{%- if require_focused %}
"Focused PR": {
"type": "string",
"description": "is this PR as minimal and focused as possible, with all code changes centered around a single coherent theme, described in the PR description and title ?" Make sure to explain your answer"
"description": "Is this a focused PR, in the sense that it has a clear and coherent title and description, and all PR code diff changes are properly derived from the title and description? Explain your response."
}
},
{%- endif %}
@@ -106,8 +106,8 @@ Example output:
{%- if require_tests %}
"Relevant tests added": "No",
{%- endif %}
{%- if require_minimal_and_focused %}
"Minimal and focused": "yes\\no, because ..."
{%- if require_focused %}
"Focused PR": "yes\\no, because ..."
{%- endif %}
},
"PR Feedback":

View File

@@ -33,7 +33,7 @@ class PRReviewer:
"diff": "", # empty diff for initial calculation
"require_tests": settings.pr_reviewer.require_tests_review,
"require_security": settings.pr_reviewer.require_security_review,
"require_minimal_and_focused": settings.pr_reviewer.require_minimal_and_focused_review,
"require_focused": settings.pr_reviewer.require_focused_review,
'extended_code_suggestions': settings.pr_reviewer.extended_code_suggestions,
'num_code_suggestions': settings.pr_reviewer.num_code_suggestions,
}

View File

@@ -1,6 +1,6 @@
# Generated by CodiumAI
from pr_agent.algo.utils import convert_to_markdown
import pytest
"""
Code Analysis
@@ -50,7 +50,7 @@ class TestConvertToMarkdown:
'Type of PR': 'Test type',
'Relevant tests added': 'no',
'Unrelated changes': 'n/a', # won't be included in the output
'Minimal and focused': 'Yes',
'Focused PR': 'Yes',
'General PR suggestions': 'general suggestion...',
'Code suggestions': [
{
@@ -74,12 +74,11 @@ class TestConvertToMarkdown:
- 🔍 **Description and title:** Test description
- 📌 **Type of PR:** Test type
- 🧪 **Relevant tests added:** no
- ✨ **Minimal and focused:** Yes
- ✨ **Focused PR:** Yes
- 💡 **General PR suggestions:** general suggestion...
- 🤖 **Code suggestions:**
- **suggestion 1:**
- **Code example:**
- **Before:**
```
@@ -90,7 +89,6 @@ class TestConvertToMarkdown:
Code after
```
- **suggestion 2:**
- **Code example:**
- **Before:**
```
@@ -116,7 +114,7 @@ class TestConvertToMarkdown:
'Type of PR': {},
'Relevant tests added': {},
'Unrelated changes': {},
'Minimal and focused': {},
'Focused PR': {},
'General PR suggestions': {},
'Code suggestions': {}
}

View File

@@ -47,7 +47,7 @@ class TestParseCodeSuggestion:
"Suggestion number": "one",
"Description": "This is a suggestion"
}
expected_output = "- **suggestion one:**\n - **Description:** This is a suggestion\n\n"
expected_output = " **Description:** This is a suggestion\n\n"
assert parse_code_suggestion(input_data) == expected_output
# Tests that function returns correct output when 'before' or 'after' key has a non-string value
@@ -70,7 +70,7 @@ class TestParseCodeSuggestion:
'before': 'Before 1',
'after': 'After 1'
}
expected_output = "- **suggestion 1:**\n - **suggestion:** Suggestion 1\n - **description:** Description 1\n - **before:** Before 1\n - **after:** After 1\n\n" # noqa: E501
expected_output = " **suggestion:** Suggestion 1\n **description:** Description 1\n **before:** Before 1\n **after:** After 1\n\n"
assert parse_code_suggestion(code_suggestions) == expected_output
# Tests that function returns correct output when input dictionary has 'code example' key
@@ -84,5 +84,5 @@ class TestParseCodeSuggestion:
'after': 'After 2'
}
}
expected_output = "- **suggestion 2:**\n - **suggestion:** Suggestion 2\n - **description:** Description 2\n - **code example:**\n - **before:**\n ```\n Before 2\n ```\n - **after:**\n ```\n After 2\n ```\n\n" # noqa: E501
expected_output = " **suggestion:** Suggestion 2\n **description:** Description 2\n - **code example:**\n - **before:**\n ```\n Before 2\n ```\n - **after:**\n ```\n After 2\n ```\n\n"
assert parse_code_suggestion(code_suggestions) == expected_output
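
For completeness, a hedged usage sketch of `parse_code_suggestion`; the import path is an assumption (this diff only shows `convert_to_markdown` being imported from `pr_agent.algo.utils`), and the input mirrors the fixture above.
```
# Hedged usage sketch; the import path is an assumption, and the dict mirrors
# the test fixture shown above.
from pr_agent.algo.utils import parse_code_suggestion

suggestion = {
    "suggestion": "Suggestion 1",
    "description": "Description 1",
    "before": "Before 1",
    "after": "After 1",
}
# Per the updated test above, the output no longer carries the
# "- **suggestion 1:**" prefix.
print(parse_code_suggestion(suggestion))
```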