diff --git a/README.md b/README.md index 084bd670..364c722f 100644 --- a/README.md +++ b/README.md @@ -86,8 +86,8 @@ pip install -r requirements.txt 3. Copy the secrets template file and fill in your OpenAI key and your GitHub user token: ``` -cp pr_agent/settings/.secrets_template.toml pr_agent/settings/.secrets -# Edit .secrets file +cp pr_agent/settings/.secrets_template.toml pr_agent/settings/.secrets.toml +# Edit .secrets.toml file ``` 4. Run the appropriate Python scripts from the scripts folder: @@ -147,8 +147,8 @@ git clone https://github.com/Codium-ai/pr-agent.git - Copy your app's webhook secret to the webhook_secret field. ``` -cp pr_agent/settings/.secrets_template.toml pr_agent/settings/.secrets -# Edit .secrets file +cp pr_agent/settings/.secrets_template.toml pr_agent/settings/.secrets.toml +# Edit .secrets.toml file ``` 6. Build a Docker image for the app and optionally push it to a Docker repository. We'll use Dockerhub as an example: @@ -186,7 +186,7 @@ Here is a quick overview of the different sub-tools of PR Reviewer: - PR description and title - PR type classification - Is the PR covered by relevant tests - - Is the PR minimal and focused + - Is this a focused PR - Are there security concerns - PR Feedback - General PR suggestions @@ -202,7 +202,7 @@ This is how a typical output of the PR Reviewer looks like: - 🔍 **Description and title:** Yes - 📌 **Type of PR:** Enhancement - 🧪 **Relevant tests added:** No -- ✨ **Minimal and focused:** Yes, the PR is focused on adding two new handlers for language extension and token counting. +- ✨ **Focused PR:** Yes, the PR is focused on adding two new handlers for language extension and token counting. - 🔒 **Security concerns:** No, the PR does not introduce possible security concerns or issues. 
#### PR Feedback @@ -245,7 +245,7 @@ The different tools and sub-tools used by CodiumAI pr-agent are easily configura You can enable/disable the different PR Reviewer sub-sections with the following flags: ``` -require_minimal_and_focused_review=true +require_focused_review=true require_tests_review=true require_security_review=true ``` diff --git a/pics/logo-dark.png b/pics/logo-dark.png index 255871cc..6b78f666 100644 Binary files a/pics/logo-dark.png and b/pics/logo-dark.png differ diff --git a/pics/logo-light.png b/pics/logo-light.png index fec132c6..23cb48d2 100644 Binary files a/pics/logo-light.png and b/pics/logo-light.png differ diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 9b7bb442..0853ce9c 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -2,7 +2,7 @@ from __future__ import annotations import difflib import logging -from typing import Any, Dict, Tuple, Union +from typing import Any, Tuple, Union from pr_agent.algo.git_patch_processing import extend_patch, handle_patch_deletions from pr_agent.algo.language_handler import sort_files_by_main_languages @@ -14,7 +14,8 @@ DELETED_FILES_ = "Deleted files:\n" MORE_MODIFIED_FILES_ = "More modified files:\n" -OUTPUT_BUFFER_TOKENS = 800 +OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD = 1000 +OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600 PATCH_EXTRA_LINES = 3 @@ -32,11 +33,12 @@ def get_pr_diff(git_provider: Union[GithubProvider, Any], token_handler: TokenHa patches_extended, total_tokens = pr_generate_extended_diff(pr_languages, token_handler) # if we are under the limit, return the full diff - if total_tokens + OUTPUT_BUFFER_TOKENS < token_handler.limit: + if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < token_handler.limit: return "\n".join(patches_extended) # if we are over the limit, start pruning - patches_compressed, modified_file_names, deleted_file_names = pr_generate_compressed_diff(pr_languages, token_handler) + patches_compressed, 
modified_file_names, deleted_file_names = pr_generate_compressed_diff(pr_languages, + token_handler) final_diff = "\n".join(patches_compressed) if modified_file_names: modified_list_str = MORE_MODIFIED_FILES_ + "\n".join(modified_file_names) @@ -115,12 +117,12 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler) -> new_patch_tokens = token_handler.count_tokens(patch) # Hard Stop, no more tokens - if total_tokens > token_handler.limit - OUTPUT_BUFFER_TOKENS // 2: + if total_tokens > token_handler.limit - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD: logging.warning(f"File was fully skipped, no more tokens: {file.filename}.") continue # If the patch is too large, just show the file name - if total_tokens + new_patch_tokens > token_handler.limit - OUTPUT_BUFFER_TOKENS: + if total_tokens + new_patch_tokens > token_handler.limit - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD: # Current logic is to skip the patch if it's too large # TODO: Option for alternative logic to remove hunks from the patch to reduce the number of tokens # until we meet the requirements diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 70994fd8..045144d2 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -12,7 +12,7 @@ def convert_to_markdown(output_data: dict) -> str: "Type of PR": "📌", "Relevant tests added": "🧪", "Unrelated changes": "⚠️", - "Minimal and focused": "✨", + "Focused PR": "✨", "Security concerns": "🔒", "General PR suggestions": "💡", "Code suggestions": "🤖" diff --git a/pr_agent/servers/github_app_webhook.py b/pr_agent/servers/github_app.py similarity index 100% rename from pr_agent/servers/github_app_webhook.py rename to pr_agent/servers/github_app.py diff --git a/pr_agent/servers/github_polling.py b/pr_agent/servers/github_polling.py index 45d2942a..06293fd6 100644 --- a/pr_agent/servers/github_polling.py +++ b/pr_agent/servers/github_polling.py @@ -38,6 +38,7 @@ async def polling_loop(): async with 
aiohttp.ClientSession() as session: while True: try: + await asyncio.sleep(5) headers = { "Accept": "application/vnd.github.v3+json", "Authorization": f"Bearer {token}" @@ -86,10 +87,8 @@ async def polling_loop(): elif response.status != 304: print(f"Failed to fetch notifications. Status code: {response.status}") - await asyncio.sleep(5) except Exception as e: logging.error(f"Exception during processing of a notification: {e}") - await asyncio.sleep(5) if __name__ == '__main__': asyncio.run(polling_loop()) diff --git a/pr_agent/settings/.secrets_template.toml b/pr_agent/settings/.secrets_template.toml index 73bd7de2..7ce3d52f 100644 --- a/pr_agent/settings/.secrets_template.toml +++ b/pr_agent/settings/.secrets_template.toml @@ -1,5 +1,5 @@ # QUICKSTART: -# Copy this file to .secrets in the same folder. +# Copy this file to .secrets.toml in the same folder. # The minimum workable settings - set openai.key to your API key. # Set github.deployment_type to "user" and github.user_token to your GitHub personal access token. # This will allow you to run the CLI scripts in the scripts/ folder and the github_polling server. diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 9d5f1642..6bb6db97 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -5,7 +5,7 @@ publish_review=true verbosity_level=0 # 0,1,2 [pr_reviewer] -require_minimal_and_focused_review=true +require_focused_review=true require_tests_review=true require_security_review=true extended_code_suggestions=false diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index 3b3549f0..678c7520 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -30,10 +30,10 @@ You must use the following JSON schema to format your answer: "description": "yes\\no question: does this PR have relevant tests ?" 
}, {%- endif %} -{%- if require_minimal_and_focused %} - "Minimal and focused": { +{%- if require_focused %} + "Focused PR": { "type": "string", - "description": "is this PR as minimal and focused as possible, with all code changes centered around a single coherent theme, described in the PR description and title ?" Make sure to explain your answer" + "description": "Is this a focused PR, in the sense that it has a clear and coherent title and description, and all PR code diff changes are properly derived from the title and description? Explain your response." } }, {%- endif %} @@ -106,8 +106,8 @@ Example output: {%- if require_tests %} "Relevant tests added": "No", {%- endif %} -{%- if require_minimal_and_focused %} - "Minimal and focused": "yes\\no, because ..." +{%- if require_focused %} + "Focused PR": "yes\\no, because ..." {%- endif %} }, "PR Feedback": diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index b43f274d..f0a8c485 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -33,7 +33,7 @@ class PRReviewer: "diff": "", # empty diff for initial calculation "require_tests": settings.pr_reviewer.require_tests_review, "require_security": settings.pr_reviewer.require_security_review, - "require_minimal_and_focused": settings.pr_reviewer.require_minimal_and_focused_review, + "require_focused": settings.pr_reviewer.require_focused_review, 'extended_code_suggestions': settings.pr_reviewer.extended_code_suggestions, 'num_code_suggestions': settings.pr_reviewer.num_code_suggestions, } diff --git a/tests/unit/test_convert_to_markdown.py b/tests/unit/test_convert_to_markdown.py index 05c84a77..08a49f76 100644 --- a/tests/unit/test_convert_to_markdown.py +++ b/tests/unit/test_convert_to_markdown.py @@ -1,6 +1,6 @@ # Generated by CodiumAI from pr_agent.algo.utils import convert_to_markdown - +import pytest """ Code Analysis @@ -50,7 +50,7 @@ class TestConvertToMarkdown: 'Type of PR': 'Test type', 'Relevant tests 
added': 'no', 'Unrelated changes': 'n/a', # won't be included in the output - 'Minimal and focused': 'Yes', + 'Focused PR': 'Yes', 'General PR suggestions': 'general suggestion...', 'Code suggestions': [ { @@ -74,12 +74,11 @@ class TestConvertToMarkdown: - 🔍 **Description and title:** Test description - 📌 **Type of PR:** Test type - 🧪 **Relevant tests added:** no -- ✨ **Minimal and focused:** Yes +- ✨ **Focused PR:** Yes - 💡 **General PR suggestions:** general suggestion... - 🤖 **Code suggestions:** -- **suggestion 1:** - **Code example:** - **Before:** ``` @@ -90,7 +89,6 @@ class TestConvertToMarkdown: Code after ``` -- **suggestion 2:** - **Code example:** - **Before:** ``` @@ -116,7 +114,7 @@ class TestConvertToMarkdown: 'Type of PR': {}, 'Relevant tests added': {}, 'Unrelated changes': {}, - 'Minimal and focused': {}, + 'Focused PR': {}, 'General PR suggestions': {}, 'Code suggestions': {} } diff --git a/tests/unit/test_parse_code_suggestion.py b/tests/unit/test_parse_code_suggestion.py index a0da856d..082fed77 100644 --- a/tests/unit/test_parse_code_suggestion.py +++ b/tests/unit/test_parse_code_suggestion.py @@ -47,7 +47,7 @@ class TestParseCodeSuggestion: "Suggestion number": "one", "Description": "This is a suggestion" } - expected_output = "- **suggestion one:**\n - **Description:** This is a suggestion\n\n" + expected_output = " **Description:** This is a suggestion\n\n" assert parse_code_suggestion(input_data) == expected_output # Tests that function returns correct output when 'before' or 'after' key has a non-string value @@ -70,7 +70,7 @@ class TestParseCodeSuggestion: 'before': 'Before 1', 'after': 'After 1' } - expected_output = "- **suggestion 1:**\n - **suggestion:** Suggestion 1\n - **description:** Description 1\n - **before:** Before 1\n - **after:** After 1\n\n" # noqa: E501 + expected_output = " **suggestion:** Suggestion 1\n **description:** Description 1\n **before:** Before 1\n **after:** After 1\n\n" assert 
parse_code_suggestion(code_suggestions) == expected_output # Tests that function returns correct output when input dictionary has 'code example' key @@ -84,5 +84,5 @@ class TestParseCodeSuggestion: 'after': 'After 2' } } - expected_output = "- **suggestion 2:**\n - **suggestion:** Suggestion 2\n - **description:** Description 2\n - **code example:**\n - **before:**\n ```\n Before 2\n ```\n - **after:**\n ```\n After 2\n ```\n\n" # noqa: E501 + expected_output = " **suggestion:** Suggestion 2\n **description:** Description 2\n - **code example:**\n - **before:**\n ```\n Before 2\n ```\n - **after:**\n ```\n After 2\n ```\n\n" assert parse_code_suggestion(code_suggestions) == expected_output