Merge branch 'main' into feature/gha-outputs-1

2025-07-21 04:50:39 +08:00 · 2024-04-10 23:27:44 +09:00
parent aef1c6ecde f0c5aec0e4
commit 9e4ffd824c
48 changed files with 441 additions and 429 deletions
--- a/pr_agent/algo/init.py
+++ b/pr_agent/algo/init.py
@ -11,6 +11,8 @@ MAX_TOKENS = {
    'gpt-4-1106-preview': 128000, # 128K, but may be limited by config.max_model_tokens
    'gpt-4-0125-preview': 128000,  # 128K, but may be limited by config.max_model_tokens
    'gpt-4-turbo-preview': 128000,  # 128K, but may be limited by config.max_model_tokens
+    'gpt-4-turbo-2024-04-09': 128000,  # 128K, but may be limited by config.max_model_tokens
+    'gpt-4-turbo': 128000,  # 128K, but may be limited by config.max_model_tokens
    'claude-instant-1': 100000,
    'claude-2': 100000,
    'command-nightly': 4096,
--- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py
+++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@ -61,6 +61,9 @@ class LiteLLMAIHandler(BaseAiHandler):
        if get_settings().get("HUGGINGFACE.API_BASE", None) and 'huggingface' in get_settings().config.model:
            litellm.api_base = get_settings().huggingface.api_base
            self.api_base = get_settings().huggingface.api_base
+        if get_settings().get("OLLAMA.API_BASE", None) :
+            litellm.api_base = get_settings().ollama.api_base
+            self.api_base = get_settings().ollama.api_base
        if get_settings().get("HUGGINGFACE.REPITITION_PENALTY", None):
            self.repetition_penalty = float(get_settings().huggingface.repetition_penalty)
        if get_settings().get("VERTEXAI.VERTEX_PROJECT", None):
@ -150,4 +153,4 @@ class LiteLLMAIHandler(BaseAiHandler):
            if get_settings().config.verbosity_level >= 2:
                get_logger().info(f"\nAI response:\n{resp}")

-        return resp, finish_reason
+        return resp, finish_reason
--- a/pr_agent/algo/token_handler.py
+++ b/pr_agent/algo/token_handler.py
@ -1,12 +1,25 @@
 from jinja2 import Environment, StrictUndefined
 from tiktoken import encoding_for_model, get_encoding
-
 from pr_agent.config_loader import get_settings
+from threading import Lock


-def get_token_encoder():
-    return encoding_for_model(get_settings().config.model) if "gpt" in get_settings().config.model else get_encoding(
-        "cl100k_base")
+class TokenEncoder:
+    """Class-level cache of the tiktoken encoder for the configured model.
+
+    The encoder is built on first use and rebuilt whenever
+    ``get_settings().config.model`` differs from the model name the cached
+    encoder was built for.
+    """
+
+    _encoder_instance = None  # cached encoder; None until the first call
+    _model = None  # model name that _encoder_instance was built for
+    _lock = Lock()  # serializes (re)building of the cached encoder
+
+    @classmethod
+    def get_token_encoder(cls):
+        """Return the encoder matching the currently configured model.
+
+        Model names containing "gpt" are resolved with ``encoding_for_model``;
+        every other model uses the "cl100k_base" encoding.
+        """
+        model = get_settings().config.model
+        # Unlocked check first so the common (already cached) case skips the lock;
+        # the same condition is re-evaluated under the lock before rebuilding.
+        if cls._encoder_instance is None or model != cls._model:
+            with cls._lock:
+                if cls._encoder_instance is None or model != cls._model:
+                    encoder = encoding_for_model(model) if "gpt" in model else get_encoding(
+                        "cl100k_base")
+                    # Store the encoder before the model name. The unlocked check
+                    # above compares the name and the caller then reads the encoder,
+                    # so the name must never be visible while the previous encoder
+                    # is still cached. Assigning the name last also means a failed
+                    # build leaves the name/encoder pair unchanged instead of
+                    # marking a stale encoder as valid for the new model.
+                    cls._encoder_instance = encoder
+                    cls._model = model
+        return cls._encoder_instance
+

 class TokenHandler:
    """
@ -31,7 +44,7 @@ class TokenHandler:
        - system: The system string.
        - user: The user string.
        """
-        self.encoder = get_token_encoder()
+        self.encoder = TokenEncoder.get_token_encoder()
        if pr is not None:
            self.prompt_tokens = self._get_system_user_tokens(pr, self.encoder, vars, system, user)

--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@ -13,7 +13,7 @@ import yaml
 from starlette_context import context

 from pr_agent.algo import MAX_TOKENS
-from pr_agent.algo.token_handler import get_token_encoder
+from pr_agent.algo.token_handler import TokenEncoder
 from pr_agent.config_loader import get_settings, global_settings
 from pr_agent.algo.types import FilePatchInfo
 from pr_agent.log import get_logger
@ -567,7 +567,7 @@ def clip_tokens(text: str, max_tokens: int, add_three_dots=True) -> str:
        return text

    try:
-        encoder = get_token_encoder()
+        encoder = TokenEncoder.get_token_encoder()
        num_input_tokens = len(encoder.encode(text))
        if num_input_tokens <= max_tokens:
            return text
@ -576,7 +576,7 @@ def clip_tokens(text: str, max_tokens: int, add_three_dots=True) -> str:
        num_output_chars = int(chars_per_token * max_tokens)
        clipped_text = text[:num_output_chars]
        if add_three_dots:
-            clipped_text += "...(truncated)"
+            clipped_text += "\n...(truncated)"
        return clipped_text
    except Exception as e:
        get_logger().warning(f"Failed to clip tokens: {e}")
--- a/pr_agent/git_providers/codecommit_provider.py
+++ b/pr_agent/git_providers/codecommit_provider.py
@ -10,7 +10,7 @@ from ..algo.utils import load_large_diff
 from .git_provider import GitProvider
 from ..config_loader import get_settings
 from ..log import get_logger
-
+from pr_agent.algo.language_handler import is_valid_file

 class PullRequestCCMimic:
    """
--- a/pr_agent/git_providers/github_provider.py
+++ b/pr_agent/git_providers/github_provider.py
@ -745,22 +745,4 @@ class GithubProvider(GitProvider):
            return False

    def calc_pr_statistics(self, pull_request_data: dict):
-        try:
-            out = {}
-            from datetime import datetime
-            created_at = pull_request_data['created_at']
-            closed_at = pull_request_data['closed_at']
-            closed_at_datetime = datetime.strptime(closed_at, "%Y-%m-%dT%H:%M:%SZ")
-            created_at_datetime = datetime.strptime(created_at, "%Y-%m-%dT%H:%M:%SZ")
-            difference = closed_at_datetime - created_at_datetime
-            out['hours'] = difference.total_seconds() / 3600
-            out['commits'] = pull_request_data['commits']
-            out['comments'] = pull_request_data['comments']
-            out['review_comments'] = pull_request_data['review_comments']
-            out['changed_files'] = pull_request_data['changed_files']
-            out['additions'] = pull_request_data['additions']
-            out['deletions'] = pull_request_data['deletions']
-        except Exception as e:
-            get_logger().exception(f"Failed to calculate PR statistics, error: {e}")
-            return {}
-        return out
+        return {}
--- a/pr_agent/servers/github_action_runner.py
+++ b/pr_agent/servers/github_action_runner.py
@ -103,6 +103,8 @@ async def run_action():
                    await PRReviewer(pr_url).run()
                if auto_improve is None or is_true(auto_improve):
                    await PRCodeSuggestions(pr_url).run()
+        else:
+            get_logger().info(f"Skipping action: {action}")

    # Handle issue comment event
    elif GITHUB_EVENT_NAME == "issue_comment" or GITHUB_EVENT_NAME == "pull_request_review_comment":
--- a/pr_agent/servers/help.py
+++ b/pr_agent/servers/help.py
@ -128,7 +128,7 @@ Be specific, clear, and concise in the instructions. With extra instructions, yo
 Examples for extra instructions:
 ```
 [pr_description] 
-extra_instructions="""
+extra_instructions="""\
 - The PR title should be in the format: '<PR type>: <title>'
 - The title should be short and concise (up to 10 words)
 - ...
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@ -52,7 +52,6 @@ maximal_review_effort=5

 [pr_description] # /describe #
 publish_labels=true
-publish_description_as_comment=false
 add_original_user_description=true
 keep_original_user_title=true
 use_bullet_points=true
@ -61,6 +60,9 @@ enable_pr_type=true
 final_update_message = true
 enable_help_text=false
 enable_help_comment=true
+# describe as comment
+publish_description_as_comment=false
+publish_description_as_comment_persistent=true
 ## changes walkthrough section
 enable_semantic_files_types=true
 collapsible_file_list='adaptive' # true, false, 'adaptive'
@ -110,6 +112,11 @@ file = ""              # in case there are several components with the same name
 class_name = ""        # in case there are several methods with the same name in the same file, you can specify the relevant class name
 enable_help_text=true

+[pr_improve_component] # /improve_component #
+num_code_suggestions=4
+extra_instructions = ""
+file = ""              # in case there are several components with the same name, you can specify the relevant file
+class_name = ""

 [checks] # /checks (pro feature) #
 enable_auto_checks_feedback=true
--- a/pr_agent/tools/pr_code_suggestions.py
+++ b/pr_agent/tools/pr_code_suggestions.py
@ -76,7 +76,7 @@ class PRCodeSuggestions:
            relevant_configs = {'pr_code_suggestions': dict(get_settings().pr_code_suggestions),
                                'config': dict(get_settings().config)}
            get_logger().debug("Relevant configs", artifacts=relevant_configs)
-            if get_settings().config.publish_output:
+            if get_settings().config.publish_output and get_settings().config.publish_output_progress:
                if self.git_provider.is_supported("gfm_markdown"):
                    self.progress_response = self.git_provider.publish_comment(self.progress)
                else:
@ -196,24 +196,31 @@ class PRCodeSuggestions:
        suggestion_list = []
        one_sentence_summary_list = []
        for i, suggestion in enumerate(data['code_suggestions']):
-            if get_settings().pr_code_suggestions.summarize:
-                if not suggestion or 'one_sentence_summary' not in suggestion or 'label' not in suggestion or 'relevant_file' not in suggestion:
-                    get_logger().debug(f"Skipping suggestion {i + 1}, because it is invalid: {suggestion}")
-                    continue
-
-                if suggestion['one_sentence_summary'] in one_sentence_summary_list:
-                    get_logger().debug(f"Skipping suggestion {i + 1}, because it is a duplicate: {suggestion}")
-                    continue
-
-            if ('existing_code' in suggestion) and ('improved_code' in suggestion) and (
-                    suggestion['existing_code'] != suggestion['improved_code']):
-                suggestion = self._truncate_if_needed(suggestion)
+            try:
                if get_settings().pr_code_suggestions.summarize:
-                    one_sentence_summary_list.append(suggestion['one_sentence_summary'])
-                suggestion_list.append(suggestion)
-            else:
-                get_logger().debug(
-                    f"Skipping suggestion {i + 1}, because existing code is equal to improved code {suggestion['existing_code']}")
+                    if not suggestion or 'one_sentence_summary' not in suggestion or 'label' not in suggestion or 'relevant_file' not in suggestion:
+                        get_logger().debug(f"Skipping suggestion {i + 1}, because it is invalid: {suggestion}")
+                        continue
+
+                    if suggestion['one_sentence_summary'] in one_sentence_summary_list:
+                        get_logger().debug(f"Skipping suggestion {i + 1}, because it is a duplicate: {suggestion}")
+                        continue
+
+                if 'const' in suggestion['suggestion_content'] and 'instead' in suggestion['suggestion_content'] and 'let' in suggestion['suggestion_content']:
+                    get_logger().debug(f"Skipping suggestion {i + 1}, because it uses 'const instead let': {suggestion}")
+                    continue
+
+                if ('existing_code' in suggestion) and ('improved_code' in suggestion) and (
+                        suggestion['existing_code'] != suggestion['improved_code']):
+                    suggestion = self._truncate_if_needed(suggestion)
+                    if get_settings().pr_code_suggestions.summarize:
+                        one_sentence_summary_list.append(suggestion['one_sentence_summary'])
+                    suggestion_list.append(suggestion)
+                else:
+                    get_logger().debug(
+                        f"Skipping suggestion {i + 1}, because existing code is equal to improved code {suggestion['existing_code']}")
+            except Exception as e:
+                get_logger().error(f"Error processing suggestion {i + 1}: {suggestion}, error: {e}")
        data['code_suggestions'] = suggestion_list

        return data
--- a/pr_agent/tools/pr_description.py
+++ b/pr_agent/tools/pr_description.py
@ -132,7 +132,14 @@ class PRDescription:
                # publish description
                if get_settings().pr_description.publish_description_as_comment:
                    full_markdown_description = f"## Title\n\n{pr_title}\n\n___\n{pr_body}"
-                    self.git_provider.publish_comment(full_markdown_description)
+                    if get_settings().pr_description.publish_description_as_comment_persistent:
+                        self.git_provider.publish_persistent_comment(full_markdown_description,
+                                                                     initial_header="## Title",
+                                                                     update_header=True,
+                                                                     name="describe",
+                                                                     final_update_message=False, )
+                    else:
+                        self.git_provider.publish_comment(full_markdown_description)
                else:
                    self.git_provider.publish_description(pr_title, pr_body)

--- a/pr_agent/tools/pr_help_message.py
+++ b/pr_agent/tools/pr_help_message.py
@ -27,12 +27,13 @@ class PRHelpMessage:
            tool_names.append(f"[DESCRIBE]({base_path}/describe/)")
            tool_names.append(f"[REVIEW]({base_path}/review/)")
            tool_names.append(f"[IMPROVE]({base_path}/improve/)")
-            tool_names.append(f"[ANALYZE]({base_path}/analyze/) 💎")
            tool_names.append(f"[UPDATE CHANGELOG]({base_path}/update_changelog/)")
-            tool_names.append(f"[ADD DOCUMENTATION]({base_path}/documentation/) 💎")
-            tool_names.append(f"[ASK]({base_path}/ask/)")
-            tool_names.append(f"[GENERATE CUSTOM LABELS]({base_path}/custom_labels/)")
+            tool_names.append(f"[ADD DOCS]({base_path}/documentation/) 💎")
            tool_names.append(f"[TEST]({base_path}/test/) 💎")
+            tool_names.append(f"[IMPROVE COMPONENT]({base_path}/improve_component/) 💎")
+            tool_names.append(f"[ANALYZE]({base_path}/analyze/) 💎")
+            tool_names.append(f"[ASK]({base_path}/ask/)")
+            tool_names.append(f"[GENERATE CUSTOM LABELS]({base_path}/custom_labels/) 💎")
            tool_names.append(f"[CI FEEDBACK]({base_path}/ci_feedback/) 💎")
            tool_names.append(f"[CUSTOM SUGGESTIONS]({base_path}/custom_suggestions/) 💎")
            tool_names.append(f"[SIMILAR ISSUE]({base_path}/similar_issues/)")
@ -40,27 +41,29 @@ class PRHelpMessage:
            descriptions = []
            descriptions.append("Generates PR description - title, type, summary, code walkthrough and labels")
            descriptions.append("Adjustable feedback about the PR, possible issues, security concerns, review effort and more")
-            descriptions.append("Code suggestions for improving the PR.")
-            descriptions.append("Identifies code components that changed in the PR, and enables to interactively generate tests, docs, and code suggestions for each component.")
-            descriptions.append("Automatically updates the changelog.")
-            descriptions.append("Generates documentation to methods/functions/classes that changed in the PR.")
-            descriptions.append("Answering free-text questions about the PR.")
+            descriptions.append("Code suggestions for improving the PR")
+            descriptions.append("Automatically updates the changelog")
+            descriptions.append("Generates documentation to methods/functions/classes that changed in the PR")
+            descriptions.append("Generates unit tests for a specific component, based on the PR code change")
+            descriptions.append("Code suggestions for a specific component that changed in the PR")
+            descriptions.append("Identifies code components that changed in the PR, and enables to interactively generate tests, docs, and code suggestions for each component")
+            descriptions.append("Answering free-text questions about the PR")
            descriptions.append("Generates custom labels for the PR, based on specific guidelines defined by the user")
-            descriptions.append("Generates unit tests for a specific component, based on the PR code change.")
-            descriptions.append("Generates feedback and analysis for a failed CI job.")
-            descriptions.append("Generates custom suggestions for improving the PR code, based on specific guidelines defined by the user.")
-            descriptions.append("Automatically retrieves and presents similar issues.")
+            descriptions.append("Generates feedback and analysis for a failed CI job")
+            descriptions.append("Generates custom suggestions for improving the PR code, based only on specific guidelines defined by the user")
+            descriptions.append("Automatically retrieves and presents similar issues")

            commands  =[]
            commands.append("`/describe`")
            commands.append("`/review`")
            commands.append("`/improve`")
-            commands.append("`/analyze`")
            commands.append("`/update_changelog`")
            commands.append("`/add_docs`")
+            commands.append("`/test`")
+            commands.append("`/improve_component`")
+            commands.append("`/analyze`")
            commands.append("`/ask`")
            commands.append("`/generate_labels`")
-            commands.append("`/test`")
            commands.append("`/checks`")
            commands.append("`/custom_suggestions`")
            commands.append("`/similar_issue`")
@ -69,9 +72,13 @@ class PRHelpMessage:
            checkbox_list.append(" - [ ] Run <!-- /describe -->")
            checkbox_list.append(" - [ ] Run <!-- /review -->")
            checkbox_list.append(" - [ ] Run <!-- /improve -->")
-            checkbox_list.append(" - [ ] Run <!-- /analyze -->")
            checkbox_list.append(" - [ ] Run <!-- /update_changelog -->")
            checkbox_list.append(" - [ ] Run <!-- /add_docs -->")
+            checkbox_list.append(" - [ ] Run <!-- /test -->")
+            checkbox_list.append(" - [ ] Run <!-- /improve_component -->")
+            checkbox_list.append(" - [ ] Run <!-- /analyze -->")
+            checkbox_list.append("[*]")
+            checkbox_list.append("[*]")
            checkbox_list.append("[*]")
            checkbox_list.append("[*]")
            checkbox_list.append("[*]")
@ -80,16 +87,16 @@ class PRHelpMessage:
            checkbox_list.append("[*]")

            if isinstance(self.git_provider, GithubProvider):
-                pr_comment += f"<table><tr align='center'><th align='center'>Tool</th><th align='center'>Description</th><th align='center'>Invoke Interactively :gem:</th></tr>"
+                pr_comment += f"<table><tr align='left'><th align='left'>Tool</th><th align='left'>Description</th><th align='left'>Trigger Interactively :gem:</th></tr>"
                for i in range(len(tool_names)):
-                    pr_comment += f"\n<tr><td align='center'>\n\n<strong>{tool_names[i]}</strong></td>\n<td>{descriptions[i]}</td>\n<td>\n\n{checkbox_list[i]}\n</td></tr>"
+                    pr_comment += f"\n<tr><td align='left'>\n\n<strong>{tool_names[i]}</strong></td>\n<td>{descriptions[i]}</td>\n<td>\n\n{checkbox_list[i]}\n</td></tr>"
                pr_comment += "</table>\n\n"
                pr_comment += f"""\n\n(1) Note that each tool be [triggered automatically](https://github.com/Codium-ai/pr-agent/blob/main/Usage.md#github-app-automatic-tools-for-pr-actions) when a new PR is opened, or called manually by [commenting on a PR](https://github.com/Codium-ai/pr-agent/blob/main/Usage.md#online-usage)."""
                pr_comment += f"""\n\n(2) Tools marked with [*] require additional parameters to be passed. For example, to invoke the `/ask` tool, you need to comment on a PR: `/ask "<question content>"`. See the relevant documentation for each tool for more details."""
            else:
-                pr_comment += f"<table><tr align='center'><th align='center'>Tool</th><th align='left'>Command</th><th align='left'>Description</th></tr>"
+                pr_comment += f"<table><tr align='left'><th align='left'>Tool</th><th align='left'>Command</th><th align='left'>Description</th></tr>"
                for i in range(len(tool_names)):
-                    pr_comment += f"\n<tr><td align='center'>\n\n<strong>{tool_names[i]}</strong></td><td>{commands[i]}</td><td>{descriptions[i]}</td></tr>"
+                    pr_comment += f"\n<tr><td align='left'>\n\n<strong>{tool_names[i]}</strong></td><td>{commands[i]}</td><td>{descriptions[i]}</td></tr>"
                pr_comment += "</table>\n\n"
                pr_comment += f"""\n\nNote that each tool be [invoked automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/) when a new PR is opened, or called manually by [commenting on a PR](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#online-usage)."""
            if get_settings().config.publish_output:
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@ -358,6 +358,9 @@ class PRReviewer:
        return True

    def set_review_labels(self, data):
+        if not get_settings().config.publish_output:
+            return
+
        if (get_settings().pr_reviewer.enable_review_labels_security or
                get_settings().pr_reviewer.enable_review_labels_effort):
            try: