From c76aabc71eea58acab1412806ebb6a1c4420f3ec Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sat, 17 Aug 2024 09:15:05 +0300 Subject: [PATCH 01/18] Add callback functionality to litellm_ai_handler for enhanced logging and metadata capture --- pr_agent/agent/pr_agent.py | 2 +- .../algo/ai_handlers/litellm_ai_handler.py | 29 +++++++++++++++++++ pr_agent/settings/configuration.toml | 1 + 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/pr_agent/agent/pr_agent.py b/pr_agent/agent/pr_agent.py index d0ac46ca..8bf6cff7 100644 --- a/pr_agent/agent/pr_agent.py +++ b/pr_agent/agent/pr_agent.py @@ -79,7 +79,7 @@ class PRAgent: if action not in command2class: get_logger().debug(f"Unknown command: {action}") return False - with get_logger().contextualize(command=action): + with get_logger().contextualize(command=action, pr_url=pr_url): get_logger().info("PR-Agent request handler started", analytics=True) if action == "reflect_and_review": get_settings().pr_reviewer.ask_and_reflect = True diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py index c8b620fe..1aa93b36 100644 --- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py +++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py @@ -89,6 +89,31 @@ class LiteLLMAIHandler(BaseAiHandler): response_log['main_pr_language'] = 'unknown' return response_log + def add_callbacks(selfs, kwargs): + pr_metadata = [] + + def capture_logs(message): + # Parsing the log message and context + record = message.record + log_entry = {} + if record.get('extra', {}).get('command', None) is not None: + log_entry.update({"command": record['extra']["command"]}) + if record.get('extra', {}).get('pr_url', None) is not None: + log_entry.update({"pr_url": record['extra']["pr_url"]}) + + # Append the log entry to the captured_logs list + pr_metadata.append(log_entry) + + # Adding the custom sink to Loguru + handler_id = get_logger().add(capture_logs) + get_logger().debug("Capturing logs for 
litellm callbacks") + get_logger().remove(handler_id) + + # Adding the captured logs to the kwargs + kwargs["metadata"] = pr_metadata + + return kwargs + @property def deployment_id(self): """ @@ -133,6 +158,10 @@ class LiteLLMAIHandler(BaseAiHandler): "force_timeout": get_settings().config.ai_timeout, "api_base": self.api_base, } + + if get_settings().litellm.get("enable_callbacks", False): + kwargs = self.add_callbacks(kwargs) + seed = get_settings().config.get("seed", -1) if temperature > 0 and seed >= 0: raise ValueError(f"Seed ({seed}) is not supported with temperature ({temperature}) > 0") diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 67273c46..79d30f1d 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -264,6 +264,7 @@ pr_commands = [ [litellm] # use_client = false # drop_params = false +enable_callbacks = false [pr_similar_issue] skip_comments = false From aa87bc60f6fa448674bd4cd3d4d32a10663fc245 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sat, 17 Aug 2024 09:20:30 +0300 Subject: [PATCH 02/18] Rename 'add_callbacks' to 'add_litellm_callbacks' for clarity in litellm_ai_handler --- pr_agent/algo/ai_handlers/litellm_ai_handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py index 1aa93b36..b4e3d085 100644 --- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py +++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py @@ -89,7 +89,7 @@ class LiteLLMAIHandler(BaseAiHandler): response_log['main_pr_language'] = 'unknown' return response_log - def add_callbacks(selfs, kwargs): + def add_litellm_callbacks(selfs, kwargs) -> dict: pr_metadata = [] def capture_logs(message): @@ -160,7 +160,7 @@ class LiteLLMAIHandler(BaseAiHandler): } if get_settings().litellm.get("enable_callbacks", False): - kwargs = self.add_callbacks(kwargs) + kwargs = 
self.add_litellm_callbacks(kwargs) seed = get_settings().config.get("seed", -1) if temperature > 0 and seed >= 0: From 8aa76a0ac57eec6751e1b99573e25c0c320ed818 Mon Sep 17 00:00:00 2001 From: MarkRx Date: Mon, 19 Aug 2024 15:45:47 -0400 Subject: [PATCH 03/18] Add and document ability to use LiteLLM Logging Observability tools --- .../usage-guide/additional_configurations.md | 21 +++++++++ .../algo/ai_handlers/litellm_ai_handler.py | 45 ++++++++++++++++--- pr_agent/settings/configuration.toml | 3 ++ 3 files changed, 64 insertions(+), 5 deletions(-) diff --git a/docs/docs/usage-guide/additional_configurations.md b/docs/docs/usage-guide/additional_configurations.md index 121d77b6..a9f323da 100644 --- a/docs/docs/usage-guide/additional_configurations.md +++ b/docs/docs/usage-guide/additional_configurations.md @@ -91,3 +91,24 @@ user=""" """ ``` Note that the new prompt will need to generate an output compatible with the relevant [post-process function](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/tools/pr_description.py#L137). + +## Integrating with Logging Observability Platforms + +Various logging observability tools can be used out of the box when using the default LiteLLM AI Handler. Simply configure the LiteLLM callback settings in `configuration.toml` and set environment variables according to the LiteLLM [documentation](https://docs.litellm.ai/docs/). + +For example, to use [LangSmith](https://www.langchain.com/langsmith) you can add the following to your `configuration.toml` file: +``` +[litellm] +... 
+success_callback = ["langsmith"] +failure_callback = ["langsmith"] +service_callback = [] +``` + +Then set the following environment variables: + +``` +LANGSMITH_API_KEY= +LANGSMITH_PROJECT= +LANGSMITH_BASE_URL= +``` \ No newline at end of file diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py index b4e3d085..f577bead 100644 --- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py +++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py @@ -1,10 +1,10 @@ import os import requests -import boto3 import litellm import openai from litellm import acompletion from tenacity import retry, retry_if_exception_type, stop_after_attempt + from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler from pr_agent.config_loader import get_settings from pr_agent.log import get_logger @@ -44,6 +44,12 @@ class LiteLLMAIHandler(BaseAiHandler): litellm.use_client = True if get_settings().get("LITELLM.DROP_PARAMS", None): litellm.drop_params = get_settings().litellm.drop_params + if get_settings().get("LITELLM.SUCCESS_CALLBACK", None): + litellm.success_callback = get_settings().litellm.success_callback + if get_settings().get("LITELLM.FAILURE_CALLBACK", None): + litellm.failure_callback = get_settings().litellm.failure_callback + if get_settings().get("LITELLM.SERVICE_CALLBACK", None): + litellm.service_callback = get_settings().litellm.service_callback if get_settings().get("OPENAI.ORG", None): litellm.organization = get_settings().openai.org if get_settings().get("OPENAI.API_TYPE", None): @@ -90,27 +96,56 @@ class LiteLLMAIHandler(BaseAiHandler): return response_log def add_litellm_callbacks(selfs, kwargs) -> dict: - pr_metadata = [] + captured_extra = [] def capture_logs(message): # Parsing the log message and context record = message.record log_entry = {} - if record.get('extra', {}).get('command', None) is not None: + if record.get('extra', None).get('command', None) is not None: log_entry.update({"command": 
record['extra']["command"]}) if record.get('extra', {}).get('pr_url', None) is not None: log_entry.update({"pr_url": record['extra']["pr_url"]}) # Append the log entry to the captured_logs list - pr_metadata.append(log_entry) + captured_extra.append(log_entry) # Adding the custom sink to Loguru handler_id = get_logger().add(capture_logs) get_logger().debug("Capturing logs for litellm callbacks") get_logger().remove(handler_id) + context = captured_extra[0] if len(captured_extra) > 0 else None + + command = context.get("command", "unknown") + pr_url = context.get("pr_url", "unknown") + git_provider = get_settings().config.git_provider + + metadata = dict() + callbacks = litellm.success_callback + litellm.failure_callback + litellm.service_callback + if "langfuse" in callbacks: + metadata.update({ + "trace_name": command, + "tags": [git_provider, command], + "trace_metadata": { + "command": command, + "pr_url": pr_url, + }, + }) + if "langsmith" in callbacks: + metadata.update({ + "run_name": command, + "tags": [git_provider, command], + "extra": { + "metadata": { + "command": command, + "pr_url": pr_url, + } + }, + }) + # Adding the captured logs to the kwargs - kwargs["metadata"] = pr_metadata + kwargs["metadata"] = metadata return kwargs diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 79d30f1d..6d34010a 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -265,6 +265,9 @@ pr_commands = [ # use_client = false # drop_params = false enable_callbacks = false +success_callback = [] +failure_callback = [] +service_callback = [] [pr_similar_issue] skip_comments = false From 660a60924e78b08ae7a62717eca0cc94376c7cff Mon Sep 17 00:00:00 2001 From: mrT23 Date: Tue, 20 Aug 2024 11:23:37 +0300 Subject: [PATCH 04/18] Add filename parameter and skip logic to extend_patch function in git_patch_processing.py --- pr_agent/algo/git_patch_processing.py | 10 +++++++++- 1 file changed, 9 
insertions(+), 1 deletion(-) diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py index de216f6a..30bdd8c9 100644 --- a/pr_agent/algo/git_patch_processing.py +++ b/pr_agent/algo/git_patch_processing.py @@ -7,7 +7,8 @@ from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo from pr_agent.log import get_logger -def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, patch_extra_lines_after=0) -> str: +def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, + patch_extra_lines_after=0, filename: str = "") -> str: if not patch_str or (patch_extra_lines_before == 0 and patch_extra_lines_after == 0) or not original_file_str: return patch_str @@ -17,6 +18,13 @@ def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, patch except UnicodeDecodeError: return "" + # skip patches + skip_types = get_settings().config.skip_types #[".md",".txt"] + if skip_types: + if any([filename.endswith(skip_type) for skip_type in skip_types]): + return patch_str + + # dynamic context settings allow_dynamic_context = get_settings().config.allow_dynamic_context max_extra_lines_before_dynamic_context = get_settings().config.max_extra_lines_before_dynamic_context patch_extra_lines_before_dynamic = patch_extra_lines_before From df573674266ac7c27c88e4ba92f5568dc15d6f03 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Tue, 20 Aug 2024 11:24:52 +0300 Subject: [PATCH 05/18] Update configuration.toml to modify fallback models and add skip types for patch extension logic --- pr_agent/settings/configuration.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 80638e2c..0ca90c35 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -2,7 +2,7 @@ # models model="gpt-4-turbo-2024-04-09" model_turbo="gpt-4o-2024-08-06" -fallback_models=["gpt-4-0125-preview"] 
+fallback_models=["gpt-4o-2024-05-13"] # CLI git_provider="github" publish_output=true @@ -19,7 +19,8 @@ max_description_tokens = 500 max_commits_tokens = 500 max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities. custom_model_max_tokens=-1 # for models not in the default list -# +# patch extension logic +skip_types =[".md",".txt"] allow_dynamic_context=false max_extra_lines_before_dynamic_context = 10 # will try to include up to 10 extra lines before the hunk in the patch, until we reach an enclosing function or class patch_extra_lines_before = 3 # Number of extra lines (+3 default ones) to include before each hunk in the patch From b7eb6be5a0c588167d06b85b8e03f6abf918331e Mon Sep 17 00:00:00 2001 From: mrT23 Date: Tue, 20 Aug 2024 11:27:35 +0300 Subject: [PATCH 06/18] Update PR code suggestions and reviewer prompts for clarity and consistency --- pr_agent/algo/pr_processing.py | 2 +- .../settings/pr_code_suggestions_prompts.toml | 52 ++++++++++--------- pr_agent/settings/pr_reviewer_prompts.toml | 29 ++++++----- 3 files changed, 45 insertions(+), 38 deletions(-) diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index a467a2d1..d8708ddc 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -191,7 +191,7 @@ def pr_generate_extended_diff(pr_languages: list, # extend each patch with extra lines of context extended_patch = extend_patch(original_file_content_str, patch, - patch_extra_lines_before, patch_extra_lines_after) + patch_extra_lines_before, patch_extra_lines_after, file.filename) if not extended_patch: get_logger().warning(f"Failed to extend patch for file: {file.filename}") continue diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml index 9b57891d..4bd5f7fb 100644 --- a/pr_agent/settings/pr_code_suggestions_prompts.toml +++ 
b/pr_agent/settings/pr_code_suggestions_prompts.toml @@ -1,6 +1,6 @@ [pr_code_suggestions_prompt] -system="""You are PR-Reviewer, a language model that specializes in suggesting ways to improve for a Pull Request (PR) code. -Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR diff. +system="""You are PR-Reviewer, a language model that specializes in suggesting improvements to a Pull Request (PR) code. +Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR code diff (lines starting with '+'). The format we will use to present the PR code diff: @@ -9,13 +9,15 @@ The format we will use to present the PR code diff: @@ ... @@ def func1(): __new hunk__ -12 code line1 that remained unchanged in the PR +11 unchanged code line0 in the PR +12 unchanged code line1 in the PR 13 +new code line2 added in the PR -14 code line3 that remained unchanged in the PR +14 unchanged code line3 in the PR __old hunk__ - code line1 that remained unchanged in the PR --old code line2 that was removed in the PR - code line3 that remained unchanged in the PR + unchanged code line0 + unchanged code line1 +-old code line2 removed in the PR + unchanged code line3 @@ ... @@ def func2(): __new hunk__ @@ -27,15 +29,15 @@ __old hunk__ ## file: 'src/file2.py' ... ====== -- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented. -- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference. 
+ +- In this format, we separate each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented. +- We also added line numbers for the '__new hunk__' code, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and should only used for reference. - Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \ -Suggestions should always focus on ways to improve the new code lines introduced in the PR, meaning lines in the '__new hunk__' sections that begin with a '+' symbol (after the line numbers). The '__old hunk__' sections code is for context and reference only. Specific instructions for generating code suggestions: - Provide in total up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful. -- The suggestions should focus on improving the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers). +- The suggestions should focus on improving only the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers). The '__old hunk__' sections code is for context and reference only. - Prioritize suggestions that address possible issues, major problems, and bugs in the PR code. - Don't suggest to add docstring, type hints, or comments, or to remove unused imports. - Suggestions should not repeat code already present in the '__new hunk__' sections. 
@@ -97,7 +99,7 @@ code_suggestions: Each YAML output MUST be after a newline, indented, with block scalar indicator ('|'). """ -user="""PR Info: +user="""--PR Info-- Title: '{{title}}' @@ -114,8 +116,8 @@ Response (should be a valid YAML, and nothing else): [pr_code_suggestions_prompt_claude] -system="""You are PR-Reviewer, a language model that specializes in suggesting ways to improve for a Pull Request (PR) code. -Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR diff. +system="""You are PR-Reviewer, a language model that specializes in suggesting improvements to a Pull Request (PR) code. +Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR code diff (lines starting with '+'). The format we will use to present the PR code diff: @@ -124,13 +126,15 @@ The format we will use to present the PR code diff: @@ ... @@ def func1(): __new hunk__ -12 code line1 that remained unchanged in the PR +11 unchanged code line0 in the PR +12 unchanged code line1 in the PR 13 +new code line2 added in the PR -14 code line3 that remained unchanged in the PR +14 unchanged code line3 in the PR __old hunk__ - code line1 that remained unchanged in the PR --old code line2 that was removed in the PR - code line3 that remained unchanged in the PR + unchanged code line0 + unchanged code line1 +-old code line2 removed in the PR + unchanged code line3 @@ ... @@ def func2(): __new hunk__ @@ -142,15 +146,15 @@ __old hunk__ ## file: 'src/file2.py' ... ====== -- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented. 
-- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference. + +- In this format, we separate each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented. +- We also added line numbers for the '__new hunk__' code, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and should only used for reference. - Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \ -Suggestions should always focus on ways to improve the new code lines introduced in the PR, meaning lines in the '__new hunk__' sections that begin with a '+' symbol (after the line numbers). The '__old hunk__' sections code is for context and reference only. Specific instructions for generating code suggestions: - Provide in total up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful. -- The suggestions should focus on improving the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers). +- The suggestions should focus on improving only the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers). - Prioritize suggestions that address possible issues, major problems, and bugs in the PR code. - Don't suggest to add docstring, type hints, or comments, or to remove unused imports. 
- Provide the exact line numbers range (inclusive) for each suggestion. Use the line numbers from the '__new hunk__' sections. @@ -173,7 +177,7 @@ The output must be a YAML object equivalent to type $PRCodeSuggestions, accordin class CodeSuggestion(BaseModel): relevant_file: str = Field(description="The full file path of the relevant file") language: str = Field(description="the programming language of the relevant file") - suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR. Don't present here actual code snippets, just the suggestion. Be short and concise ") + suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR. Don't present here actual code snippets, just the suggestion. Be short and concise") existing_code: str = Field(description="a short code snippet, demonstrating the relevant code lines from a '__new hunk__' section. It must be without line numbers. Quote only full code lines, not partial ones. Use abbreviations ("...") of full lines if needed") improved_code: str = Field(description="a new code snippet, that can be used to replace the relevant 'existing_code' lines in '__new hunk__' code after applying the suggestion") one_sentence_summary: str = Field(description="a short summary of the suggestion action, in a single sentence. Focus on the 'what'. Be general, and avoid method or variable names.") diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index b3cbfce3..6a4e84ef 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -5,7 +5,7 @@ Your task is to provide constructive and concise feedback for the PR, and also p {%- else %} Your task is to provide constructive and concise feedback for the PR. 
{%- endif %} -The review should focus on new code added in the PR diff (lines starting with '+') +The review should focus on new code added in the PR code diff (lines starting with '+') The format we will use to present the PR code diff: @@ -14,13 +14,15 @@ The format we will use to present the PR code diff: @@ ... @@ def func1(): __new hunk__ -12 code line1 that remained unchanged in the PR +11 unchanged code line0 in the PR +12 unchanged code line1 in the PR 13 +new code line2 added in the PR -14 code line3 that remained unchanged in the PR +14 unchanged code line3 in the PR __old hunk__ - code line1 that remained unchanged in the PR --old code line2 that was removed in the PR - code line3 that remained unchanged in the PR + unchanged code line0 + unchanged code line1 +-old code line2 removed in the PR + unchanged code line3 @@ ... @@ def func2(): __new hunk__ @@ -32,10 +34,11 @@ __old hunk__ ## file: 'src/file2.py' ... ====== + - In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented. -- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference. +- We also added line numbers for the '__new hunk__' code, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and should only used for reference. - Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. 
\ -The review should focus on new code added in the PR diff (lines starting with '+') + The review should address new code added in the PR code diff (lines starting with '+') - When quoting variables or names from the code, use backticks (`) instead of single quote ('). {%- if num_code_suggestions > 0 %} @@ -46,7 +49,7 @@ Code suggestions guidelines: - Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices. - Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the PR code. - Don't suggest to add docstring, type hints, or comments. -- Suggestions should focus on the new code added in the PR diff (lines starting with '+') +- Suggestions should address the new code added in the PR diff (lines starting with '+') {%- endif %} {%- if extra_instructions %} @@ -98,7 +101,7 @@ class Review(BaseModel): {%- endif %} key_issues_to_review: List[KeyIssuesComponentLink] = Field("A list of bugs, issue or major performance concerns introduced in this PR, which the PR reviewer should further investigate") {%- if require_security_review %} - security_concerns: str = Field(description="does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. 
Be specific and give examples if possible") + security_concerns: str = Field(description="Does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' (without explaining why) if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible") {%- endif %} {%- if require_can_be_split_review %} can_be_split: List[SubPR] = Field(min_items=0, max_items=3, description="Can this PR, which contains {{ num_pr_files }} changed files in total, be divided into smaller sub-PRs with distinct tasks that can be reviewed and merged independently, regardless of the order ? Make sure that the sub-PRs are indeed independent, with no code dependencies between them, and that each sub-PR represent a meaningful independent task. Output an empty list if the PR code does not need to be split.") @@ -180,7 +183,7 @@ code_feedback: Answer should be a valid YAML, and nothing else. 
Each YAML output MUST be after a newline, with proper indent, and block scalar indicator ('|') """ -user="""PR Info: +user="""--PR Info-- Title: '{{title}}' @@ -188,7 +191,7 @@ Branch: '{{branch}}' {%- if description %} -Description: +PR Description: ====== {{ description|trim }} ====== @@ -209,7 +212,7 @@ User answers: {%- endif %} -The PR Diff: +The PR code diff: ====== {{ diff|trim }} ====== From 2d5b0601689ee3a1bab125134e9cbfe0edf7cd18 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Tue, 20 Aug 2024 11:33:56 +0300 Subject: [PATCH 07/18] patch_extension_skip_types --- pr_agent/algo/git_patch_processing.py | 6 +++--- pr_agent/settings/configuration.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py index 30bdd8c9..83348d31 100644 --- a/pr_agent/algo/git_patch_processing.py +++ b/pr_agent/algo/git_patch_processing.py @@ -19,9 +19,9 @@ def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, return "" # skip patches - skip_types = get_settings().config.skip_types #[".md",".txt"] - if skip_types: - if any([filename.endswith(skip_type) for skip_type in skip_types]): + patch_extension_skip_types = get_settings().config.skip_types #[".md",".txt"] + if patch_extension_skip_types: + if any([filename.endswith(skip_type) for skip_type in patch_extension_skip_types]): return patch_str # dynamic context settings diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 0ca90c35..7d609863 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -20,7 +20,7 @@ max_commits_tokens = 500 max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities. 
custom_model_max_tokens=-1 # for models not in the default list # patch extension logic -skip_types =[".md",".txt"] +patch_extension_skip_types =[".md",".txt"] allow_dynamic_context=false max_extra_lines_before_dynamic_context = 10 # will try to include up to 10 extra lines before the hunk in the patch, until we reach an enclosing function or class patch_extra_lines_before = 3 # Number of extra lines (+3 default ones) to include before each hunk in the patch From d467f5a7fde3defee95d1ed25915126f58d16f4b Mon Sep 17 00:00:00 2001 From: mrT23 Date: Tue, 20 Aug 2024 11:37:27 +0300 Subject: [PATCH 08/18] patch_extension_skip_types --- pr_agent/algo/git_patch_processing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py index 83348d31..f9e0f73e 100644 --- a/pr_agent/algo/git_patch_processing.py +++ b/pr_agent/algo/git_patch_processing.py @@ -19,8 +19,8 @@ def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, return "" # skip patches - patch_extension_skip_types = get_settings().config.skip_types #[".md",".txt"] - if patch_extension_skip_types: + patch_extension_skip_types = get_settings().config.patch_extension_skip_types #[".md",".txt"] + if patch_extension_skip_types and filename: if any([filename.endswith(skip_type) for skip_type in patch_extension_skip_types]): return patch_str From 2591a5d6c185431d45a38a560a25e2cda7fa49d4 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Tue, 20 Aug 2024 12:11:34 +0300 Subject: [PATCH 09/18] patch_extension_skip_types Add validation for latest_comment and cast suggestions_score_threshold to int --- pr_agent/servers/github_polling.py | 2 ++ pr_agent/tools/pr_code_suggestions.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pr_agent/servers/github_polling.py b/pr_agent/servers/github_polling.py index 627390c0..52c00552 100644 --- a/pr_agent/servers/github_polling.py +++ 
b/pr_agent/servers/github_polling.py @@ -80,6 +80,8 @@ async def polling_loop(): if 'subject' in notification and notification['subject']['type'] == 'PullRequest': pr_url = notification['subject']['url'] latest_comment = notification['subject']['latest_comment_url'] + if not latest_comment or not isinstance(latest_comment, str): + continue async with session.get(latest_comment, headers=headers) as comment_response: if comment_response.status == 200: comment = await comment_response.json() diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py index ca7b4d88..170bb00e 100644 --- a/pr_agent/tools/pr_code_suggestions.py +++ b/pr_agent/tools/pr_code_suggestions.py @@ -522,7 +522,7 @@ class PRCodeSuggestions: data = {"code_suggestions": []} for j, predictions in enumerate(prediction_list): # each call adds an element to the list if "code_suggestions" in predictions: - score_threshold = max(1, get_settings().pr_code_suggestions.suggestions_score_threshold) + score_threshold = max(1, int(get_settings().pr_code_suggestions.suggestions_score_threshold)) for i, prediction in enumerate(predictions["code_suggestions"]): try: if get_settings().pr_code_suggestions.self_reflect_on_suggestions: From 4d9d6f74773b54dbed2c801cf00d9a9a7c5d52f3 Mon Sep 17 00:00:00 2001 From: Paolo Mainardi Date: Wed, 21 Aug 2024 11:36:33 +0200 Subject: [PATCH 10/18] fix: remove CI_MERGE_REQUEST_STATE as is not a Gitlab variable --- docs/docs/installation/gitlab.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/installation/gitlab.md b/docs/docs/installation/gitlab.md index 8529d7da..6df2abdb 100644 --- a/docs/docs/installation/gitlab.md +++ b/docs/docs/installation/gitlab.md @@ -23,7 +23,7 @@ pr_agent_job: - python -m pr_agent.cli --pr_url="$MR_URL" review - python -m pr_agent.cli --pr_url="$MR_URL" improve rules: - - if: '$CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_STATE == "opened"' + - if: '$CI_PIPELINE_SOURCE == 
"merge_request_event"' ``` This script will run PR-Agent on every new merge request. You can modify the `rules` section to run PR-Agent on different events. You can also modify the `script` section to run different PR-Agent commands, or with different parameters by exporting different environment variables. From 771d0b8c60b77565d05f5a572698381d5a5fc462 Mon Sep 17 00:00:00 2001 From: Tal Date: Thu, 22 Aug 2024 07:51:09 +0300 Subject: [PATCH 11/18] Update github.md --- docs/docs/installation/github.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/installation/github.md b/docs/docs/installation/github.md index 70c691d3..83cd0981 100644 --- a/docs/docs/installation/github.md +++ b/docs/docs/installation/github.md @@ -38,7 +38,7 @@ if you want to pin your action to a specific release (v0.23 for example) for sta ... ``` -For enhanced security, you can also specify the Docker image by its digest: +For enhanced security, you can also specify the Docker image by its [digest](https://hub.docker.com/repository/docker/codiumai/pr-agent/tags): ```yaml ... steps: From cd526a233c0077ea4c6398576d99de2de4e1998e Mon Sep 17 00:00:00 2001 From: Tal Date: Thu, 22 Aug 2024 11:26:38 +0300 Subject: [PATCH 12/18] Update additional_configurations.md --- docs/docs/usage-guide/additional_configurations.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/docs/usage-guide/additional_configurations.md b/docs/docs/usage-guide/additional_configurations.md index a9f323da..2adfa360 100644 --- a/docs/docs/usage-guide/additional_configurations.md +++ b/docs/docs/usage-guide/additional_configurations.md @@ -99,7 +99,7 @@ Various logging observability tools can be used out-of-the box when using the de For example, to use [LangSmith](https://www.langchain.com/langsmith) you can add the following to your `configuration.toml` file: ``` [litellm] -... 
+enable_callbacks = true success_callback = ["langsmith"] failure_callback = ["langsmith"] service_callback = [] @@ -111,4 +111,4 @@ Then set the following environment variables: LANGSMITH_API_KEY= LANGSMITH_PROJECT= LANGSMITH_BASE_URL= -``` \ No newline at end of file +``` From ffaf5d5271aa65462f65397ebcaecbc27b6e1303 Mon Sep 17 00:00:00 2001 From: Paolo Mainardi Date: Thu, 22 Aug 2024 15:43:43 +0200 Subject: [PATCH 13/18] feat: Handle the gitlab MR draft status closes #1160 --- pr_agent/servers/gitlab_webhook.py | 12 ++++++++++++ pr_agent/settings/configuration.toml | 1 + 2 files changed, 13 insertions(+) diff --git a/pr_agent/servers/gitlab_webhook.py b/pr_agent/servers/gitlab_webhook.py index 4a814e9f..df8f9baf 100644 --- a/pr_agent/servers/gitlab_webhook.py +++ b/pr_agent/servers/gitlab_webhook.py @@ -124,14 +124,26 @@ async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request): return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"})) log_context["sender"] = sender + should_skip_draft = get_settings().get("GITLAB.SKIP_DRAFT_MR", False) if data.get('object_kind') == 'merge_request' and data['object_attributes'].get('action') in ['open', 'reopen']: url = data['object_attributes'].get('url') + draft = data['object_attributes'].get('draft') get_logger().info(f"New merge request: {url}") + + if draft and should_skip_draft: + get_logger().info(f"Skipping draft MR: {url}") + return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"})) + await _perform_commands_gitlab("pr_commands", PRAgent(), url, log_context) elif data.get('object_kind') == 'note' and data.get('event_type') == 'note': # comment on MR if 'merge_request' in data: mr = data['merge_request'] url = mr.get('url') + draft = mr.get('draft') + if draft and should_skip_draft: + get_logger().info(f"Skipping draft MR: {url}") + return JSONResponse(status_code=status.HTTP_200_OK, 
content=jsonable_encoder({"message": "success"})) + get_logger().info(f"A comment has been added to a merge request: {url}") body = data.get('object_attributes', {}).get('note') if data.get('object_attributes', {}).get('type') == 'DiffNote' and '/ask' in body: # /ask_line diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index b128aca0..89d8dd11 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -230,6 +230,7 @@ push_commands = [ "/describe", "/review --pr_reviewer.num_code_suggestions=0", ] +skip_draft_mr = false [bitbucket_app] pr_commands = [ From 61837c69a3224ab3658ad6f601e5d088cb00f1fc Mon Sep 17 00:00:00 2001 From: Paolo Mainardi Date: Thu, 22 Aug 2024 21:13:54 +0200 Subject: [PATCH 14/18] Update gitlab_webhook.py --- pr_agent/servers/gitlab_webhook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_agent/servers/gitlab_webhook.py b/pr_agent/servers/gitlab_webhook.py index df8f9baf..5bed83f3 100644 --- a/pr_agent/servers/gitlab_webhook.py +++ b/pr_agent/servers/gitlab_webhook.py @@ -124,7 +124,7 @@ async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request): return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"})) log_context["sender"] = sender - should_skip_draft = get_settings().get("GITLAB.SKIP_DRAFT_MR", False) + should_skip_draft = get_settings().get("GITLAB.SKIP_DRAFT_MR", True) if data.get('object_kind') == 'merge_request' and data['object_attributes'].get('action') in ['open', 'reopen']: url = data['object_attributes'].get('url') draft = data['object_attributes'].get('draft') From 8793f8d9b02e69a5191ef62c2fdb528314dd651f Mon Sep 17 00:00:00 2001 From: Paolo Mainardi Date: Thu, 22 Aug 2024 21:14:49 +0200 Subject: [PATCH 15/18] Update gitlab_webhook.py --- pr_agent/servers/gitlab_webhook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/pr_agent/servers/gitlab_webhook.py b/pr_agent/servers/gitlab_webhook.py index 5bed83f3..df8f9baf 100644 --- a/pr_agent/servers/gitlab_webhook.py +++ b/pr_agent/servers/gitlab_webhook.py @@ -124,7 +124,7 @@ async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request): return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"})) log_context["sender"] = sender - should_skip_draft = get_settings().get("GITLAB.SKIP_DRAFT_MR", True) + should_skip_draft = get_settings().get("GITLAB.SKIP_DRAFT_MR", False) if data.get('object_kind') == 'merge_request' and data['object_attributes'].get('action') in ['open', 'reopen']: url = data['object_attributes'].get('url') draft = data['object_attributes'].get('draft') From 3778cc2745e50990f306fb6ef5721686b47db7b4 Mon Sep 17 00:00:00 2001 From: Paolo Mainardi Date: Thu, 22 Aug 2024 21:59:01 +0200 Subject: [PATCH 16/18] feat: skip draft by default --- pr_agent/servers/gitlab_webhook.py | 5 ++--- pr_agent/settings/configuration.toml | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pr_agent/servers/gitlab_webhook.py b/pr_agent/servers/gitlab_webhook.py index df8f9baf..143e45e4 100644 --- a/pr_agent/servers/gitlab_webhook.py +++ b/pr_agent/servers/gitlab_webhook.py @@ -124,13 +124,12 @@ async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request): return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"})) log_context["sender"] = sender - should_skip_draft = get_settings().get("GITLAB.SKIP_DRAFT_MR", False) if data.get('object_kind') == 'merge_request' and data['object_attributes'].get('action') in ['open', 'reopen']: url = data['object_attributes'].get('url') draft = data['object_attributes'].get('draft') get_logger().info(f"New merge request: {url}") - if draft and should_skip_draft: + if draft: get_logger().info(f"Skipping draft MR: {url}") return JSONResponse(status_code=status.HTTP_200_OK, 
content=jsonable_encoder({"message": "success"})) @@ -140,7 +139,7 @@ async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request): mr = data['merge_request'] url = mr.get('url') draft = mr.get('draft') - if draft and should_skip_draft: + if draft: get_logger().info(f"Skipping draft MR: {url}") return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"})) diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 89d8dd11..b128aca0 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -230,7 +230,6 @@ push_commands = [ "/describe", "/review --pr_reviewer.num_code_suggestions=0", ] -skip_draft_mr = false [bitbucket_app] pr_commands = [ From 8fb9b8ed3e81ea3c24819d7392385042c506c650 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Fri, 23 Aug 2024 11:22:55 +0300 Subject: [PATCH 17/18] Update PR code suggestions prompts to avoid repeating changes already present in the PR --- pr_agent/settings/pr_code_suggestions_prompts.toml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml index 4bd5f7fb..bcbc6618 100644 --- a/pr_agent/settings/pr_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_code_suggestions_prompts.toml @@ -36,9 +36,9 @@ __old hunk__ Specific instructions for generating code suggestions: -- Provide in total up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful. -- The suggestions should focus on improving only the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers). The '__old hunk__' sections code is for context and reference only. -- Prioritize suggestions that address possible issues, major problems, and bugs in the PR code. +- Provide up to {{ num_code_suggestions }} code suggestions. 
+- The suggestions should be diverse and insightful. They should focus on improving only the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers). +- Prioritize suggestions that address possible issues, major problems, and bugs in the PR code. Don't repeat changes already present in the PR. If there are no relevant suggestions for the PR, return an empty list. - Don't suggest to add docstring, type hints, or comments, or to remove unused imports. - Suggestions should not repeat code already present in the '__new hunk__' sections. - Provide the exact line numbers range (inclusive) for each suggestion. Use the line numbers from the '__new hunk__' sections. @@ -153,9 +153,9 @@ __old hunk__ Specific instructions for generating code suggestions: -- Provide in total up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful. -- The suggestions should focus on improving only the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers). -- Prioritize suggestions that address possible issues, major problems, and bugs in the PR code. +- Provide up to {{ num_code_suggestions }} code suggestions. +- The suggestions should be diverse and insightful. They should focus on improving only the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers). +- Prioritize suggestions that address possible issues, major problems, and bugs in the PR code. Don't repeat changes already present in the PR. If there are no relevant suggestions for the PR, return an empty list. - Don't suggest to add docstring, type hints, or comments, or to remove unused imports. - Provide the exact line numbers range (inclusive) for each suggestion. Use the line numbers from the '__new hunk__' sections. - Every time you cite variables or names from the code, use backticks ('`'). 
For example: 'ensure that `variable_name` is ...' From 415f44d763692561a8768145d077f7b0cb49b8a9 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Fri, 23 Aug 2024 11:27:50 +0300 Subject: [PATCH 18/18] typo --- pr_agent/settings/pr_code_suggestions_prompts.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml index bcbc6618..8cca3fe8 100644 --- a/pr_agent/settings/pr_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_code_suggestions_prompts.toml @@ -37,7 +37,7 @@ __old hunk__ Specific instructions for generating code suggestions: - Provide up to {{ num_code_suggestions }} code suggestions. -- The suggestions should be diverse and insightful. They should focus on improving only the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers). +- The suggestions should be diverse and insightful. They should focus on improving only the new code introduced in the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers). - Prioritize suggestions that address possible issues, major problems, and bugs in the PR code. Don't repeat changes already present in the PR. If there are no relevant suggestions for the PR, return an empty list. - Don't suggest to add docstring, type hints, or comments, or to remove unused imports. - Suggestions should not repeat code already present in the '__new hunk__' sections. @@ -154,7 +154,7 @@ __old hunk__ Specific instructions for generating code suggestions: - Provide up to {{ num_code_suggestions }} code suggestions. -- The suggestions should be diverse and insightful. They should focus on improving only the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers). +- The suggestions should be diverse and insightful.
They should focus on improving only the new code introduced in the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers). - Prioritize suggestions that address possible issues, major problems, and bugs in the PR code. Don't repeat changes already present in the PR. If there are no relevant suggestions for the PR, return an empty list. - Don't suggest to add docstring, type hints, or comments, or to remove unused imports. - Provide the exact line numbers range (inclusive) for each suggestion. Use the line numbers from the '__new hunk__' sections.