diff --git a/pr_agent/algo/language_handler.py b/pr_agent/algo/language_handler.py index 66e85025..b4c02bee 100644 --- a/pr_agent/algo/language_handler.py +++ b/pr_agent/algo/language_handler.py @@ -3,8 +3,7 @@ from typing import Dict from pr_agent.config_loader import get_settings -language_extension_map_org = get_settings().language_extension_map_org -language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()} + # Bad Extensions, source: https://github.com/EleutherAI/github-downloader/blob/345e7c4cbb9e0dc8a0615fd995a08bf9d73b3fe6/download_repo_text.py # noqa: E501 bad_extensions = get_settings().bad_extensions.default @@ -29,6 +28,8 @@ def sort_files_by_main_languages(languages: Dict, files: list): # languages_sorted = sorted(languages, key=lambda x: x[1], reverse=True) # get all extensions for the languages main_extensions = [] + language_extension_map_org = get_settings().language_extension_map_org + language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()} for language in languages_sorted_list: if language.lower() in language_extension_map: main_extensions.append(language_extension_map[language.lower()]) diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 6063dece..4c1352f0 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -10,7 +10,7 @@ from github import RateLimitExceededException from pr_agent.algo.git_patch_processing import convert_to_hunks_with_lines_numbers, extend_patch, handle_patch_deletions from pr_agent.algo.language_handler import sort_files_by_main_languages from pr_agent.algo.file_filter import filter_ignored -from pr_agent.algo.token_handler import TokenHandler, get_token_encoder +from pr_agent.algo.token_handler import TokenHandler from pr_agent.algo.utils import get_max_tokens from pr_agent.config_loader import get_settings from pr_agent.git_providers.git_provider import FilePatchInfo, GitProvider, EDIT_TYPE @@ -326,35 
+326,6 @@ def find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo], return position, absolute_position -def clip_tokens(text: str, max_tokens: int) -> str: - """ - Clip the number of tokens in a string to a maximum number of tokens. - - Args: - text (str): The string to clip. - max_tokens (int): The maximum number of tokens allowed in the string. - - Returns: - str: The clipped string. - """ - if not text: - return text - - try: - encoder = get_token_encoder() - num_input_tokens = len(encoder.encode(text)) - if num_input_tokens <= max_tokens: - return text - num_chars = len(text) - chars_per_token = num_chars / num_input_tokens - num_output_chars = int(chars_per_token * max_tokens) - clipped_text = text[:num_output_chars] - return clipped_text - except Exception as e: - get_logger().warning(f"Failed to clip tokens: {e}") - return text - - def get_pr_multi_diffs(git_provider: GitProvider, token_handler: TokenHandler, model: str, diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index d3377dee..73074098 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -11,6 +11,7 @@ import yaml from starlette_context import context from pr_agent.algo import MAX_TOKENS +from pr_agent.algo.token_handler import get_token_encoder from pr_agent.config_loader import get_settings, global_settings from pr_agent.log import get_logger @@ -338,12 +339,15 @@ def set_custom_labels(variables): labels_list = f" - {labels_list}" if labels_list else "" variables["custom_labels"] = labels_list return - final_labels = "" + #final_labels = "" + #for k, v in labels.items(): + # final_labels += f" - {k} ({v['description']})\n" + #variables["custom_labels"] = final_labels + #variables["custom_labels_examples"] = f" - {list(labels.keys())[0]}" + variables["custom_labels_class"] = "class Label(str, Enum):" for k, v in labels.items(): - final_labels += f" - {k} ({v['description']})\n" - variables["custom_labels"] = final_labels - 
variables["custom_labels_examples"] = f" - {list(labels.keys())[0]}" - + description = v['description'].strip('\n').replace('\n', '\\n') + variables["custom_labels_class"] += f"\n {k.lower().replace(' ', '_')} = '{k}' # {description}" def get_user_labels(current_labels: List[str] = None): """ @@ -375,3 +379,34 @@ def get_max_tokens(model): max_tokens_model = min(settings.config.max_model_tokens, max_tokens_model) # get_logger().debug(f"limiting max tokens to {max_tokens_model}") return max_tokens_model + + +def clip_tokens(text: str, max_tokens: int, add_three_dots=True) -> str: + """ + Clip the number of tokens in a string to a maximum number of tokens. + + Args: + text (str): The string to clip. + max_tokens (int): The maximum number of tokens allowed in the string. + add_three_dots (bool, optional): Whether to append the '...(truncated)' marker to the end of the clipped text. Defaults to True. + Returns: + str: The clipped string. + """ + if not text: + return text + + try: + encoder = get_token_encoder() + num_input_tokens = len(encoder.encode(text)) + if num_input_tokens <= max_tokens: + return text + num_chars = len(text) + chars_per_token = num_chars / num_input_tokens + num_output_chars = int(chars_per_token * max_tokens) + clipped_text = text[:num_output_chars] + if add_three_dots: + clipped_text += "...(truncated)" + return clipped_text + except Exception as e: + get_logger().warning(f"Failed to clip tokens: {e}") + return text diff --git a/pr_agent/cli.py b/pr_agent/cli.py index 91d4889c..5a6a6640 100644 --- a/pr_agent/cli.py +++ b/pr_agent/cli.py @@ -23,18 +23,22 @@ For example: - cli.py --issue_url=... similar_issue Supported commands: --review / review_pr - Add a review that includes a summary of the PR and specific suggestions for improvement. +- review / review_pr - Add a review that includes a summary of the PR and specific suggestions for improvement. --ask / ask_question [question] - Ask a question about the PR.
+- ask / ask_question [question] - Ask a question about the PR. --describe / describe_pr - Modify the PR title and description based on the PR's contents. +- describe / describe_pr - Modify the PR title and description based on the PR's contents. --improve / improve_code - Suggest improvements to the code in the PR as pull request comments ready to commit. +- improve / improve_code - Suggest improvements to the code in the PR as pull request comments ready to commit. Extended mode ('improve --extended') employs several calls, and provides a more thorough feedback --reflect - Ask the PR author questions about the PR. +- reflect - Ask the PR author questions about the PR. --update_changelog - Update the changelog based on the PR's contents. +- update_changelog - Update the changelog based on the PR's contents. + +- add_docs + +- generate_labels Configuration: diff --git a/pr_agent/git_providers/azuredevops_provider.py b/pr_agent/git_providers/azuredevops_provider.py index 6a404532..ca11b9d8 100644 --- a/pr_agent/git_providers/azuredevops_provider.py +++ b/pr_agent/git_providers/azuredevops_provider.py @@ -14,9 +14,8 @@ try: except ImportError: AZURE_DEVOPS_AVAILABLE = False -from ..algo.pr_processing import clip_tokens from ..config_loader import get_settings -from ..algo.utils import load_large_diff +from ..algo.utils import load_large_diff, clip_tokens from ..algo.language_handler import is_valid_file from .git_provider import EDIT_TYPE, FilePatchInfo diff --git a/pr_agent/git_providers/codecommit_provider.py b/pr_agent/git_providers/codecommit_provider.py index a4836849..399f0a94 100644 --- a/pr_agent/git_providers/codecommit_provider.py +++ b/pr_agent/git_providers/codecommit_provider.py @@ -6,9 +6,9 @@ from urllib.parse import urlparse from pr_agent.git_providers.codecommit_client import CodeCommitClient -from ..algo.language_handler import is_valid_file, language_extension_map from ..algo.utils import load_large_diff from .git_provider import EDIT_TYPE, 
FilePatchInfo, GitProvider +from ..config_loader import get_settings from ..log import get_logger @@ -269,6 +269,8 @@ class CodeCommitProvider(GitProvider): # where each dictionary item is a language name. # We build that language->extension dictionary here in main_extensions_flat. main_extensions_flat = {} + language_extension_map_org = get_settings().language_extension_map_org + language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()} for language, extensions in language_extension_map.items(): for ext in extensions: main_extensions_flat[ext] = language diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py index 05122f9c..a341f43a 100644 --- a/pr_agent/git_providers/git_provider.py +++ b/pr_agent/git_providers/git_provider.py @@ -5,6 +5,7 @@ from dataclasses import dataclass from enum import Enum from typing import Optional +from pr_agent.config_loader import get_settings from pr_agent.log import get_logger @@ -62,7 +63,7 @@ class GitProvider(ABC): def get_pr_description(self, *, full: bool = True) -> str: from pr_agent.config_loader import get_settings - from pr_agent.algo.pr_processing import clip_tokens + from pr_agent.algo.utils import clip_tokens max_tokens_description = get_settings().get("CONFIG.MAX_DESCRIPTION_TOKENS", None) description = self.get_pr_description_full() if full else self.get_user_description() if max_tokens_description: @@ -173,26 +174,42 @@ def get_main_pr_language(languages, files) -> str: extension_list.append(file.filename.rsplit('.')[-1]) # get the most common extension - most_common_extension = max(set(extension_list), key=extension_list.count) + most_common_extension = '.' + max(set(extension_list), key=extension_list.count) + try: + language_extension_map_org = get_settings().language_extension_map_org + language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()} - # look for a match. 
TBD: add more languages, do this systematically - if most_common_extension == 'py' and top_language == 'python' or \ - most_common_extension == 'js' and top_language == 'javascript' or \ - most_common_extension == 'ts' and top_language == 'typescript' or \ - most_common_extension == 'go' and top_language == 'go' or \ - most_common_extension == 'java' and top_language == 'java' or \ - most_common_extension == 'c' and top_language == 'c' or \ - most_common_extension == 'cpp' and top_language == 'c++' or \ - most_common_extension == 'cs' and top_language == 'c#' or \ - most_common_extension == 'swift' and top_language == 'swift' or \ - most_common_extension == 'php' and top_language == 'php' or \ - most_common_extension == 'rb' and top_language == 'ruby' or \ - most_common_extension == 'rs' and top_language == 'rust' or \ - most_common_extension == 'scala' and top_language == 'scala' or \ - most_common_extension == 'kt' and top_language == 'kotlin' or \ - most_common_extension == 'pl' and top_language == 'perl' or \ - most_common_extension == top_language: - main_language_str = top_language + if top_language in language_extension_map and most_common_extension in language_extension_map[top_language]: + main_language_str = top_language + else: + for language, extensions in language_extension_map.items(): + if most_common_extension in extensions: + main_language_str = language + break + except Exception as e: + get_logger().exception(f"Failed to get main language: {e}") + pass + + ## old approach: + # most_common_extension = max(set(extension_list), key=extension_list.count) + # if most_common_extension == 'py' and top_language == 'python' or \ + # most_common_extension == 'js' and top_language == 'javascript' or \ + # most_common_extension == 'ts' and top_language == 'typescript' or \ + # most_common_extension == 'tsx' and top_language == 'typescript' or \ + # most_common_extension == 'go' and top_language == 'go' or \ + # most_common_extension == 'java' and 
top_language == 'java' or \ + # most_common_extension == 'c' and top_language == 'c' or \ + # most_common_extension == 'cpp' and top_language == 'c++' or \ + # most_common_extension == 'cs' and top_language == 'c#' or \ + # most_common_extension == 'swift' and top_language == 'swift' or \ + # most_common_extension == 'php' and top_language == 'php' or \ + # most_common_extension == 'rb' and top_language == 'ruby' or \ + # most_common_extension == 'rs' and top_language == 'rust' or \ + # most_common_extension == 'scala' and top_language == 'scala' or \ + # most_common_extension == 'kt' and top_language == 'kotlin' or \ + # most_common_extension == 'pl' and top_language == 'perl' or \ + # most_common_extension == top_language: + # main_language_str = top_language except Exception as e: get_logger().exception(e) diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 634b8694..46afbad6 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -8,8 +8,8 @@ from retry import retry from starlette_context import context from ..algo.language_handler import is_valid_file -from ..algo.pr_processing import clip_tokens, find_line_number_of_relevant_line_in_file -from ..algo.utils import load_large_diff +from ..algo.pr_processing import find_line_number_of_relevant_line_in_file +from ..algo.utils import load_large_diff, clip_tokens from ..config_loader import get_settings from ..log import get_logger from ..servers.utils import RateLimitExceeded diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index 078ca9dd..2eb00ce1 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -7,8 +7,8 @@ import gitlab from gitlab import GitlabGetError from ..algo.language_handler import is_valid_file -from ..algo.pr_processing import clip_tokens, find_line_number_of_relevant_line_in_file -from 
..algo.utils import load_large_diff +from ..algo.pr_processing import find_line_number_of_relevant_line_in_file +from ..algo.utils import load_large_diff, clip_tokens from ..config_loader import get_settings from .git_provider import EDIT_TYPE, FilePatchInfo, GitProvider from ..log import get_logger diff --git a/pr_agent/settings/custom_labels.toml b/pr_agent/settings/custom_labels.toml index 43e14b0e..9c751d0e 100644 --- a/pr_agent/settings/custom_labels.toml +++ b/pr_agent/settings/custom_labels.toml @@ -3,16 +3,16 @@ enable_custom_labels=false ## template for custom labels #[custom_labels."Bug fix"] -#description = "Fixes a bug in the code" +#description = """Fixes a bug in the code""" #[custom_labels."Tests"] -#description = "Adds or modifies tests" +#description = """Adds or modifies tests""" #[custom_labels."Bug fix with tests"] -#description = "Fixes a bug in the code and adds or modifies tests" +#description = """Fixes a bug in the code and adds or modifies tests""" #[custom_labels."Refactoring"] -#description = "Code refactoring without changing functionality" +#description = """Code refactoring without changing functionality""" #[custom_labels."Enhancement"] -#description = "Adds new features or functionality" +#description = """Adds new features or functionality""" #[custom_labels."Documentation"] -#description = "Adds or modifies documentation" +#description = """Adds or modifies documentation""" #[custom_labels."Other"] -#description = "Other changes that do not fit in any of the above categories" \ No newline at end of file +#description = """Other changes that do not fit in any of the above categories""" \ No newline at end of file diff --git a/pr_agent/settings/pr_add_docs.toml b/pr_agent/settings/pr_add_docs.toml index 31b7195c..fbf4b475 100644 --- a/pr_agent/settings/pr_add_docs.toml +++ b/pr_agent/settings/pr_add_docs.toml @@ -1,6 +1,6 @@ [pr_add_docs_prompt] system="""You are a language model called PR-Code-Documentation Agent, that specializes 
in generating documentation for code. -Your task is to generate meaningfull {{ docs_for_language }} to a PR (the '+' lines). +Your task is to generate meaningful {{ docs_for_language }} for a PR (lines starting with '+'). Example for a PR Diff input: ' @@ -103,7 +103,7 @@ Description: '{{description}}' {%- if language %} -Main language: {{language}} +Main PR language: '{{language}}' {%- endif %} diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml index 42ec7441..4b752272 100644 --- a/pr_agent/settings/pr_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_code_suggestions_prompts.toml @@ -1,6 +1,6 @@ [pr_code_suggestions_prompt] -system="""You are a language model called PR-Code-Reviewer, that specializes in suggesting code improvements for Pull Request (PR). -Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR (the '+' lines in the diff). +system="""You are PR-Reviewer, a language model that specializes in suggesting code improvements for a Pull Request (PR). +Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR diff (lines starting with '+'). Example for a PR Diff input: ' @@ -120,7 +120,7 @@ Description: '{{description}}' {%- if language %} -Main language: {{language}} +Main PR language: '{{ language }}' {%- endif %} diff --git a/pr_agent/settings/pr_custom_labels.toml b/pr_agent/settings/pr_custom_labels.toml index 1dbb6f8d..ddcc8cb0 100644 --- a/pr_agent/settings/pr_custom_labels.toml +++ b/pr_agent/settings/pr_custom_labels.toml @@ -1,8 +1,10 @@ [pr_custom_labels_prompt] -system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests. -Your task is to label the type of the PR content. -- Make sure not to focus the new PR code (the '+' lines).
-- If needed, each YAML output should be in block scalar format ('|-') +system="""You are PR-Reviewer, a language model designed to review a git Pull Request (PR). +Your task is to provide labels that describe the PR content. +{%- if enable_custom_labels %} +Thoroughly read each label's name and description, and decide whether the label is relevant to the PR. +{%- endif %} + {%- if extra_instructions %} Extra instructions from the user: @@ -11,52 +13,56 @@ Extra instructions from the user: ' {% endif %} -You must use the following YAML schema to format your answer: -```yaml -PR Type: - type: array + +The output must be a YAML object equivalent to type $Labels, according to the following Pydantic definitions: +' {%- if enable_custom_labels %} - description: Labels that are applicable to the Pull Request. Don't output the description in the parentheses. If none of the labels is relevant to the PR, output an empty array. -{%- endif %} - items: - type: string - enum: -{%- if enable_custom_labels %} -{{ custom_labels }} + +{{ custom_labels_class }} + {%- else %} - - Bug fix - - Tests - - Refactoring - - Enhancement - - Documentation - - Other +class Label(str, Enum): + bug_fix = "Bug fix" + tests = "Tests" + refactoring = "Refactoring" + enhancement = "Enhancement" + documentation = "Documentation" + other = "Other" {%- endif %} +class Labels(BaseModel): + labels: List[Label] = Field(min_items=0, description="custom labels that describe the PR. Return the label value, not the name.") +' + + Example output: ```yaml -PR Type: -{%- if enable_custom_labels %} -{{ custom_labels_examples }} -{%- else %} - - Bug fix -{%- endif %} +labels: +- ... +- ... ``` -Make sure to output a valid YAML. Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields. +Answer should be a valid YAML, and nothing else.
""" user="""PR Info: + Previous title: '{{title}}' -Previous description: '{{description}}' -Branch: '{{branch}}' + +Branch: '{{ branch }}' + +Description: '{{ description }}' + {%- if language %} -Main language: {{language}} +Main PR language: '{{ language }}' {%- endif %} {%- if commit_messages_str %} Commit messages: -{{commit_messages_str}} +' +{{ commit_messages_str }} +' {%- endif %} diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index 761c36c1..9aefe0da 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -1,9 +1,9 @@ [pr_description_prompt] -system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests. -Your task is to provide full description of a Pull Request (PR) content. -- Make sure to focus on the new PR code (the '+' lines). -- Notice that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or not up-to-date. Hence, compare them to the PR diff code, and use them only as a reference. -- Emphasize first the most important changes, and then the less important ones. +system="""You are PR-Reviewer, a language model designed to review a git Pull Request (PR). +Your task is to provide a full description for the PR content. +- Make sure to focus on the new PR code (lines starting with '+'). +- Keep in mind that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or out of date. Hence, compare them to the PR diff code, and use them only as a reference. +- Prioritize the most significant PR changes first, followed by the minor ones. 
- If needed, each YAML output should be in block scalar format ('|-') {%- if extra_instructions %} @@ -13,81 +13,83 @@ Extra instructions from the user: ' {% endif %} -You must use the following YAML schema to format your answer: -```yaml -PR Title: - type: string - description: an informative title for the PR, describing its main theme -PR Type: - type: string - enum: - - Bug fix - - Tests - - Refactoring - - Enhancement - - Documentation - - Other + +The output must be a YAML object equivalent to type $PRDescription, according to the following Pydantic definitions: +' +class PRType(str, Enum): + bug_fix = "Bug fix" + tests = "Tests" + refactoring = "Refactoring" + enhancement = "Enhancement" + documentation = "Documentation" + other = "Other" + {%- if enable_custom_labels %} -PR Labels: - type: array - description: Labels that are applicable to the Pull Request. Don't output the description in the parentheses. If none of the labels is relevant to the PR, output an empty array. - items: - type: string - enum: -{{ custom_labels }} + +{{ custom_labels_class }} + {%- endif %} -PR Description: - type: string - description: an informative and concise description of the PR. - {%- if use_bullet_points %} Use bullet points. {% endif %} -PR Main Files Walkthrough: - type: array - maxItems: 10 - description: |- - a walkthrough of the PR changes. Review main files, and shortly describe the changes in each file (up to 10 most important files). 
- items: - filename: - type: string - description: the relevant file full path - changes in file: - type: string - description: minimal and concise description of the changes in the relevant file -``` + +class FileWalkthrough(BaseModel): + filename: str = Field(description="the relevant file full path") + changes_in_file: str = Field(description="minimal and concise description of the changes in the relevant file") + +class PRDescription(BaseModel): + title: str = Field(description="an informative title for the PR, describing its main theme") + type: List[PRType] = Field(description="one or more types that describe the PR type. Return the label value, not the name.") + description: str = Field(description="an informative and concise description of the PR. {%- if use_bullet_points %} Use bullet points. {% endif %}") +{%- if enable_custom_labels %} + labels: List[Label] = Field(min_items=0, description="custom labels that describe the PR. Return the label value, not the name.") +{%- endif %} + main_files_walkthrough: List[FileWalkthrough] = Field(max_items=10) +' Example output: ```yaml -PR Title: |- - ... -PR Type: +title: |- ... +type: +- ... +- ... {%- if enable_custom_labels %} -PR Labels: +labels: - ... - ... {%- endif %} -PR Description: |- +description: |- ... -PR Main Files Walkthrough: - - ... - - ... +main_files_walkthrough: +- ... +- ... ``` -Make sure to output a valid YAML. Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields. +Answer should be a valid YAML, and nothing else.
Each YAML output MUST be after a newline, with proper indent, and block scalar indicator ('|-') """ user="""PR Info: + Previous title: '{{title}}' -Previous description: '{{description}}' + +{%- if description %} + +Previous description: +' +{{ description }} +' +{%- endif %} + Branch: '{{branch}}' {%- if language %} -Main language: {{language}} +Main PR language: '{{ language }}' {%- endif %} {%- if commit_messages_str %} Commit messages: -{{commit_messages_str}} +' +{{ commit_messages_str }} +' {%- endif %} @@ -95,6 +97,8 @@ The PR Git Diff: ``` {{diff}} ``` + + Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines. Response (should be a valid YAML, and nothing else): diff --git a/pr_agent/settings/pr_information_from_user_prompts.toml b/pr_agent/settings/pr_information_from_user_prompts.toml index 8d628f7a..ca4cbe3c 100644 --- a/pr_agent/settings/pr_information_from_user_prompts.toml +++ b/pr_agent/settings/pr_information_from_user_prompts.toml @@ -1,5 +1,5 @@ [pr_information_from_user_prompt] -system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests. +system="""You are PR-Reviewer, a language model designed to review a git Pull Request (PR). Given the PR Info and the PR Git Diff, generate 3 short questions about the PR code for the PR author. The goal of the questions is to help the language model understand the PR better, so the questions should be insightful, informative, non-trivial, and relevant to the PR. You should prefer asking yes\\no questions, or multiple choice questions. Also add at least one open-ended question, but make sure they are not too difficult, and can be answered in a sentence or two. 
@@ -16,15 +16,21 @@ Questions to better understand the PR: user="""PR Info: Title: '{{title}}' + Branch: '{{branch}}' + Description: '{{description}}' + {%- if language %} -Main language: {{language}} + +Main PR language: '{{ language }}' {%- endif %} {%- if commit_messages_str %} Commit messages: +' {{commit_messages_str}} +' {%- endif %} diff --git a/pr_agent/settings/pr_questions_prompts.toml b/pr_agent/settings/pr_questions_prompts.toml index e306684d..63569197 100644 --- a/pr_agent/settings/pr_questions_prompts.toml +++ b/pr_agent/settings/pr_questions_prompts.toml @@ -1,22 +1,29 @@ [pr_questions_prompt] -system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests. -Your task is to answer questions about the new PR code (the '+' lines), and provide feedback. +system="""You are PR-Reviewer, a language model designed to review a git Pull Request (PR). +Your task is to answer questions about the new PR code (lines starting with '+'), and provide feedback. Be informative, constructive, and give examples. Try to be as specific as possible. Don't avoid answering the questions. You must answer the questions, as best as you can, without adding unrelated content. Make sure not to repeat modifications already implemented in the new PR code (the '+' lines). """ user="""PR Info: + Title: '{{title}}' + Branch: '{{branch}}' + Description: '{{description}}' + {%- if language %} -Main language: {{language}} + +Main PR language: '{{ language }}' {%- endif %} {%- if commit_messages_str %} Commit messages: -{{commit_messages_str}} +' +{{ commit_messages_str }} +' {%- endif %} diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index b75c296a..b3e8f9b4 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -1,6 +1,7 @@ [pr_review_prompt] -system="""You are PR-Reviewer, a language model designed to review git pull requests. 
+system="""You are PR-Reviewer, a language model designed to review a git Pull Request (PR). Your task is to provide constructive and concise feedback for the PR, and also provide meaningful code suggestions. +The review should focus on new code added in the PR diff (lines starting with '+') Example PR Diff input: ' @@ -22,14 +23,14 @@ code line that already existed in the file.... ... ' -The review should focus on new code added in the PR (lines starting with '+'), and not on code that already existed in the file (lines starting with '-', or without prefix). - {%- if num_code_suggestions > 0 %} + +Code suggestions guidelines: - Provide up to {{ num_code_suggestions }} code suggestions. Try to provide diverse and insightful suggestions. - Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices. - Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the PR code. - Don't suggest to add docstring, type hints, or comments. 
-- Suggestions should focus on improving the new code added in the PR (lines starting with '+') +- Suggestions should focus on the new code added in the PR diff (lines starting with '+') {%- endif %} {%- if extra_instructions %} @@ -179,16 +180,29 @@ Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'desc """ user="""PR Info: + Title: '{{title}}' + Branch: '{{branch}}' -Description: '{{description}}' + +{%- if description %} + +Description: +' +{{description}} +' +{%- endif %} + {%- if language %} -Main language: {{language}} + +Main PR language: '{{ language }}' {%- endif %} {%- if commit_messages_str %} Commit messages: +' {{commit_messages_str}} +' {%- endif %} {%- if question_str %} @@ -208,7 +222,7 @@ The PR Git Diff: ``` {{diff}} ``` -Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions. Focus on the '+' lines. + Response (should be a valid YAML, and nothing else): ```yaml diff --git a/pr_agent/settings/pr_sort_code_suggestions_prompts.toml b/pr_agent/settings/pr_sort_code_suggestions_prompts.toml index 16b6e861..f4a3f5bf 100644 --- a/pr_agent/settings/pr_sort_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_sort_code_suggestions_prompts.toml @@ -2,10 +2,10 @@ system=""" """ -user="""You are given a list of code suggestions to improve a PR: - +user="""You are given a list of code suggestions to improve a git Pull Request (PR): +' {{ suggestion_str|trim }} - +' Your task is to sort the code suggestions by their order of importance, and return a list with sorting order. The sorting order is a list of pairs, where each pair contains the index of the suggestion in the original list. 
diff --git a/pr_agent/settings/pr_update_changelog_prompts.toml b/pr_agent/settings/pr_update_changelog_prompts.toml index e9133e34..9d00f251 100644 --- a/pr_agent/settings/pr_update_changelog_prompts.toml +++ b/pr_agent/settings/pr_update_changelog_prompts.toml @@ -15,16 +15,23 @@ Extra instructions from the user: """ user="""PR Info: + Title: '{{title}}' + Branch: '{{branch}}' + Description: '{{description}}' + {%- if language %} -Main language: {{language}} + +Main PR language: '{{ language }}' {%- endif %} {%- if commit_messages_str %} Commit messages: -{{commit_messages_str}} +' +{{ commit_messages_str }} +' {%- endif %} diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py index 47e3f03f..0e7244d3 100644 --- a/pr_agent/tools/pr_description.py +++ b/pr_agent/tools/pr_description.py @@ -44,8 +44,7 @@ class PRDescription: "extra_instructions": get_settings().pr_description.extra_instructions, "commit_messages_str": self.git_provider.get_commit_messages(), "enable_custom_labels": get_settings().config.enable_custom_labels, - "custom_labels": "", - "custom_labels_examples": "", + "custom_labels_class": "", # will be filled if necessary in 'set_custom_labels' function } self.user_description = self.git_provider.get_user_description() @@ -175,16 +174,16 @@ class PRDescription: pr_types = [] # If the 'PR Type' key is present in the dictionary, split its value by comma and assign it to 'pr_types' - if 'PR Labels' in self.data: - if type(self.data['PR Labels']) == list: - pr_types = self.data['PR Labels'] - elif type(self.data['PR Labels']) == str: - pr_types = self.data['PR Labels'].split(',') - elif 'PR Type' in self.data: - if type(self.data['PR Type']) == list: - pr_types = self.data['PR Type'] - elif type(self.data['PR Type']) == str: - pr_types = self.data['PR Type'].split(',') + if 'labels' in self.data: + if type(self.data['labels']) == list: + pr_types = self.data['labels'] + elif type(self.data['labels']) == str: + pr_types = 
self.data['labels'].split(',') + elif 'type' in self.data: + if type(self.data['type']) == list: + pr_types = self.data['type'] + elif type(self.data['type']) == str: + pr_types = self.data['type'].split(',') return pr_types def _prepare_pr_answer_with_markers(self) -> Tuple[str, str]: @@ -196,12 +195,12 @@ class PRDescription: else: ai_header = "" - ai_type = self.data.get('PR Type') + ai_type = self.data.get('type') if ai_type and not re.search(r'', body): pr_type = f"{ai_header}{ai_type}" body = body.replace('pr_agent:type', pr_type) - ai_summary = self.data.get('PR Description') + ai_summary = self.data.get('description') if ai_summary and not re.search(r'', body): summary = f"{ai_header}{ai_summary}" body = body.replace('pr_agent:summary', summary) @@ -231,16 +230,16 @@ class PRDescription: # Iterate over the dictionary items and append the key and value to 'markdown_text' in a markdown format markdown_text = "" # Don't display 'PR Labels' - if 'PR Labels' in self.data and self.git_provider.is_supported("get_labels"): - self.data.pop('PR Labels') + if 'labels' in self.data and self.git_provider.is_supported("get_labels"): + self.data.pop('labels') if not get_settings().pr_description.enable_pr_type: - self.data.pop('PR Type') + self.data.pop('type') for key, value in self.data.items(): markdown_text += f"## {key}\n\n" markdown_text += f"{value}\n\n" # Remove the 'PR Title' key from the dictionary - ai_title = self.data.pop('PR Title', self.vars["title"]) + ai_title = self.data.pop('title', self.vars["title"]) if get_settings().pr_description.keep_original_user_title: # Assign the original PR title to the 'title' variable title = self.vars["title"] @@ -259,7 +258,7 @@ class PRDescription: pr_body += "
files:\n\n" for file in value: filename = file['filename'].replace("'", "`") - description = file['changes in file'] + description = file['changes_in_file'] pr_body += f'- `{filename}`: {description}\n' if self.git_provider.is_supported("gfm_markdown"): pr_body +="
\n" diff --git a/pr_agent/tools/pr_generate_labels.py b/pr_agent/tools/pr_generate_labels.py index e413e96f..6ea322a4 100644 --- a/pr_agent/tools/pr_generate_labels.py +++ b/pr_agent/tools/pr_generate_labels.py @@ -43,9 +43,8 @@ class PRGenerateLabels: "use_bullet_points": get_settings().pr_description.use_bullet_points, "extra_instructions": get_settings().pr_description.extra_instructions, "commit_messages_str": self.git_provider.get_commit_messages(), - "custom_labels": "", - "custom_labels_examples": "", "enable_custom_labels": get_settings().config.enable_custom_labels, + "custom_labels_class": "", # will be filled if necessary in 'set_custom_labels' function } # Initialize the token handler @@ -148,6 +147,9 @@ class PRGenerateLabels: user=user_prompt ) + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"\nAI response:\n{response}") + return response def _prepare_data(self): @@ -159,11 +161,11 @@ class PRGenerateLabels: def _prepare_labels(self) -> List[str]: pr_types = [] - # If the 'PR Type' key is present in the dictionary, split its value by comma and assign it to 'pr_types' - if 'PR Type' in self.data: - if type(self.data['PR Type']) == list: - pr_types = self.data['PR Type'] - elif type(self.data['PR Type']) == str: - pr_types = self.data['PR Type'].split(',') + # If the 'labels' key is present in the dictionary, split its value by comma and assign it to 'pr_types' + if 'labels' in self.data: + if type(self.data['labels']) == list: + pr_types = self.data['labels'] + elif type(self.data['labels']) == str: + pr_types = self.data['labels'].split(',') return pr_types diff --git a/tests/unittest/test_clip_tokens.py b/tests/unittest/test_clip_tokens.py new file mode 100644 index 00000000..cc52ab7e --- /dev/null +++ b/tests/unittest/test_clip_tokens.py @@ -0,0 +1,19 @@ + +# Generated by CodiumAI + +import pytest + +from pr_agent.algo.utils import clip_tokens + + +class TestClipTokens: + def test_clip(self): + text = 
"line1\nline2\nline3\nline4\nline5\nline6" + max_tokens = 25 + result = clip_tokens(text, max_tokens) + assert result == text + + max_tokens = 10 + result = clip_tokens(text, max_tokens) + expected_results = 'line1\nline2\nline3\nli...(truncated)' + assert result == expected_results