Merge pull request #447 from Codium-ai/tr/pydantic

Refactor PR label handling and update CLI commands
2025-07-21 04:50:39 +08:00 · 2023-11-25 23:37:02 -08:00
parent 46d4d04e94 668041c09f
commit d4e979cb02
22 changed files with 307 additions and 214 deletions
--- a/pr_agent/algo/language_handler.py
+++ b/pr_agent/algo/language_handler.py
@ -3,8 +3,7 @@ from typing import Dict

 from pr_agent.config_loader import get_settings

-language_extension_map_org = get_settings().language_extension_map_org
-language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()}
+

 # Bad Extensions, source: https://github.com/EleutherAI/github-downloader/blob/345e7c4cbb9e0dc8a0615fd995a08bf9d73b3fe6/download_repo_text.py  # noqa: E501
 bad_extensions = get_settings().bad_extensions.default
@ -29,6 +28,8 @@ def sort_files_by_main_languages(languages: Dict, files: list):
    # languages_sorted = sorted(languages, key=lambda x: x[1], reverse=True)
    # get all extensions for the languages
    main_extensions = []
+    language_extension_map_org = get_settings().language_extension_map_org
+    language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()}
    for language in languages_sorted_list:
        if language.lower() in language_extension_map:
            main_extensions.append(language_extension_map[language.lower()])
--- a/pr_agent/algo/pr_processing.py
+++ b/pr_agent/algo/pr_processing.py
@ -10,7 +10,7 @@ from github import RateLimitExceededException
 from pr_agent.algo.git_patch_processing import convert_to_hunks_with_lines_numbers, extend_patch, handle_patch_deletions
 from pr_agent.algo.language_handler import sort_files_by_main_languages
 from pr_agent.algo.file_filter import filter_ignored
-from pr_agent.algo.token_handler import TokenHandler, get_token_encoder
+from pr_agent.algo.token_handler import TokenHandler
 from pr_agent.algo.utils import get_max_tokens
 from pr_agent.config_loader import get_settings
 from pr_agent.git_providers.git_provider import FilePatchInfo, GitProvider, EDIT_TYPE
@ -326,35 +326,6 @@ def find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo],
    return position, absolute_position


-def clip_tokens(text: str, max_tokens: int) -> str:
-    """
-    Clip the number of tokens in a string to a maximum number of tokens.
-
-    Args:
-        text (str): The string to clip.
-        max_tokens (int): The maximum number of tokens allowed in the string.
-
-    Returns:
-        str: The clipped string.
-    """
-    if not text:
-        return text
-
-    try:
-        encoder = get_token_encoder()
-        num_input_tokens = len(encoder.encode(text))
-        if num_input_tokens <= max_tokens:
-            return text
-        num_chars = len(text)
-        chars_per_token = num_chars / num_input_tokens
-        num_output_chars = int(chars_per_token * max_tokens)
-        clipped_text = text[:num_output_chars]
-        return clipped_text
-    except Exception as e:
-        get_logger().warning(f"Failed to clip tokens: {e}")
-        return text
-
-
 def get_pr_multi_diffs(git_provider: GitProvider,
                       token_handler: TokenHandler,
                       model: str,
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@ -11,6 +11,7 @@ import yaml
 from starlette_context import context

 from pr_agent.algo import MAX_TOKENS
+from pr_agent.algo.token_handler import get_token_encoder
 from pr_agent.config_loader import get_settings, global_settings
 from pr_agent.log import get_logger

@ -338,12 +339,15 @@ def set_custom_labels(variables):
        labels_list = f"      - {labels_list}" if labels_list else ""
        variables["custom_labels"] = labels_list
        return
-    final_labels = ""
+    #final_labels = ""
+    #for k, v in labels.items():
+    #    final_labels += f"      - {k} ({v['description']})\n"
+    #variables["custom_labels"] = final_labels
+    #variables["custom_labels_examples"] = f"      - {list(labels.keys())[0]}"
+    variables["custom_labels_class"] = "class Label(str, Enum):"
    for k, v in labels.items():
-        final_labels += f"      - {k} ({v['description']})\n"
-    variables["custom_labels"] = final_labels
-    variables["custom_labels_examples"] = f"      - {list(labels.keys())[0]}"
-
+        description = v['description'].strip('\n').replace('\n', '\\n')
+        variables["custom_labels_class"] += f"\n    {k.lower().replace(' ', '_')} = '{k}' # {description}"

 def get_user_labels(current_labels: List[str] = None):
    """
@ -375,3 +379,34 @@ def get_max_tokens(model):
        max_tokens_model = min(settings.config.max_model_tokens, max_tokens_model)
        # get_logger().debug(f"limiting max tokens to {max_tokens_model}")
    return max_tokens_model
+
+
+def clip_tokens(text: str, max_tokens: int, add_three_dots=True) -> str:
+    """
+    Clip the number of tokens in a string to a maximum number of tokens.
+
+    Args:
+        text (str): The string to clip.
+        max_tokens (int): The maximum number of tokens allowed in the string.
+        add_three_dots (bool, optional): Whether to append "...(truncated)" to the end of the clipped text. Defaults to True.
+    Returns:
+        str: The clipped string.
+    """
+    if not text:
+        return text
+
+    try:
+        encoder = get_token_encoder()
+        num_input_tokens = len(encoder.encode(text))
+        if num_input_tokens <= max_tokens:
+            return text
+        num_chars = len(text)
+        chars_per_token = num_chars / num_input_tokens
+        num_output_chars = int(chars_per_token * max_tokens)
+        clipped_text = text[:num_output_chars]
+        if add_three_dots:
+            clipped_text += "...(truncated)"
+        return clipped_text
+    except Exception as e:
+        get_logger().warning(f"Failed to clip tokens: {e}")
+        return text
--- a/pr_agent/cli.py
+++ b/pr_agent/cli.py
@ -23,18 +23,22 @@ For example:
 - cli.py --issue_url=... similar_issue

 Supported commands:
-review / review_pr - Add a review that includes a summary of the PR and specific suggestions for improvement.
+- review / review_pr - Add a review that includes a summary of the PR and specific suggestions for improvement.

-ask / ask_question [question] - Ask a question about the PR.
+- ask / ask_question [question] - Ask a question about the PR.

-describe / describe_pr - Modify the PR title and description based on the PR's contents.
+- describe / describe_pr - Modify the PR title and description based on the PR's contents.

-improve / improve_code - Suggest improvements to the code in the PR as pull request comments ready to commit.
+- improve / improve_code - Suggest improvements to the code in the PR as pull request comments ready to commit.
 Extended mode ('improve --extended') employs several calls, and provides a more thorough feedback

-reflect - Ask the PR author questions about the PR.
+- reflect - Ask the PR author questions about the PR.

-update_changelog - Update the changelog based on the PR's contents.
+- update_changelog - Update the changelog based on the PR's contents.
+
+- add_docs - Generate documentation for the new code in the PR.
+
+- generate_labels - Provide labels that describe the PR content.


 Configuration:
--- a/pr_agent/git_providers/azuredevops_provider.py
+++ b/pr_agent/git_providers/azuredevops_provider.py
@ -14,9 +14,8 @@ try:
 except ImportError:
    AZURE_DEVOPS_AVAILABLE = False

-from ..algo.pr_processing import clip_tokens
 from ..config_loader import get_settings
-from ..algo.utils import load_large_diff
+from ..algo.utils import load_large_diff, clip_tokens
 from ..algo.language_handler import is_valid_file
 from .git_provider import EDIT_TYPE, FilePatchInfo

--- a/pr_agent/git_providers/codecommit_provider.py
+++ b/pr_agent/git_providers/codecommit_provider.py
@ -6,9 +6,9 @@ from urllib.parse import urlparse

 from pr_agent.git_providers.codecommit_client import CodeCommitClient

-from ..algo.language_handler import is_valid_file, language_extension_map
 from ..algo.utils import load_large_diff
 from .git_provider import EDIT_TYPE, FilePatchInfo, GitProvider
+from ..config_loader import get_settings
 from ..log import get_logger


@ -269,6 +269,8 @@ class CodeCommitProvider(GitProvider):
        # where each dictionary item is a language name.
        # We build that language->extension dictionary here in main_extensions_flat.
        main_extensions_flat = {}
+        language_extension_map_org = get_settings().language_extension_map_org
+        language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()}
        for language, extensions in language_extension_map.items():
            for ext in extensions:
                main_extensions_flat[ext] = language
--- a/pr_agent/git_providers/git_provider.py
+++ b/pr_agent/git_providers/git_provider.py
@ -5,6 +5,7 @@ from dataclasses import dataclass
 from enum import Enum
 from typing import Optional

+from pr_agent.config_loader import get_settings
 from pr_agent.log import get_logger


@ -62,7 +63,7 @@ class GitProvider(ABC):

    def get_pr_description(self, *, full: bool = True) -> str:
        from pr_agent.config_loader import get_settings
-        from pr_agent.algo.pr_processing import clip_tokens
+        from pr_agent.algo.utils import clip_tokens
        max_tokens_description = get_settings().get("CONFIG.MAX_DESCRIPTION_TOKENS", None)
        description = self.get_pr_description_full() if full else self.get_user_description()
        if max_tokens_description:
@ -173,26 +174,42 @@ def get_main_pr_language(languages, files) -> str:
            extension_list.append(file.filename.rsplit('.')[-1])

        # get the most common extension
-        most_common_extension = max(set(extension_list), key=extension_list.count)
+        most_common_extension = '.' + max(set(extension_list), key=extension_list.count)
+        try:
+            language_extension_map_org = get_settings().language_extension_map_org
+            language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()}

-        # look for a match. TBD: add more languages, do this systematically
-        if most_common_extension == 'py' and top_language == 'python' or \
-                most_common_extension == 'js' and top_language == 'javascript' or \
-                most_common_extension == 'ts' and top_language == 'typescript' or \
-                most_common_extension == 'go' and top_language == 'go' or \
-                most_common_extension == 'java' and top_language == 'java' or \
-                most_common_extension == 'c' and top_language == 'c' or \
-                most_common_extension == 'cpp' and top_language == 'c++' or \
-                most_common_extension == 'cs' and top_language == 'c#' or \
-                most_common_extension == 'swift' and top_language == 'swift' or \
-                most_common_extension == 'php' and top_language == 'php' or \
-                most_common_extension == 'rb' and top_language == 'ruby' or \
-                most_common_extension == 'rs' and top_language == 'rust' or \
-                most_common_extension == 'scala' and top_language == 'scala' or \
-                most_common_extension == 'kt' and top_language == 'kotlin' or \
-                most_common_extension == 'pl' and top_language == 'perl' or \
-                most_common_extension == top_language:
-            main_language_str = top_language
+            if top_language in language_extension_map and most_common_extension in language_extension_map[top_language]:
+                main_language_str = top_language
+            else:
+                for language, extensions in language_extension_map.items():
+                    if most_common_extension in extensions:
+                        main_language_str = language
+                        break
+        except Exception as e:
+            get_logger().exception(f"Failed to get main language: {e}")
+            pass
+
+        ## old approach:
+        # most_common_extension = max(set(extension_list), key=extension_list.count)
+        # if most_common_extension == 'py' and top_language == 'python' or \
+        #         most_common_extension == 'js' and top_language == 'javascript' or \
+        #         most_common_extension == 'ts' and top_language == 'typescript' or \
+        #         most_common_extension == 'tsx' and top_language == 'typescript' or \
+        #         most_common_extension == 'go' and top_language == 'go' or \
+        #         most_common_extension == 'java' and top_language == 'java' or \
+        #         most_common_extension == 'c' and top_language == 'c' or \
+        #         most_common_extension == 'cpp' and top_language == 'c++' or \
+        #         most_common_extension == 'cs' and top_language == 'c#' or \
+        #         most_common_extension == 'swift' and top_language == 'swift' or \
+        #         most_common_extension == 'php' and top_language == 'php' or \
+        #         most_common_extension == 'rb' and top_language == 'ruby' or \
+        #         most_common_extension == 'rs' and top_language == 'rust' or \
+        #         most_common_extension == 'scala' and top_language == 'scala' or \
+        #         most_common_extension == 'kt' and top_language == 'kotlin' or \
+        #         most_common_extension == 'pl' and top_language == 'perl' or \
+        #         most_common_extension == top_language:
+        #     main_language_str = top_language

    except Exception as e:
        get_logger().exception(e)
--- a/pr_agent/git_providers/github_provider.py
+++ b/pr_agent/git_providers/github_provider.py
@ -8,8 +8,8 @@ from retry import retry
 from starlette_context import context

 from ..algo.language_handler import is_valid_file
-from ..algo.pr_processing import clip_tokens, find_line_number_of_relevant_line_in_file
-from ..algo.utils import load_large_diff
+from ..algo.pr_processing import find_line_number_of_relevant_line_in_file
+from ..algo.utils import load_large_diff, clip_tokens
 from ..config_loader import get_settings
 from ..log import get_logger
 from ..servers.utils import RateLimitExceeded
--- a/pr_agent/git_providers/gitlab_provider.py
+++ b/pr_agent/git_providers/gitlab_provider.py
@ -7,8 +7,8 @@ import gitlab
 from gitlab import GitlabGetError

 from ..algo.language_handler import is_valid_file
-from ..algo.pr_processing import clip_tokens, find_line_number_of_relevant_line_in_file
-from ..algo.utils import load_large_diff
+from ..algo.pr_processing import find_line_number_of_relevant_line_in_file
+from ..algo.utils import load_large_diff, clip_tokens
 from ..config_loader import get_settings
 from .git_provider import EDIT_TYPE, FilePatchInfo, GitProvider
 from ..log import get_logger
--- a/pr_agent/settings/custom_labels.toml
+++ b/pr_agent/settings/custom_labels.toml
@ -3,16 +3,16 @@ enable_custom_labels=false

 ## template for custom labels
 #[custom_labels."Bug fix"]
-#description = "Fixes a bug in the code"
+#description = """Fixes a bug in the code"""
 #[custom_labels."Tests"]
-#description = "Adds or modifies tests"
+#description = """Adds or modifies tests"""
 #[custom_labels."Bug fix with tests"]
-#description = "Fixes a bug in the code and adds or modifies tests"
+#description = """Fixes a bug in the code and adds or modifies tests"""
 #[custom_labels."Refactoring"]
-#description = "Code refactoring without changing functionality"
+#description = """Code refactoring without changing functionality"""
 #[custom_labels."Enhancement"]
-#description = "Adds new features or functionality"
+#description = """Adds new features or functionality"""
 #[custom_labels."Documentation"]
-#description = "Adds or modifies documentation"
+#description = """Adds or modifies documentation"""
 #[custom_labels."Other"]
-#description = "Other changes that do not fit in any of the above categories"
+#description = """Other changes that do not fit in any of the above categories"""
--- a/pr_agent/settings/pr_add_docs.toml
+++ b/pr_agent/settings/pr_add_docs.toml
@ -1,6 +1,6 @@
 [pr_add_docs_prompt]
 system="""You are a language model called PR-Code-Documentation Agent, that specializes in generating documentation for code.
-Your task is to generate meaningfull {{ docs_for_language }} to a PR (the '+' lines).
+Your task is to generate meaningful {{ docs_for_language }} for a PR (lines starting with '+').

 Example for a PR Diff input:
 '
@ -103,7 +103,7 @@ Description: '{{description}}'

 {%- if language %}

-Main language: {{language}}
+Main PR language: '{{language}}'
 {%- endif %}


--- a/pr_agent/settings/pr_code_suggestions_prompts.toml
+++ b/pr_agent/settings/pr_code_suggestions_prompts.toml
@ -1,6 +1,6 @@
 [pr_code_suggestions_prompt]
-system="""You are a language model called PR-Code-Reviewer, that specializes in suggesting code improvements for Pull Request (PR).
-Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR (the '+' lines in the diff).
+system="""You are PR-Reviewer, a language model that specializes in suggesting code improvements for a Pull Request (PR).
+Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR diff (lines starting with '+').

 Example for a PR Diff input:
 '
@ -120,7 +120,7 @@ Description: '{{description}}'

 {%- if language %}

-Main language: {{language}}
+Main PR language: '{{ language }}'
 {%- endif %}


--- a/pr_agent/settings/pr_custom_labels.toml
+++ b/pr_agent/settings/pr_custom_labels.toml
@ -1,8 +1,10 @@
 [pr_custom_labels_prompt]
-system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests.
-Your task is to label the type of the PR content.
- Make sure not to focus the new PR code (the '+' lines).
- If needed, each YAML output should be in block scalar format ('|-')
+system="""You are PR-Reviewer, a language model designed to review a git Pull Request (PR).
+Your task is to provide labels that describe the PR content.
+{%- if enable_custom_labels %}
+Thoroughly read each label's name and its provided description, and decide whether the label is relevant to the PR.
+{%- endif %}
+
 {%- if extra_instructions %}

 Extra instructions from the user:
@ -11,52 +13,56 @@ Extra instructions from the user:
 '
 {% endif %}

-You must use the following YAML schema to format your answer:
-```yaml
-PR Type:
-  type: array
+
+The output must be a YAML object equivalent to type $Labels, according to the following Pydantic definitions:
+'
 {%- if enable_custom_labels %}
-  description: Labels that are applicable to the Pull Request. Don't output the description in the parentheses. If none of the labels is relevant to the PR, output an empty array.
-{%- endif %}
-  items:
-    type: string
-    enum:
-{%- if enable_custom_labels %}
-{{ custom_labels }}
+
+{{ custom_labels_class }}
+
 {%- else %}
-      - Bug fix
-      - Tests
-      - Refactoring
-      - Enhancement
-      - Documentation
-      - Other
+class Label(str, Enum):
+    bug_fix = "Bug fix"
+    tests = "Tests"
+    refactoring = "Refactoring"
+    enhancement = "Enhancement"
+    documentation = "Documentation"
+    other = "Other"
 {%- endif %}

+class Labels(BaseModel):
+    labels: List[Label] = Field(min_items=0, description="custom labels that describe the PR. Return the label value, not the name.")
+'
+
+
 Example output:
 ```yaml
-PR Type:
-{%- if enable_custom_labels %}
-{{ custom_labels_examples }}
-{%- else %}
-  - Bug fix
-{%- endif %}
+labels:
+- ...
+- ...
 ```

-Make sure to output a valid YAML. Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields.
+Answer should be a valid YAML, and nothing else.
 """

 user="""PR Info:
+
 Previous title: '{{title}}'
-Previous description: '{{description}}'
-Branch: '{{branch}}'
+
+Branch: '{{ branch }}'
+
+Description: '{{ description }}'
+
 {%- if language %}

-Main language: {{language}}
+Main PR language: '{{ language }}'
 {%- endif %}
 {%- if commit_messages_str %}

 Commit messages:
-{{commit_messages_str}}
+'
+{{ commit_messages_str }}
+'
 {%- endif %}


--- a/pr_agent/settings/pr_description_prompts.toml
+++ b/pr_agent/settings/pr_description_prompts.toml
@ -1,9 +1,9 @@
 [pr_description_prompt]
-system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests.
-Your task is to provide full description of a Pull Request (PR) content.
- Make sure to focus on the new PR code (the '+' lines).
- Notice that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or not up-to-date. Hence, compare them to the PR diff code, and use them only as a reference.
- Emphasize first the most important changes, and then the less important ones.
+system="""You are PR-Reviewer, a language model designed to review a git Pull Request (PR).
+Your task is to provide a full description for the PR content.
+- Make sure to focus on the new PR code (lines starting with '+').
+- Keep in mind that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or out of date. Hence, compare them to the PR diff code, and use them only as a reference.
+- Prioritize the most significant PR changes first, followed by the minor ones.
 - If needed, each YAML output should be in block scalar format ('|-')
 {%- if extra_instructions %}

@ -13,81 +13,83 @@ Extra instructions from the user:
 '
 {% endif %}

-You must use the following YAML schema to format your answer:
-```yaml
-PR Title:
-  type: string
-  description: an informative title for the PR, describing its main theme
-PR Type:
-  type: string
-  enum:
-    - Bug fix
-    - Tests
-    - Refactoring
-    - Enhancement
-    - Documentation
-    - Other
+
+The output must be a YAML object equivalent to type $PRDescription, according to the following Pydantic definitions:
+'
+class PRType(str, Enum):
+    bug_fix = "Bug fix"
+    tests = "Tests"
+    refactoring = "Refactoring"
+    enhancement = "Enhancement"
+    documentation = "Documentation"
+    other = "Other"
+
 {%- if enable_custom_labels %}
-PR Labels:
-  type: array
-  description: Labels that are applicable to the Pull Request. Don't output the description in the parentheses. If none of the labels is relevant to the PR, output an empty array.
-  items:
-    type: string
-    enum:
-{{ custom_labels }}
+
+{{ custom_labels_class }}
+
 {%- endif %}
-PR Description:
-  type: string
-  description: an informative and concise description of the PR.
-  {%- if use_bullet_points %} Use bullet points. {% endif %}
-PR Main Files Walkthrough:
-  type: array
-  maxItems: 10
-  description: |-
-    a walkthrough of the PR changes. Review main files, and shortly describe the changes in each file (up to 10 most important files).
-  items:
-    filename:
-      type: string
-      description: the relevant file full path
-    changes in file:
-      type: string
-      description: minimal and concise description of the changes in the relevant file
-```
+
+class FileWalkthrough(BaseModel):
+    filename: str = Field(description="the relevant file full path")
+    changes_in_file: str = Field(description="minimal and concise description of the changes in the relevant file")
+
+class PRDescription(BaseModel):
+    title: str = Field(description="an informative title for the PR, describing its main theme")
+    type: List[PRType] = Field(description="one or more types that describe the PR type. Return the label value, not the name.")
+    description: str = Field(description="an informative and concise description of the PR. {%- if use_bullet_points %} Use bullet points. {% endif %}")
+{%- if enable_custom_labels %}
+    labels: List[Label] = Field(min_items=0, description="custom labels that describe the PR. Return the label value, not the name.")
+{%- endif %}
+    main_files_walkthrough: List[FileWalkthrough] = Field(max_items=10)
+'


 Example output:
 ```yaml
-PR Title: |-
-  ...
-PR Type:
+title: |-
  ...
+type:
+- ...
+- ...
 {%- if enable_custom_labels %}
-PR Labels:
+labels:
 - ...
 - ...
 {%- endif %}
-PR Description: |-
+description: |-
  ...
-PR Main Files Walkthrough:
-  - ...
-  - ...
+main_files_walkthrough:
+- ...
+- ...
 ```

-Make sure to output a valid YAML. Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields.
+Answer should be a valid YAML, and nothing else. Each YAML output MUST be after a newline, with proper indent, and block scalar indicator ('|-')
 """

 user="""PR Info:
+
 Previous title: '{{title}}'
-Previous description: '{{description}}'
+
+{%- if description %}
+
+Previous description:
+'
+{{ description }}
+'
+{%- endif %}
+
 Branch: '{{branch}}'
 {%- if language %}

-Main language: {{language}}
+Main PR language: '{{ language }}'
 {%- endif %}
 {%- if commit_messages_str %}

 Commit messages:
-{{commit_messages_str}}
+'
+{{ commit_messages_str }}
+'
 {%- endif %}


@ -95,6 +97,8 @@ The PR Git Diff:
 ```
 {{diff}}
 ```
+
+
 Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines.

 Response (should be a valid YAML, and nothing else):
--- a/pr_agent/settings/pr_information_from_user_prompts.toml
+++ b/pr_agent/settings/pr_information_from_user_prompts.toml
@ -1,5 +1,5 @@
 [pr_information_from_user_prompt]
-system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests.
+system="""You are PR-Reviewer, a language model designed to review a git Pull Request (PR).
 Given the PR Info and the PR Git Diff, generate 3 short questions about the PR code for the PR author.
 The goal of the questions is to help the language model understand the PR better, so the questions should be insightful, informative, non-trivial, and relevant to the PR.
 You should prefer asking yes\\no questions, or multiple choice questions. Also add at least one open-ended question, but make sure they are not too difficult, and can be answered in a sentence or two.
@ -16,15 +16,21 @@ Questions to better understand the PR:

 user="""PR Info:
 Title: '{{title}}'
+
 Branch: '{{branch}}'
+
 Description: '{{description}}'
+
 {%- if language %}
-Main language: {{language}}
+
+Main PR language: '{{ language }}'
 {%- endif %}
 {%- if commit_messages_str %}

 Commit messages:
+'
 {{commit_messages_str}}
+'
 {%- endif %}


--- a/pr_agent/settings/pr_questions_prompts.toml
+++ b/pr_agent/settings/pr_questions_prompts.toml
@ -1,22 +1,29 @@
 [pr_questions_prompt]
-system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests.
-Your task is to answer questions about the new PR code (the '+' lines), and provide feedback.
+system="""You are PR-Reviewer, a language model designed to review a git Pull Request (PR).
+Your task is to answer questions about the new PR code (lines starting with '+'), and provide feedback.
 Be informative, constructive, and give examples. Try to be as specific as possible.
 Don't avoid answering the questions. You must answer the questions, as best as you can, without adding unrelated content.
 Make sure not to repeat modifications already implemented in the new PR code (the '+' lines).
 """

 user="""PR Info:
+
 Title: '{{title}}'
+
 Branch: '{{branch}}'
+
 Description: '{{description}}'
+
 {%- if language %}
-Main language: {{language}}
+
+Main PR language: '{{ language }}'
 {%- endif %}
 {%- if commit_messages_str %}

 Commit messages:
-{{commit_messages_str}}
+'
+{{ commit_messages_str }}
+'
 {%- endif %}


--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@ -1,6 +1,7 @@
 [pr_review_prompt]
-system="""You are PR-Reviewer, a language model designed to review git pull requests.
+system="""You are PR-Reviewer, a language model designed to review a git Pull Request (PR).
 Your task is to provide constructive and concise feedback for the PR, and also provide meaningful code suggestions.
+The review should focus on new code added in the PR diff (lines starting with '+').

 Example PR Diff input:
 '
@ -22,14 +23,14 @@ code line that already existed in the file....
 ...
 '

-The review should focus on new code added in the PR (lines starting with '+'), and not on code that already existed in the file (lines starting with '-', or without prefix).
-
 {%- if num_code_suggestions > 0 %}
+
+Code suggestions guidelines:
 - Provide up to {{ num_code_suggestions }} code suggestions. Try to provide diverse and insightful suggestions.
 - Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices.
 - Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the PR code.
 - Don't suggest to add docstring, type hints, or comments.
- Suggestions should focus on improving the new code added in the PR (lines starting with '+')
+- Suggestions should focus on the new code added in the PR diff (lines starting with '+')
 {%- endif %}

 {%- if extra_instructions %}
@ -179,16 +180,29 @@ Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'desc
 """

 user="""PR Info:
+
 Title: '{{title}}'
+
 Branch: '{{branch}}'
-Description: '{{description}}'
+
+{%- if description %}
+
+Description:
+'
+{{description}}
+'
+{%- endif %}
+
 {%- if language %}
-Main language: {{language}}
+
+Main PR language: '{{ language }}'
 {%- endif %}
 {%- if commit_messages_str %}

 Commit messages:
+'
 {{commit_messages_str}}
+'
 {%- endif %}

 {%- if question_str %}
@ -208,7 +222,7 @@ The PR Git Diff:
 ```
 {{diff}}
 ```
-Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions. Focus on the '+' lines.
+

 Response (should be a valid YAML, and nothing else):
 ```yaml
--- a/pr_agent/settings/pr_sort_code_suggestions_prompts.toml
+++ b/pr_agent/settings/pr_sort_code_suggestions_prompts.toml
@ -2,10 +2,10 @@
 system="""
 """

-user="""You are given a list of code suggestions to improve a PR:
-
+user="""You are given a list of code suggestions to improve a git Pull Request (PR):
+'
 {{ suggestion_str|trim }}
-
+'

 Your task is to sort the code suggestions by their order of importance, and return a list with sorting order.
 The sorting order is a list of pairs, where each pair contains the index of the suggestion in the original list.
--- a/pr_agent/settings/pr_update_changelog_prompts.toml
+++ b/pr_agent/settings/pr_update_changelog_prompts.toml
@ -15,16 +15,23 @@ Extra instructions from the user:
 """

 user="""PR Info:
+
 Title: '{{title}}'
+
 Branch: '{{branch}}'
+
 Description: '{{description}}'
+
 {%- if language %}
-Main language: {{language}}
+
+Main PR language: '{{ language }}'
 {%- endif %}
 {%- if commit_messages_str %}

 Commit messages:
-{{commit_messages_str}}
+'
+{{ commit_messages_str }}
+'
 {%- endif %}


--- a/pr_agent/tools/pr_description.py
+++ b/pr_agent/tools/pr_description.py
@ -44,8 +44,7 @@ class PRDescription:
            "extra_instructions": get_settings().pr_description.extra_instructions,
            "commit_messages_str": self.git_provider.get_commit_messages(),
            "enable_custom_labels": get_settings().config.enable_custom_labels,
-            "custom_labels": "",
-            "custom_labels_examples": "",
+            "custom_labels_class": "",  # will be filled if necessary in 'set_custom_labels' function
        }

        self.user_description = self.git_provider.get_user_description()
@ -175,16 +174,16 @@ class PRDescription:
        pr_types = []

        # If the 'PR Type' key is present in the dictionary, split its value by comma and assign it to 'pr_types'
-        if 'PR Labels' in self.data:
-            if type(self.data['PR Labels']) == list:
-                pr_types = self.data['PR Labels']
-            elif type(self.data['PR Labels']) == str:
-                pr_types = self.data['PR Labels'].split(',')
-        elif 'PR Type' in self.data:
-            if type(self.data['PR Type']) == list:
-                pr_types = self.data['PR Type']
-            elif type(self.data['PR Type']) == str:
-                pr_types = self.data['PR Type'].split(',')
+        if 'labels' in self.data:
+            if type(self.data['labels']) == list:
+                pr_types = self.data['labels']
+            elif type(self.data['labels']) == str:
+                pr_types = self.data['labels'].split(',')
+        elif 'type' in self.data:
+            if type(self.data['type']) == list:
+                pr_types = self.data['type']
+            elif type(self.data['type']) == str:
+                pr_types = self.data['type'].split(',')
        return pr_types

    def _prepare_pr_answer_with_markers(self) -> Tuple[str, str]:
@ -196,12 +195,12 @@ class PRDescription:
        else:
            ai_header = ""

-        ai_type = self.data.get('PR Type')
+        ai_type = self.data.get('type')
        if ai_type and not re.search(r'<!--\s*pr_agent:type\s*-->', body):
            pr_type = f"{ai_header}{ai_type}"
            body = body.replace('pr_agent:type', pr_type)

-        ai_summary = self.data.get('PR Description')
+        ai_summary = self.data.get('description')
        if ai_summary and not re.search(r'<!--\s*pr_agent:summary\s*-->', body):
            summary = f"{ai_header}{ai_summary}"
            body = body.replace('pr_agent:summary', summary)
@ -231,16 +230,16 @@ class PRDescription:
        # Iterate over the dictionary items and append the key and value to 'markdown_text' in a markdown format
        markdown_text = ""
        # Don't display 'PR Labels'
-        if 'PR Labels' in self.data and self.git_provider.is_supported("get_labels"):
-            self.data.pop('PR Labels')
+        if 'labels' in self.data and self.git_provider.is_supported("get_labels"):
+            self.data.pop('labels')
        if not get_settings().pr_description.enable_pr_type:
-            self.data.pop('PR Type')
+            self.data.pop('type')
        for key, value in self.data.items():
            markdown_text += f"## {key}\n\n"
            markdown_text += f"{value}\n\n"

        # Remove the 'PR Title' key from the dictionary
-        ai_title = self.data.pop('PR Title', self.vars["title"])
+        ai_title = self.data.pop('title', self.vars["title"])
        if get_settings().pr_description.keep_original_user_title:
            # Assign the original PR title to the 'title' variable
            title = self.vars["title"]
@ -259,7 +258,7 @@ class PRDescription:
                    pr_body += "<details> <summary>files:</summary>\n\n"
                for file in value:
                    filename = file['filename'].replace("'", "`")
-                    description = file['changes in file']
+                    description = file['changes_in_file']
                    pr_body += f'- `{filename}`: {description}\n'
                if self.git_provider.is_supported("gfm_markdown"):
                    pr_body +="</details>\n"
--- a/pr_agent/tools/pr_generate_labels.py
+++ b/pr_agent/tools/pr_generate_labels.py
@ -43,9 +43,8 @@ class PRGenerateLabels:
            "use_bullet_points": get_settings().pr_description.use_bullet_points,
            "extra_instructions": get_settings().pr_description.extra_instructions,
            "commit_messages_str": self.git_provider.get_commit_messages(),
-            "custom_labels": "",
-            "custom_labels_examples": "",
            "enable_custom_labels": get_settings().config.enable_custom_labels,
+            "custom_labels_class": "",  # will be filled if necessary in 'set_custom_labels' function
        }

        # Initialize the token handler
@ -148,6 +147,9 @@ class PRGenerateLabels:
            user=user_prompt
        )

+        if get_settings().config.verbosity_level >= 2:
+            get_logger().info(f"\nAI response:\n{response}")
+
        return response

    def _prepare_data(self):
@ -159,11 +161,11 @@ class PRGenerateLabels:
    def _prepare_labels(self) -> List[str]:
        pr_types = []

-        # If the 'PR Type' key is present in the dictionary, split its value by comma and assign it to 'pr_types'
-        if 'PR Type' in self.data:
-            if type(self.data['PR Type']) == list:
-                pr_types = self.data['PR Type']
-            elif type(self.data['PR Type']) == str:
-                pr_types = self.data['PR Type'].split(',')
+        # If the 'labels' key is present in the dictionary, split its value by comma and assign it to 'pr_types'
+        if 'labels' in self.data:
+            if type(self.data['labels']) == list:
+                pr_types = self.data['labels']
+            elif type(self.data['labels']) == str:
+                pr_types = self.data['labels'].split(',')

        return pr_types
--- a/tests/unittest/test_clip_tokens.py
+++ b/tests/unittest/test_clip_tokens.py
@ -0,0 +1,19 @@
+
+# Generated by CodiumAI
+
+import pytest
+
+from pr_agent.algo.utils import clip_tokens
+
+
+class TestClipTokens:
+    def test_clip(self):
+        text = "line1\nline2\nline3\nline4\nline5\nline6"
+        max_tokens = 25
+        result = clip_tokens(text, max_tokens)
+        assert result == text
+
+        max_tokens = 10
+        result = clip_tokens(text, max_tokens)
+        expected_results = 'line1\nline2\nline3\nli...(truncated)'
+        assert result == expected_results