Compare commits

...

19 Commits

Author SHA1 Message Date
Tal
8218fa6e13 Merge pull request #1421 from dceoy/main
Update Groq models and tokens
2024-12-30 19:25:23 +02:00
8463c4f549 fix: sanitize Ask tool answers to prevent markdown formatting issues with leading slashes 2024-12-30 16:54:03 +02:00
Tal
014b1f20c5 Merge pull request #1426 from Codium-ai/tr/ask_fix
fix: sanitize Ask tool answers to prevent markdown formatting issues
2024-12-30 15:12:28 +02:00
2f73ab6eab fix: sanitize Ask tool answers to prevent markdown formatting issues with leading slashes 2024-12-30 15:06:27 +02:00
16dc29a23a fix: sanitize Ask tool answers to prevent markdown formatting issues 2024-12-30 14:58:53 +02:00
bd9522057f fix: add OpenAI configuration parameters to restricted CLI arguments 2024-12-30 14:33:26 +02:00
Tal
b3d4af6cbf Merge pull request #1425 from Codium-ai/tr/limit_online_commenting
fix: restrict sensitive configuration parameters in CLI arguments
2024-12-30 14:11:28 +02:00
5df9698bae fix: restrict sensitive configuration parameters in CLI arguments 2024-12-30 13:57:55 +02:00
e89b65ed38 Increase the max token of groq/llama-3.3-70b-versatile 2024-12-30 11:35:02 +09:00
6a145af159 fix: make semantic file types extension optional in PR description 2024-12-29 21:43:46 +02:00
Tal
39a375b3e4 Merge pull request #1423 from Codium-ai/tr/describe_v2
Tr/describe v2
2024-12-29 16:02:03 +02:00
dbd76ecde5 refactor: improve file changes title description length guidance 2024-12-29 11:42:05 +02:00
e95920c58c refactor: improve file handling and description generation in PR description tool 2024-12-29 11:37:05 +02:00
59899f0c62 fix: improve patch generation error handling and logging 2024-12-29 11:27:53 +02:00
5e46955d52 fix: improve file path formatting in patch output 2024-12-29 11:26:13 +02:00
95d0fafa75 refactor: optimize file content loading and improve rate limit handling 2024-12-29 11:25:33 +02:00
71c558d306 Update Groq models and tokens 2024-12-28 01:51:33 +09:00
Tal
7b2c41e0d2 Merge pull request #1420 from Codium-ai/tr/review_fix
fix: improve line extraction from files with missing content
2024-12-27 09:02:41 +02:00
4aad67b563 fix: improve line extraction from files with missing content 2024-12-27 09:00:20 +02:00
11 changed files with 309 additions and 197 deletions

View File

@@ -46,7 +46,6 @@ commands = list(command2class.keys())
 class PRAgent:
     def __init__(self, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
         self.ai_handler = ai_handler  # will be initialized in run_action
-        self.forbidden_cli_args = ['enable_auto_approval']

     async def handle_request(self, pr_url, request, notify=None) -> bool:
         # First, apply repo specific settings if exists
@@ -61,10 +60,13 @@ class PRAgent:
         else:
             action, *args = request

+        forbidden_cli_args = ['enable_auto_approval', 'base_url', 'url', 'app_name', 'secret_provider',
+                              'git_provider', 'skip_keys', 'key', 'ANALYTICS_FOLDER', 'uri', 'app_id', 'webhook_secret',
+                              'bearer_token', 'PERSONAL_ACCESS_TOKEN', 'override_deployment_type', 'private_key', 'api_base', 'api_type', 'api_version']
         if args:
-            for forbidden_arg in self.forbidden_cli_args:
+            for forbidden_arg in forbidden_cli_args:
                 for arg in args:
-                    if forbidden_arg in arg:
+                    if forbidden_arg.lower() in arg.lower():
                         get_logger().error(
                             f"CLI argument for param '{forbidden_arg}' is forbidden. Use instead a configuration file."
                         )
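Taken together, these two hunks replace the single hard-coded `enable_auto_approval` guard with a broader, case-insensitive blocklist of configuration keys. A minimal, self-contained sketch of that check; the helper name and example arguments are illustrative, not part of pr-agent's API:

```python
forbidden_cli_args = ['enable_auto_approval', 'base_url', 'url', 'app_name', 'secret_provider',
                      'git_provider', 'skip_keys', 'key', 'ANALYTICS_FOLDER', 'uri', 'app_id',
                      'webhook_secret', 'bearer_token', 'PERSONAL_ACCESS_TOKEN',
                      'override_deployment_type', 'private_key', 'api_base', 'api_type', 'api_version']

def contains_forbidden_arg(args: list[str]) -> bool:
    """Return True if any CLI argument mentions a restricted configuration key (case-insensitive)."""
    return any(forbidden.lower() in arg.lower()
               for forbidden in forbidden_cli_args
               for arg in args)

# Example: a request trying to redirect API calls is rejected, a normal tweak is not.
print(contains_forbidden_arg(['--openai.api_base=http://attacker.example']))  # True
print(contains_forbidden_arg(['--pr_reviewer.num_max_findings=5']))           # False
```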

View File

@@ -66,10 +66,10 @@ MAX_TOKENS = {
     'claude-3-5-sonnet': 100000,
     'groq/llama3-8b-8192': 8192,
     'groq/llama3-70b-8192': 8192,
+    'groq/llama-3.1-8b-instant': 8192,
+    'groq/llama-3.3-70b-versatile': 128000,
     'groq/mixtral-8x7b-32768': 32768,
-    'groq/llama-3.1-8b-instant': 131072,
-    'groq/llama-3.1-70b-versatile': 131072,
-    'groq/llama-3.1-405b-reasoning': 131072,
+    'groq/gemma2-9b-it': 8192,
     'ollama/llama3': 4096,
     'watsonx/meta-llama/llama-3-8b-instruct': 4096,
     "watsonx/meta-llama/llama-3-70b-instruct": 4096,

View File

@@ -364,7 +364,7 @@ __old hunk__
 def extract_hunk_lines_from_patch(patch: str, file_name, line_start, line_end, side) -> tuple[str, str]:
+    try:
         patch_with_lines_str = f"\n\n## File: '{file_name.strip()}'\n\n"
         selected_lines = ""
         patch_lines = patch.splitlines()
@@ -407,5 +407,8 @@ def extract_hunk_lines_from_patch(patch: str, file_name, line_start, line_end, s
                 patch_with_lines_str += line + '\n'
                 if not line.startswith('-'):  # currently we don't support /ask line for deleted lines
                     selected_lines_num += 1
+    except Exception as e:
+        get_logger().error(f"Failed to extract hunk lines from patch: {e}", artifact={"traceback": traceback.format_exc()})
+        return "", ""
     return patch_with_lines_str.rstrip(), selected_lines.rstrip()

View File

@@ -316,13 +316,13 @@ def generate_full_patch(convert_hunks_to_line_numbers, file_dict, max_tokens_mod
             # TODO: Option for alternative logic to remove hunks from the patch to reduce the number of tokens
             #  until we meet the requirements
             if get_settings().config.verbosity_level >= 2:
-                get_logger().warning(f"Patch too large, skipping it, {filename}")
+                get_logger().warning(f"Patch too large, skipping it: '{filename}'")
             remaining_files_list_new.append(filename)
             continue

         if patch:
             if not convert_hunks_to_line_numbers:
-                patch_final = f"\n\n## File: '{filename.strip()}\n\n{patch.strip()}\n'"
+                patch_final = f"\n\n## File: '{filename.strip()}'\n\n{patch.strip()}\n"
             else:
                 patch_final = "\n\n" + patch.strip()
             patches.append(patch_final)
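The second change above moves a stray quote: previously the closing single quote landed after the patch body instead of after the filename. A quick standalone check of the corrected header format (the filename and patch are made-up examples):

```python
filename = "pr_agent/algo/utils.py "
patch = "@@ -1 +1 @@\n-a\n+b"

# corrected: the quote closes right after the stripped filename
patch_final = f"\n\n## File: '{filename.strip()}'\n\n{patch.strip()}\n"
print(patch_final)
# ## File: 'pr_agent/algo/utils.py'
#
# @@ -1 +1 @@
# -a
# +b
```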

View File

@@ -23,6 +23,7 @@ from pydantic import BaseModel
 from starlette_context import context

 from pr_agent.algo import MAX_TOKENS
+from pr_agent.algo.git_patch_processing import extract_hunk_lines_from_patch
 from pr_agent.algo.token_handler import TokenEncoder
 from pr_agent.algo.types import FilePatchInfo
 from pr_agent.config_loader import get_settings, global_settings
@@ -272,7 +273,11 @@ def convert_to_markdown_v2(output_data: dict,
     return markdown_text


-def extract_relevant_lines_str(end_line, files, relevant_file, start_line, dedent=False):
+def extract_relevant_lines_str(end_line, files, relevant_file, start_line, dedent=False) -> str:
+    """
+    Finds 'relevant_file' in 'files', and extracts the lines from 'start_line' to 'end_line' string from the file content.
+    """
     try:
         relevant_lines_str = ""
         if files:
@@ -280,10 +285,23 @@ def extract_relevant_lines_str(end_line, files, relevant_file, start_line, deden
             for file in files:
                 if file.filename.strip() == relevant_file:
                     if not file.head_file:
-                        get_logger().warning(f"No content found in file: {file.filename}")
-                        return ""
-                    relevant_file_lines = file.head_file.splitlines()
-                    relevant_lines_str = "\n".join(relevant_file_lines[start_line - 1:end_line])
+                        # as a fallback, extract relevant lines directly from patch
+                        patch = file.patch
+                        get_logger().info(f"No content found in file: '{file.filename}' for 'extract_relevant_lines_str'. Using patch instead")
+                        _, selected_lines = extract_hunk_lines_from_patch(patch, file.filename, start_line, end_line,side='right')
+                        if not selected_lines:
+                            get_logger().error(f"Failed to extract relevant lines from patch: {file.filename}")
+                            return ""
+                        # filter out '-' lines
+                        relevant_lines_str = ""
+                        for line in selected_lines.splitlines():
+                            if line.startswith('-'):
+                                continue
+                            relevant_lines_str += line[1:] + '\n'
+                    else:
+                        relevant_file_lines = file.head_file.splitlines()
+                        relevant_lines_str = "\n".join(relevant_file_lines[start_line - 1:end_line])
                     if dedent and relevant_lines_str:
                         # Remove the longest leading string of spaces and tabs common to all lines.
                         relevant_lines_str = textwrap.dedent(relevant_lines_str)
@@ -565,27 +583,20 @@ def load_large_diff(filename, new_file_content_str: str, original_file_content_s
     """
     Generate a patch for a modified file by comparing the original content of the file with the new content provided as
     input.
-
-    Args:
-        new_file_content_str: The new content of the file as a string.
-        original_file_content_str: The original content of the file as a string.
-
-    Returns:
-        The generated or provided patch string.
-
-    Raises:
-        None.
     """
-    patch = ""
+    if not original_file_content_str and not new_file_content_str:
+        return ""
+
     try:
         diff = difflib.unified_diff(original_file_content_str.splitlines(keepends=True),
                                     new_file_content_str.splitlines(keepends=True))
         if get_settings().config.verbosity_level >= 2 and show_warning:
-            get_logger().warning(f"File was modified, but no patch was found. Manually creating patch: {filename}.")
+            get_logger().info(f"File was modified, but no patch was found. Manually creating patch: {filename}.")
         patch = ''.join(diff)
-    except Exception:
-        pass
-    return patch
+        return patch
+    except Exception as e:
+        get_logger().exception(f"Failed to generate patch for file: {filename}")
+        return ""


 def update_settings_from_args(args: List[str]) -> List[str]:
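For readers unfamiliar with the fallback path in `load_large_diff`, here is a small standalone illustration of how `difflib.unified_diff` manufactures a patch when the provider supplies file contents but no patch; the file name and contents are made up for the example:

```python
import difflib

original = "a = 1\nb = 2\n"
modified = "a = 1\nb = 3\nc = 4\n"

# splitlines(keepends=True) mirrors the call in load_large_diff
diff = difflib.unified_diff(original.splitlines(keepends=True),
                            modified.splitlines(keepends=True),
                            fromfile="example.py", tofile="example.py")
patch = ''.join(diff)
print(patch)
# --- example.py
# +++ example.py
# @@ -1,2 +1,3 @@
#  a = 1
# -b = 2
# +b = 3
# +c = 4
```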

View File

@@ -174,6 +174,24 @@ class GithubProvider(GitProvider):
         diff_files = []
         invalid_files_names = []
+        is_close_to_rate_limit = False
+
+        # The base.sha will point to the current state of the base branch (including parallel merges), not the original base commit when the PR was created
+        # We can fix this by finding the merge base commit between the PR head and base branches
+        # Note that The pr.head.sha is actually correct as is - it points to the latest commit in your PR branch.
+        # This SHA isn't affected by parallel merges to the base branch since it's specific to your PR's branch.
+        repo = self.repo_obj
+        pr = self.pr
+        try:
+            compare = repo.compare(pr.base.sha, pr.head.sha)  # communication with GitHub
+            merge_base_commit = compare.merge_base_commit
+        except Exception as e:
+            get_logger().error(f"Failed to get merge base commit: {e}")
+            merge_base_commit = pr.base
+        if merge_base_commit.sha != pr.base.sha:
+            get_logger().info(
+                f"Using merge base commit {merge_base_commit.sha} instead of base commit ")
+
         counter_valid = 0
         for file in files:
             if not is_valid_file(file.filename):
@@ -181,7 +199,10 @@ class GithubProvider(GitProvider):
                 continue

             patch = file.patch

+            if is_close_to_rate_limit:
+                new_file_content_str = ""
+                original_file_content_str = ""
+            else:
                 # allow only a limited number of files to be fully loaded. We can manage the rest with diffs only
                 counter_valid += 1
                 avoid_load = False
@@ -203,27 +224,12 @@ class GithubProvider(GitProvider):
                 if avoid_load:
                     original_file_content_str = ""
                 else:
-                    # The base.sha will point to the current state of the base branch (including parallel merges), not the original base commit when the PR was created
-                    # We can fix this by finding the merge base commit between the PR head and base branches
-                    # Note that The pr.head.sha is actually correct as is - it points to the latest commit in your PR branch.
-                    # This SHA isn't affected by parallel merges to the base branch since it's specific to your PR's branch.
-                    repo = self.repo_obj
-                    pr = self.pr
-                    try:
-                        compare = repo.compare(pr.base.sha, pr.head.sha)
-                        merge_base_commit = compare.merge_base_commit
-                    except Exception as e:
-                        get_logger().error(f"Failed to get merge base commit: {e}")
-                        merge_base_commit = pr.base
-                    if merge_base_commit.sha != pr.base.sha:
-                        get_logger().info(
-                            f"Using merge base commit {merge_base_commit.sha} instead of base commit "
-                            f"{pr.base.sha} for {file.filename}")
                     original_file_content_str = self._get_pr_file_content(file, merge_base_commit.sha)
-                    # original_file_content_str = self._get_pr_file_content(file, self.pr.base.sha)

             if not patch:
                 patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str)

             if file.status == 'added':
                 edit_type = EDIT_TYPE.ADDED
             elif file.status == 'removed':
@@ -237,9 +243,14 @@ class GithubProvider(GitProvider):
                 edit_type = EDIT_TYPE.UNKNOWN

             # count number of lines added and removed
+            if hasattr(file, 'additions') and hasattr(file, 'deletions'):
+                num_plus_lines = file.additions
+                num_minus_lines = file.deletions
+            else:
                 patch_lines = patch.splitlines(keepends=True)
                 num_plus_lines = len([line for line in patch_lines if line.startswith('+')])
                 num_minus_lines = len([line for line in patch_lines if line.startswith('-')])
+
             file_patch_canonical_structure = FilePatchInfo(original_file_content_str, new_file_content_str, patch,
                                                            file.filename, edit_type=edit_type,
                                                            num_plus_lines=num_plus_lines,
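The first hunk hoists the merge-base lookup out of the per-file loop, so the GitHub compare call happens once per PR instead of once per changed file. A hedged, standalone sketch of that lookup with PyGithub (the token, repository, and PR number are placeholders):

```python
from github import Github

gh = Github("<personal-access-token>")   # placeholder credentials
repo = gh.get_repo("owner/repo")         # placeholder repository
pr = repo.get_pull(123)                  # placeholder PR number

try:
    # One API round-trip per PR; merge_base_commit is the common ancestor of base and head.
    comparison = repo.compare(pr.base.sha, pr.head.sha)
    merge_base_sha = comparison.merge_base_commit.sha
except Exception:
    # Fall back to the (possibly advanced) base branch tip.
    merge_base_sha = pr.base.sha

# Original file contents are then fetched at merge_base_sha, so commits merged into the
# base branch after the PR was opened do not bleed into the PR's "before" snapshot.
print(merge_base_sha)
```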

View File

@@ -88,6 +88,7 @@ publish_description_as_comment_persistent=true
 ## changes walkthrough section
 enable_semantic_files_types=true
 collapsible_file_list='adaptive' # true, false, 'adaptive'
+collapsible_file_list_threshold=8
 inline_file_summary=false # false, true, 'table'
 # markers
 use_description_markers=false
@@ -96,7 +97,6 @@ include_generated_by_header=true
 enable_large_pr_handling=true
 max_ai_calls=4
 async_ai_calls=true
-mention_extra_files=true
 #custom_labels = ['Bug fix', 'Tests', 'Bug fix with tests', 'Enhancement', 'Documentation', 'Other']

 [pr_questions] # /ask #

View File

@@ -9,7 +9,7 @@ Your task is to provide a full description for the PR content - files walkthrou
 - Keep in mind that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or out of date. Hence, compare them to the PR diff code, and use them only as a reference.
 - The generated title and description should prioritize the most significant changes.
 - If needed, each YAML output should be in block scalar indicator ('|-')
-- When quoting variables or names from the code, use backticks (`) instead of single quote (').
+- When quoting variables, names or file paths from the code, use backticks (`) instead of single quote (').

 {%- if extra_instructions %}
@@ -38,23 +38,22 @@ class PRType(str, Enum):
 {%- if enable_semantic_files_types %}
 class FileDescription(BaseModel):
-    filename: str = Field(description="The full file path of the relevant file.")
-    language: str = Field(description="The programming language of the relevant file.")
+    filename: str = Field(description="The full file path of the relevant file")
+    language: str = Field(description="The programming language of the relevant file")
+{%- if include_file_summary_changes %}
     changes_summary: str = Field(description="concise summary of the changes in the relevant file, in bullet points (1-4 bullet points).")
-    changes_title: str = Field(description="an informative title for the changes in the files, describing its main theme (5-10 words).")
+{%- endif %}
+    changes_title: str = Field(description="one-line summary (5-10 words) capturing the main theme of changes in the file")
     label: str = Field(description="a single semantic label that represents a type of code changes that occurred in the File. Possible values (partial list): 'bug fix', 'tests', 'enhancement', 'documentation', 'error handling', 'configuration changes', 'dependencies', 'formatting', 'miscellaneous', ...")
 {%- endif %}

 class PRDescription(BaseModel):
     type: List[PRType] = Field(description="one or more types that describe the PR content. Return the label member value (e.g. 'Bug fix', not 'bug_fix')")
 {%- if enable_semantic_files_types %}
-    pr_files: List[FileDescription] = Field(max_items=15, description="a list of the files in the PR, and summary of their changes")
+    pr_files: List[FileDescription] = Field(max_items=20, description="a list of all the files that were changed in the PR, and summary of their changes. Each file must be analyzed regardless of change size.")
 {%- endif %}
     description: str = Field(description="an informative and concise description of the PR. Use bullet points. Display first the most significant changes.")
     title: str = Field(description="an informative title for the PR, describing its main theme")
-{%- if enable_custom_labels %}
-    labels: List[Label] = Field(min_items=0, description="choose the relevant custom labels that describe the PR content, and return their keys. Use the value field of the Label object to better understand the label meaning.")
-{%- endif %}
 =====
@@ -70,25 +69,20 @@ pr_files:
     ...
   language: |
     ...
+{%- if include_file_summary_changes %}
   changes_summary: |
     ...
+{%- endif %}
   changes_title: |
     ...
   label: |
-    ...
+    label_key_1
 ...
 {%- endif %}
 description: |
   ...
 title: |
   ...
-{%- if enable_custom_labels %}
-labels:
-- |
-  ...
-- |
-  ...
-{%- endif %}
 ```

 Answer should be a valid YAML, and nothing else. Each YAML output MUST be after a newline, with proper indent, and block scalar indicator ('|')

View File

@@ -1,6 +1,7 @@
 import asyncio
 import copy
 import re
+import traceback
 from functools import partial
 from typing import List, Tuple
@@ -57,6 +58,7 @@ class PRDescription:
         self.ai_handler.main_pr_language = self.main_pr_language

         # Initialize the variables dictionary
+        self.COLLAPSIBLE_FILE_LIST_THRESHOLD = get_settings().pr_description.get("collapsible_file_list_threshold", 8)
         self.vars = {
             "title": self.git_provider.pr.title,
             "branch": self.git_provider.get_pr_branch(),
@@ -69,6 +71,7 @@ class PRDescription:
             "custom_labels_class": "",  # will be filled if necessary in 'set_custom_labels' function
             "enable_semantic_files_types": get_settings().pr_description.enable_semantic_files_types,
             "related_tickets": "",
+            "include_file_summary_changes": len(self.git_provider.get_diff_files()) <= self.COLLAPSIBLE_FILE_LIST_THRESHOLD
         }

         self.user_description = self.git_provider.get_user_description()
@@ -85,7 +88,6 @@ class PRDescription:
         self.patches_diff = None
         self.prediction = None
         self.file_label_dict = None
-        self.COLLAPSIBLE_FILE_LIST_THRESHOLD = 8

     async def run(self):
         try:
@@ -114,6 +116,8 @@ class PRDescription:
             pr_labels, pr_file_changes = [], []
             if get_settings().pr_description.publish_labels:
                 pr_labels = self._prepare_labels()
+            else:
+                get_logger().debug(f"Publishing labels disabled")

             if get_settings().pr_description.use_description_markers:
                 pr_title, pr_body, changes_walkthrough, pr_file_changes = self._prepare_pr_answer_with_markers()
@@ -137,6 +141,7 @@ class PRDescription:
                 pr_body += show_relevant_configurations(relevant_section='pr_description')

             if get_settings().config.publish_output:
+                # publish labels
                 if get_settings().pr_description.publish_labels and pr_labels and self.git_provider.is_supported("get_labels"):
                     original_labels = self.git_provider.get_pr_labels(update=True)
@@ -164,7 +169,7 @@ class PRDescription:
                     self.git_provider.publish_description(pr_title, pr_body)

                 # publish final update message
-                if (get_settings().pr_description.final_update_message):
+                if (get_settings().pr_description.final_update_message and not get_settings().config.get('is_auto_command', False)):
                     latest_commit_url = self.git_provider.get_latest_commit_url()
                     if latest_commit_url:
                         pr_url = self.git_provider.get_pr_url()
@@ -176,35 +181,37 @@ class PRDescription:
                 get_settings().data = {"artifact": pr_body}
             return
         except Exception as e:
-            get_logger().error(f"Error generating PR description {self.pr_id}: {e}")
+            get_logger().error(f"Error generating PR description {self.pr_id}: {e}",
+                               artifact={"traceback": traceback.format_exc()})

         return ""

     async def _prepare_prediction(self, model: str) -> None:
         if get_settings().pr_description.use_description_markers and 'pr_agent:' not in self.user_description:
-            get_logger().info(
-                "Markers were enabled, but user description does not contain markers. skipping AI prediction")
+            get_logger().info("Markers were enabled, but user description does not contain markers. skipping AI prediction")
             return None

         large_pr_handling = get_settings().pr_description.enable_large_pr_handling and "pr_description_only_files_prompts" in get_settings()
-        output = get_pr_diff(self.git_provider, self.token_handler, model, large_pr_handling=large_pr_handling,
-                             return_remaining_files=True)
+        output = get_pr_diff(self.git_provider, self.token_handler, model, large_pr_handling=large_pr_handling, return_remaining_files=True)
         if isinstance(output, tuple):
             patches_diff, remaining_files_list = output
         else:
             patches_diff = output
             remaining_files_list = []

         if not large_pr_handling or patches_diff:
             self.patches_diff = patches_diff
             if patches_diff:
+                # generate the prediction
                 get_logger().debug(f"PR diff", artifact=self.patches_diff)
                 self.prediction = await self._get_prediction(model, patches_diff, prompt="pr_description_prompt")
-                if (remaining_files_list and 'pr_files' in self.prediction and 'label:' in self.prediction and
-                        get_settings().pr_description.mention_extra_files):
-                    get_logger().debug(f"Extending additional files, {len(remaining_files_list)} files")
-                    self.prediction = await self.extend_additional_files(remaining_files_list)
+
+                # extend the prediction with additional files not shown
+                if get_settings().pr_description.enable_semantic_files_types:
+                    self.prediction = await self.extend_uncovered_files(self.prediction)
             else:
-                get_logger().error(f"Error getting PR diff {self.pr_id}")
+                get_logger().error(f"Error getting PR diff {self.pr_id}",
+                                   artifact={"traceback": traceback.format_exc()})
                 self.prediction = None
         else:
             # get the diff in multiple patches, with the token handler only for the files prompt
@@ -289,43 +296,81 @@ class PRDescription:
                                                               prompt="pr_description_only_description_prompts")
             prediction_headers = prediction_headers.strip().removeprefix('```yaml').strip('`').strip()

-            # manually add extra files to final prediction
-            MAX_EXTRA_FILES_TO_OUTPUT = 100
-            if get_settings().pr_description.mention_extra_files:
-                for i, file in enumerate(remaining_files_list):
-                    extra_file_yaml = f"""\
-- filename: |
-    {file}
-  changes_summary: |
-    ...
-  changes_title: |
-    ...
-  label: |
-    additional files (token-limit)
-"""
-                    files_walkthrough = files_walkthrough.strip() + "\n" + extra_file_yaml.strip()
-                    if i >= MAX_EXTRA_FILES_TO_OUTPUT:
-                        files_walkthrough += f"""\
-extra_file_yaml =
-- filename: |
-    Additional {len(remaining_files_list) - MAX_EXTRA_FILES_TO_OUTPUT} files not shown
-  changes_summary: |
-    ...
-  changes_title: |
-    ...
-  label: |
-    additional files (token-limit)
-"""
-                        break
+            # extend the tables with the files not shown
+            files_walkthrough_extended = await self.extend_uncovered_files(files_walkthrough)

             # final processing
-            self.prediction = prediction_headers + "\n" + "pr_files:\n" + files_walkthrough
+            self.prediction = prediction_headers + "\n" + "pr_files:\n" + files_walkthrough_extended
             if not load_yaml(self.prediction, keys_fix_yaml=self.keys_fix):
                 get_logger().error(f"Error getting valid YAML in large PR handling for describe {self.pr_id}")
                 if load_yaml(prediction_headers, keys_fix_yaml=self.keys_fix):
                     get_logger().debug(f"Using only headers for describe {self.pr_id}")
                     self.prediction = prediction_headers

+    async def extend_uncovered_files(self, original_prediction: str) -> str:
+        try:
+            prediction = original_prediction
+
+            # get the original prediction filenames
+            original_prediction_loaded = load_yaml(original_prediction, keys_fix_yaml=self.keys_fix)
+            if isinstance(original_prediction_loaded, list):
+                original_prediction_dict = {"pr_files": original_prediction_loaded}
+            else:
+                original_prediction_dict = original_prediction_loaded
+            filenames_predicted = [file['filename'].strip() for file in original_prediction_dict.get('pr_files', [])]
+
+            # extend the prediction with additional files not included in the original prediction
+            pr_files = self.git_provider.get_diff_files()
+            prediction_extra = "pr_files:"
+            MAX_EXTRA_FILES_TO_OUTPUT = 100
+            counter_extra_files = 0
+            for file in pr_files:
+                if file.filename in filenames_predicted:
+                    continue
+
+                # add up to MAX_EXTRA_FILES_TO_OUTPUT files
+                counter_extra_files += 1
+                if counter_extra_files > MAX_EXTRA_FILES_TO_OUTPUT:
+                    extra_file_yaml = f"""\
+- filename: |
+    Additional files not shown
+  changes_title: |
+    ...
+  label: |
+    additional files
+"""
+                    prediction_extra = prediction_extra + "\n" + extra_file_yaml.strip()
+                    get_logger().debug(f"Too many remaining files, clipping to {MAX_EXTRA_FILES_TO_OUTPUT}")
+                    break
+
+                extra_file_yaml = f"""\
+- filename: |
+    {file.filename}
+  changes_title: |
+    ...
+  label: |
+    additional files
+"""
+                prediction_extra = prediction_extra + "\n" + extra_file_yaml.strip()
+
+            # merge the two dictionaries
+            if counter_extra_files > 0:
+                get_logger().info(f"Adding {counter_extra_files} unprocessed extra files to table prediction")
+                prediction_extra_dict = load_yaml(prediction_extra, keys_fix_yaml=self.keys_fix)
+                if isinstance(original_prediction_dict, dict) and isinstance(prediction_extra_dict, dict):
+                    original_prediction_dict["pr_files"].extend(prediction_extra_dict["pr_files"])
+                    new_yaml = yaml.dump(original_prediction_dict)
+                    if load_yaml(new_yaml, keys_fix_yaml=self.keys_fix):
+                        prediction = new_yaml
+                    if isinstance(original_prediction, list):
+                        prediction = yaml.dump(original_prediction_dict["pr_files"])
+
+            return prediction
+        except Exception as e:
+            get_logger().error(f"Error extending uncovered files {self.pr_id}: {e}")
+            return original_prediction
+
     async def extend_additional_files(self, remaining_files_list) -> str:
         prediction = self.prediction
         try:
@@ -397,31 +442,31 @@ extra_file_yaml =
         self.data['pr_files'] = self.data.pop('pr_files')

     def _prepare_labels(self) -> List[str]:
-        pr_types = []
+        pr_labels = []

         # If the 'PR Type' key is present in the dictionary, split its value by comma and assign it to 'pr_types'
-        if 'labels' in self.data:
+        if 'labels' in self.data and self.data['labels']:
             if type(self.data['labels']) == list:
-                pr_types = self.data['labels']
+                pr_labels = self.data['labels']
             elif type(self.data['labels']) == str:
-                pr_types = self.data['labels'].split(',')
-        elif 'type' in self.data:
+                pr_labels = self.data['labels'].split(',')
+        elif 'type' in self.data and self.data['type'] and get_settings().pr_description.publish_labels:
             if type(self.data['type']) == list:
-                pr_types = self.data['type']
+                pr_labels = self.data['type']
             elif type(self.data['type']) == str:
-                pr_types = self.data['type'].split(',')
-        pr_types = [label.strip() for label in pr_types]
+                pr_labels = self.data['type'].split(',')
+        pr_labels = [label.strip() for label in pr_labels]

         # convert lowercase labels to original case
         try:
             if "labels_minimal_to_labels_dict" in self.variables:
                 d: dict = self.variables["labels_minimal_to_labels_dict"]
-                for i, label_i in enumerate(pr_types):
+                for i, label_i in enumerate(pr_labels):
                     if label_i in d:
-                        pr_types[i] = d[label_i]
+                        pr_labels[i] = d[label_i]
         except Exception as e:
             get_logger().error(f"Error converting labels to original case {self.pr_id}: {e}")
-        return pr_types
+        return pr_labels

     def _prepare_pr_answer_with_markers(self) -> Tuple[str, str, str, List[dict]]:
         get_logger().info(f"Using description marker replacements {self.pr_id}")
@@ -528,14 +573,18 @@ extra_file_yaml =
             return file_label_dict
         for file in self.data['pr_files']:
             try:
-                required_fields = ['changes_summary', 'changes_title', 'filename', 'label']
+                required_fields = ['changes_title', 'filename', 'label']
                 if not all(field in file for field in required_fields):
                     # can happen for example if a YAML generation was interrupted in the middle (no more tokens)
                     get_logger().warning(f"Missing required fields in file label dict {self.pr_id}, skipping file",
                                          artifact={"file": file})
                     continue
+                if not file.get('changes_title'):
+                    get_logger().warning(f"Empty changes title or summary in file label dict {self.pr_id}, skipping file",
+                                         artifact={"file": file})
+                    continue
                 filename = file['filename'].replace("'", "`").replace('"', '`')
-                changes_summary = file['changes_summary']
+                changes_summary = file.get('changes_summary', "").strip()
                 changes_title = file['changes_title'].strip()
                 label = file.get('label').strip().lower()
                 if label not in file_label_dict:
@@ -578,12 +627,14 @@ extra_file_yaml =
         for filename, file_changes_title, file_change_description in list_tuples:
             filename = filename.replace("'", "`").rstrip()
             filename_publish = filename.split("/")[-1]
+            if file_changes_title and file_changes_title.strip() != "...":
                 file_changes_title_code = f"<code>{file_changes_title}</code>"
                 file_changes_title_code_br = insert_br_after_x_chars(file_changes_title_code, x=(delta - 5)).strip()
                 if len(file_changes_title_code_br) < (delta - 5):
                     file_changes_title_code_br += "&nbsp; " * ((delta - 5) - len(file_changes_title_code_br))
                 filename_publish = f"<strong>{filename_publish}</strong><dd>{file_changes_title_code_br}</dd>"
+            else:
+                filename_publish = f"<strong>{filename_publish}</strong>"
             diff_plus_minus = ""
             delta_nbsp = ""
             diff_files = self.git_provider.get_diff_files()
@@ -592,6 +643,8 @@ extra_file_yaml =
                     num_plus_lines = f.num_plus_lines
                     num_minus_lines = f.num_minus_lines
                     diff_plus_minus += f"+{num_plus_lines}/-{num_minus_lines}"
+                    if len(diff_plus_minus) > 12 or diff_plus_minus == "+0/-0":
+                        diff_plus_minus = "[link]"
                     delta_nbsp = "&nbsp; " * max(0, (8 - len(diff_plus_minus)))
                     break
@@ -600,8 +653,39 @@ extra_file_yaml =
             if hasattr(self.git_provider, 'get_line_link'):
                 filename = filename.strip()
                 link = self.git_provider.get_line_link(filename, relevant_line_start=-1)
+                if (not link or not diff_plus_minus) and ('additional files' not in filename.lower()):
+                    get_logger().warning(f"Error getting line link for '{filename}'")
+                    continue

+            # Add file data to the PR body
             file_change_description_br = insert_br_after_x_chars(file_change_description, x=(delta - 5))
+            pr_body = self.add_file_data(delta_nbsp, diff_plus_minus, file_change_description_br, filename,
+                                         filename_publish, link, pr_body)
+
+        # Close the collapsible file list
+        if use_collapsible_file_list:
+            pr_body += """</table></details></td></tr>"""
+        else:
+            pr_body += """</table></td></tr>"""
+        pr_body += """</tr></tbody></table>"""
+    except Exception as e:
+        get_logger().error(f"Error processing pr files to markdown {self.pr_id}: {str(e)}")
+        pass
+    return pr_body, pr_comments
+
+    def add_file_data(self, delta_nbsp, diff_plus_minus, file_change_description_br, filename, filename_publish, link,
+                      pr_body) -> str:
+
+        if not file_change_description_br:
+            pr_body += f"""
+<tr>
+  <td>{filename_publish}</td>
+  <td><a href="{link}">{diff_plus_minus}</a>{delta_nbsp}</td>
+</tr>
+"""
+        else:
             pr_body += f"""
 <tr>
     <td>
@@ -622,17 +706,7 @@ extra_file_yaml =
 </tr>
 """

-            if use_collapsible_file_list:
-                pr_body += """</table></details></td></tr>"""
-            else:
-                pr_body += """</table></td></tr>"""
-            pr_body += """</tr></tbody></table>"""
-        except Exception as e:
-            get_logger().error(f"Error processing pr files to markdown {self.pr_id}: {e}")
-            pass
-        return pr_body, pr_comments
+        return pr_body


 def count_chars_without_html(string):
     if '<' not in string:
@@ -641,11 +715,14 @@ def count_chars_without_html(string):
     return len(no_html_string)


-def insert_br_after_x_chars(text, x=70):
+def insert_br_after_x_chars(text: str, x=70):
     """
     Insert <br> into a string after a word that increases its length above x characters.
     Use proper HTML tags for code and new lines.
     """
+    if not text:
+        return ""
+
     if count_chars_without_html(text) < x:
         return text
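The new `extend_uncovered_files` method above works by parsing the model's `pr_files` YAML, appending stub entries for changed files the model never mentioned, and re-serializing. A minimal sketch of that merge using plain PyYAML instead of pr-agent's `load_yaml` helper (the file names are invented for the example):

```python
import yaml

original_prediction = """\
pr_files:
- filename: |
    pr_agent/agent/pr_agent.py
  changes_title: |
    Restrict sensitive CLI arguments
  label: |
    enhancement
"""
uncovered_files = ["docs/usage-guide.md"]  # changed in the PR but missing from the prediction

data = yaml.safe_load(original_prediction)
covered = {entry["filename"].strip() for entry in data["pr_files"]}
for name in uncovered_files:
    if name not in covered:
        # stub entry, mirroring the 'additional files' label used above
        data["pr_files"].append({"filename": name,
                                 "changes_title": "...",
                                 "label": "additional files"})

print(yaml.dump(data, sort_keys=False))
```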

View File

@@ -79,13 +79,17 @@ class PR_LineQuestions:
                                                               line_end=line_end,
                                                               side=side)
         if self.patch_with_lines:
-            response = await retry_with_fallback_models(self._get_prediction, model_type=ModelType.WEAK)
+            model_answer = await retry_with_fallback_models(self._get_prediction, model_type=ModelType.WEAK)
+            # sanitize the answer so that no line will start with "/"
+            model_answer_sanitized = model_answer.strip().replace("\n/", "\n /")
+            if model_answer_sanitized.startswith("/"):
+                model_answer_sanitized = " " + model_answer_sanitized

             get_logger().info('Preparing answer...')
             if comment_id:
-                self.git_provider.reply_to_comment_from_comment_id(comment_id, response)
+                self.git_provider.reply_to_comment_from_comment_id(comment_id, model_answer_sanitized)
             else:
-                self.git_provider.publish_comment(response)
+                self.git_provider.publish_comment(model_answer_sanitized)

         return ""

View File

@@ -117,6 +117,16 @@ class PRQuestions:
         return response

     def _prepare_pr_answer(self) -> str:
+        model_answer = self.prediction.strip()
+        # sanitize the answer so that no line will start with "/"
+        model_answer_sanitized = model_answer.replace("\n/", "\n /")
+        if model_answer_sanitized.startswith("/"):
+            model_answer_sanitized = " " + model_answer_sanitized
+        if model_answer_sanitized != model_answer:
+            get_logger().debug(f"Sanitized model answer",
+                               artifact={"model_answer": model_answer, "sanitized_answer": model_answer_sanitized})
+
         answer_str = f"### **Ask**❓\n{self.question_str}\n\n"
-        answer_str += f"### **Answer:**\n{self.prediction.strip()}\n\n"
+        answer_str += f"### **Answer:**\n{model_answer_sanitized}\n\n"
         return answer_str
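Both Ask tools now apply the same small transform before publishing: any answer line that begins with "/" is indented so the posted comment cannot be re-parsed as a new bot command. A standalone sketch of that transform (the helper name is illustrative, not part of pr-agent's API):

```python
def sanitize_answer(model_answer: str) -> str:
    """Indent any line starting with '/' so it is not interpreted as a command."""
    sanitized = model_answer.strip().replace("\n/", "\n /")
    if sanitized.startswith("/"):
        sanitized = " " + sanitized
    return sanitized

print(sanitize_answer("/improve\nThe function could cache results."))
# " /improve\nThe function could cache results."
```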