Merge branch 'main' into nocode_suggestions_config

2025-07-21 04:50:39 +08:00 · 2024-11-04 07:50:22 +02:00
parent 443d06df06 16029e66ad
commit fa49dd5167
9 changed files with 165 additions and 112 deletions
--- a/README.md
+++ b/README.md
@ -43,6 +43,13 @@ Qodo Merge PR-Agent aims to help efficiently review and handle pull requests, by
  
 ## News and Updates

+### November 3, 2024
+
+Meaningfully improved the quality of code suggestions by separating code suggestion generation from [line number detection](https://github.com/Codium-ai/pr-agent/pull/1338).
+
+<kbd>![image](https://github.com/user-attachments/assets/093c185c-31ca-47a1-a4fe-be7d9335ea66)</kbd>
+
+
 ### October 27, 2024

 Qodo Merge PR Agent will now automatically document accepted code suggestions in a dedicated wiki page (`.pr_agent_accepted_suggestions`), enabling users to track historical changes, assess the tool's effectiveness, and learn from previously implemented recommendations in the repository.
--- a/docs/docs/core-abilities/self_reflection.md
+++ b/docs/docs/core-abilities/self_reflection.md
@ -46,6 +46,5 @@ This results in a more refined and valuable set of suggestions for the user, sav
 ## Appendix - Relevant Configuration Options
 ```
 [pr_code_suggestions]
-self_reflect_on_suggestions	= true # Enable self-reflection on code suggestions
 suggestions_score_threshold	= 0 # Filter out suggestions with a score below this threshold (0-10)
 ```
--- a/docs/docs/tools/improve.md
+++ b/docs/docs/tools/improve.md
@ -279,10 +279,6 @@ Using a combination of both can help the AI model to provide relevant and tailor
        <td><b>persistent_comment</b></td>
        <td>If set to true, the improve comment will be persistent, meaning that every new improve request will edit the previous one. Default is false.</td>
      </tr>
-      <tr>
-        <td><b>self_reflect_on_suggestions</b></td>
-        <td>If set to true, the improve tool will calculate an importance score for each suggestion [1-10], and sort the suggestion labels group based on this score. Default is true.</td>
-      </tr>
      <tr>
        <td><b>suggestions_score_threshold</b></td>
        <td> Any suggestion with importance score less than this threshold will be removed. Default is 0. Highly recommend not to set this value above 7-8, since above it may clip relevant suggestions that can be useful. </td>
--- a/docs/docs/usage-guide/additional_configurations.md
+++ b/docs/docs/usage-guide/additional_configurations.md
@ -160,3 +160,13 @@ ignore_pr_target_branches = ["qa"]

 Where the `ignore_pr_source_branches` and `ignore_pr_target_branches` are lists of regex patterns to match the source and target branches you want to ignore.
 They are not mutually exclusive, you can use them together or separately.
+
+
+To allow only specific folders (often needed in large monorepos), set:
+
+```
+[config]
+allow_only_specific_folders=['folder1','folder2']
+```
+
+With the configuration above, automatic feedback is triggered only when the PR changes include files from 'folder1' or 'folder2'.
--- a/pr_agent/algo/ai_handlers/openai_ai_handler.py
+++ b/pr_agent/algo/ai_handlers/openai_ai_handler.py
@ -1,6 +1,7 @@
+from os import environ
 from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
 import openai
-from openai.error import APIError, RateLimitError, Timeout, TryAgain
+from openai import APIError, AsyncOpenAI, RateLimitError, Timeout
 from retry import retry

 from pr_agent.config_loader import get_settings
@ -14,7 +15,7 @@ class OpenAIHandler(BaseAiHandler):
        # Initialize OpenAIHandler specific attributes here
        try:
            super().__init__()
-            openai.api_key = get_settings().openai.key
+            environ["OPENAI_API_KEY"] = get_settings().openai.key
            if get_settings().get("OPENAI.ORG", None):
                openai.organization = get_settings().openai.org
            if get_settings().get("OPENAI.API_TYPE", None):
@ -24,7 +25,7 @@ class OpenAIHandler(BaseAiHandler):
            if get_settings().get("OPENAI.API_VERSION", None):
                openai.api_version = get_settings().openai.api_version
            if get_settings().get("OPENAI.API_BASE", None):
-                openai.api_base = get_settings().openai.api_base
+                environ["OPENAI_BASE_URL"] = get_settings().openai.api_base

        except AttributeError as e:
            raise ValueError("OpenAI key is required") from e
@ -36,7 +37,7 @@ class OpenAIHandler(BaseAiHandler):
        """
        return get_settings().get("OPENAI.DEPLOYMENT_ID", None)

-    @retry(exceptions=(APIError, Timeout, TryAgain, AttributeError, RateLimitError),
+    @retry(exceptions=(APIError, Timeout, AttributeError, RateLimitError),
           tries=OPENAI_RETRIES, delay=2, backoff=2, jitter=(1, 3))
    async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2):
        try:
@ -44,20 +45,19 @@ class OpenAIHandler(BaseAiHandler):
            get_logger().info("System: ", system)
            get_logger().info("User: ", user)
            messages = [{"role": "system", "content": system}, {"role": "user", "content": user}]
-
-            chat_completion = await openai.ChatCompletion.acreate(
+            client = AsyncOpenAI()
+            chat_completion = await client.chat.completions.create(
                model=model,
-                deployment_id=deployment_id,
                messages=messages,
                temperature=temperature,
            )
-            resp = chat_completion["choices"][0]['message']['content']
-            finish_reason = chat_completion["choices"][0]["finish_reason"]
-            usage = chat_completion.get("usage")
+            resp = chat_completion.choices[0].message.content
+            finish_reason = chat_completion.choices[0].finish_reason
+            usage = chat_completion.usage
            get_logger().info("AI response", response=resp, messages=messages, finish_reason=finish_reason,
                              model=model, usage=usage)
            return resp, finish_reason
-        except (APIError, Timeout, TryAgain) as e:
+        except (APIError, Timeout) as e:
            get_logger().error("Error during OpenAI inference: ", e)
            raise
        except (RateLimitError) as e:
@ -65,4 +65,4 @@ class OpenAIHandler(BaseAiHandler):
            raise
        except (Exception) as e:
            get_logger().error("Unknown error during OpenAI inference: ", e)
-            raise TryAgain from e
+            raise
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@ -122,7 +122,6 @@ max_history_len=4
 # enable to apply suggestion 💎
 apply_suggestions_checkbox=true
 # suggestions scoring
-self_reflect_on_suggestions=true
 suggestions_score_threshold=0 # [0-10]| recommend not to set this value above 8, since above it may clip highly relevant suggestions
 # params for '/improve --extended' mode
 auto_extended_mode=true
--- a/pr_agent/settings/pr_code_suggestions_prompts.toml
+++ b/pr_agent/settings/pr_code_suggestions_prompts.toml
@ -14,10 +14,10 @@ The PR code diff will be in the following structured format:

@@ ... @@ def func1():
 __new hunk__
-11  unchanged code line0 in the PR
-12  unchanged code line1 in the PR
-13 +new code line2 added in the PR
-14  unchanged code line3 in the PR
+ unchanged code line0 in the PR
+ unchanged code line1 in the PR
+new code line2 added in the PR
+ unchanged code line3 in the PR
 __old hunk__
 unchanged code line0
 unchanged code line1
@ -35,7 +35,6 @@ __new hunk__
 ======

 - In the format above, the diff is organized into separate '__new hunk__' and '__old hunk__' sections for each code chunk. '__new hunk__' contains the updated code, while '__old hunk__' shows the removed code. If no code was removed in a specific chunk, the __old hunk__ section will be omitted.
- Line numbers were added for the '__new hunk__' sections to help referencing specific lines in the code suggestions. These numbers are for reference only and are not part of the actual code.
 - Code lines are prefixed with symbols: '+' for new code added in the PR, '-' for code removed, and ' ' for unchanged code.
 {%- if is_ai_metadata %}
 - When available, an AI-generated summary will precede each file's diff, with a high-level overview of the changes. Note that this summary may not be fully accurate or complete.
@ -44,7 +43,7 @@ __new hunk__

 Specific guidelines for generating code suggestions:
 - Provide up to {{ num_code_suggestions }} distinct and insightful code suggestions.
- Focus solely on enhancing new code introduced in the PR, identified by '+' prefixes in '__new hunk__' sections (after the line numbers).
+- Focus solely on enhancing new code introduced in the PR, identified by '+' prefixes in '__new hunk__' sections.
 - Prioritize suggestions that address potential issues, critical problems, and bugs in the PR code. Avoid repeating changes already implemented in the PR. If no pertinent suggestions are applicable, return an empty list.
 - Don't suggest to add docstring, type hints, or comments, to remove unused imports, or to use more specific exception types.
 - When referencing variables or names from the code, enclose them in backticks (`). Example: "ensure that `variable_name` is..."
@ -67,12 +66,10 @@ class CodeSuggestion(BaseModel):
    relevant_file: str = Field(description="Full path of the relevant file")
    language: str = Field(description="Programming language used by the relevant file")
    suggestion_content: str = Field(description="An actionable suggestion to enhance, improve or fix the new code introduced in the PR. Don't present here actual code snippets, just the suggestion. Be short and concise")
-    existing_code: str = Field(description="A short code snippet from a '__new hunk__' section that the suggestion aims to enhance or fix. Include only complete code lines, without line numbers. Use ellipsis (...) for brevity if needed. This snippet should represent the specific PR code targeted for improvement.")
+    existing_code: str = Field(description="A short code snippet from a '__new hunk__' section that the suggestion aims to enhance or fix. Include only complete code lines. Use ellipsis (...) for brevity if needed. This snippet should represent the specific PR code targeted for improvement.")
    improved_code: str = Field(description="A refined code snippet that replaces the 'existing_code' snippet after implementing the suggestion.")
    one_sentence_summary: str = Field(description="A concise, single-sentence overview of the suggested improvement. Focus on the 'what'. Be general, and avoid method or variable names.")
-    relevant_lines_start: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion starts (inclusive). Should be derived from the hunk line numbers, and correspond to the beginning of the 'existing code' snippet above")
-    relevant_lines_end: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion ends (inclusive). Should be derived from the hunk line numbers, and correspond to the end of the 'existing code' snippet above")
-    label: str = Field(description="A single, descriptive label that best characterizes the suggestion type. Possible labels include 'security', 'possible bug', 'possible issue', 'performance', 'enhancement', 'best practice', 'maintainability'. Other relevant labels are also acceptable.")
+    label: str = Field(description="A single, descriptive label that best characterizes the suggestion type. Possible labels include 'security', 'possible bug', 'possible issue', 'performance', 'enhancement', 'best practice', 'maintainability', 'typo'. Other relevant labels are also acceptable.")


 class PRCodeSuggestions(BaseModel):
@ -95,8 +92,6 @@ code_suggestions:
    ...
  one_sentence_summary: |
    ...
-  relevant_lines_start: 12
-  relevant_lines_end: 13
  label: |
    ...
 ```
@ -112,7 +107,7 @@ Title: '{{title}}'

 The PR Diff:
 ======
-{{ diff|trim }}
+{{ diff_no_line_numbers|trim }}
 ======


--- a/pr_agent/settings/pr_code_suggestions_reflect_prompts.toml
+++ b/pr_agent/settings/pr_code_suggestions_reflect_prompts.toml
@ -15,8 +15,8 @@ Be particularly vigilant for suggestions that:
    - Contradict or ignore parts of the PR's modifications
 In such cases, assign the suggestion a score of 0.

-For valid suggestions, your role is to provide an impartial and precise score assessment that accurately reflects each suggestion's potential impact on the PR's correctness, quality and functionality.
-
+Evaluate each valid suggestion by scoring its potential impact on the PR's correctness, quality and functionality.
+In addition, you should also detect the line numbers in the '__new hunk__' section that correspond to the 'existing_code' snippet.

 Key guidelines for evaluation:
 - Thoroughly examine both the suggestion content and the corresponding PR code diff. Be vigilant for potential errors in each suggestion, ensuring they are logically sound, accurate, and directly derived from the PR code diff.
@ -82,6 +82,8 @@ The output must be a YAML object equivalent to type $PRCodeSuggestionsFeedback,
 class CodeSuggestionFeedback(BaseModel):
    suggestion_summary: str = Field(description="Repeated from the input")
    relevant_file: str = Field(description="Repeated from the input")
+    relevant_lines_start: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion starts (inclusive). Should be derived from the hunk line numbers, and correspond to the beginning of the relevant 'existing code' snippet")
+    relevant_lines_end: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion ends (inclusive). Should be derived from the hunk line numbers, and correspond to the end of the relevant 'existing code' snippet")
    suggestion_score: int = Field(description="Evaluate the suggestion and assign a score from 0 to 10. Give 0 if the suggestion is wrong. For valid suggestions, score from 1 (lowest impact/importance) to 10 (highest impact/importance).")
    why: str = Field(description="Briefly explain the score given in 1-2 sentences, focusing on the suggestion's impact, relevance, and accuracy.")

@ -96,6 +98,8 @@ code_suggestions:
 - suggestion_summary: |
    Use a more descriptive variable name here
  relevant_file: "src/file1.py"
+  relevant_lines_start: 13
+  relevant_lines_end: 14
  suggestion_score: 6
  why: |
    The variable name 't' is not descriptive enough
--- a/pr_agent/tools/pr_code_suggestions.py
+++ b/pr_agent/tools/pr_code_suggestions.py
@ -1,6 +1,7 @@
 import asyncio
 import copy
 import textwrap
+import traceback
 from functools import partial
 from typing import Dict, List
 from jinja2 import Environment, StrictUndefined
@ -44,7 +45,7 @@ class PRCodeSuggestions:
            self.is_extended = self._get_is_extended(args or [])
        except:
            self.is_extended = False
-        num_code_suggestions = get_settings().pr_code_suggestions.num_code_suggestions_per_chunk
+        num_code_suggestions = int(get_settings().pr_code_suggestions.num_code_suggestions_per_chunk)


        self.ai_handler = ai_handler()
@ -69,6 +70,7 @@ class PRCodeSuggestions:
            "description": self.pr_description,
            "language": self.main_language,
            "diff": "",  # empty diff for initial calculation
+            "diff_no_line_numbers": "",  # empty diff for initial calculation
            "num_code_suggestions": num_code_suggestions,
            "extra_instructions": get_settings().pr_code_suggestions.extra_instructions,
            "commit_messages_str": self.git_provider.get_commit_messages(),
@ -110,18 +112,17 @@ class PRCodeSuggestions:
            if not data:
                data = {"code_suggestions": []}

-            if (data is None or 'code_suggestions' not in data or not data['code_suggestions']
-                    and get_settings().config.publish_output):
-                get_logger().warning('No code suggestions found for the PR.')
-
-                if (get_settings().config.publish_output_no_suggestions):
+            if (data is None or 'code_suggestions' not in data or not data['code_suggestions']):
                pr_body = "## PR Code Suggestions ✨\n\nNo code suggestions found for the PR."
+                get_logger().warning('No code suggestions found for the PR.')
+                if get_settings().config.publish_output and get_settings().config.publish_output_no_suggestions:
                    get_logger().debug(f"PR output", artifact=pr_body)
                    if self.progress_response:
                        self.git_provider.edit_comment(self.progress_response, body=pr_body)
                    else:
                        self.git_provider.publish_comment(pr_body)
-
+                else:
+                    get_settings().data = {"artifact": ""}
                return

            if (not self.is_extended and get_settings().pr_code_suggestions.rank_suggestions) or \
@ -198,8 +199,11 @@ class PRCodeSuggestions:
                        self.git_provider.remove_comment(self.progress_response)
            else:
                get_logger().info('Code suggestions generated for PR, but not published since publish_output is False.')
+                get_settings().data = {"artifact": data}
+                return
        except Exception as e:
-            get_logger().error(f"Failed to generate code suggestions for PR, error: {e}")
+            get_logger().error(f"Failed to generate code suggestions for PR, error: {e}",
+                               artifact={"traceback": traceback.format_exc()})
            if get_settings().config.publish_output:
                if self.progress_response:
                    self.progress_response.delete()
@ -331,7 +335,7 @@ class PRCodeSuggestions:

        if self.patches_diff:
            get_logger().debug(f"PR diff", artifact=self.patches_diff)
-            self.prediction = await self._get_prediction(model, self.patches_diff)
+            self.prediction = await self._get_prediction(model, self.patches_diff, self.patches_diff_no_line_number)
        else:
            get_logger().warning(f"Empty PR diff")
            self.prediction = None
@ -339,23 +343,26 @@ class PRCodeSuggestions:
        data = self.prediction
        return data

-    async def _get_prediction(self, model: str, patches_diff: str) -> dict:
+    async def _get_prediction(self, model: str, patches_diff: str, patches_diff_no_line_number: str) -> dict:
        variables = copy.deepcopy(self.vars)
        variables["diff"] = patches_diff  # update diff
+        variables["diff_no_line_numbers"] = patches_diff_no_line_number  # update diff
        environment = Environment(undefined=StrictUndefined)
        system_prompt = environment.from_string(self.pr_code_suggestions_prompt_system).render(variables)
        user_prompt = environment.from_string(get_settings().pr_code_suggestions_prompt.user).render(variables)
        response, finish_reason = await self.ai_handler.chat_completion(
            model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)
+        if not get_settings().config.publish_output:
+            get_settings().system_prompt = system_prompt
+            get_settings().user_prompt = user_prompt

        # load suggestions from the AI response
        data = self._prepare_pr_code_suggestions(response)

-        # self-reflect on suggestions
-        if get_settings().pr_code_suggestions.self_reflect_on_suggestions:
-            model_turbo = get_settings().config.model_turbo  # use turbo model for self-reflection, since it is an easier task
+        # self-reflect on suggestions (mandatory, since line numbers are now generated in this step)
+        model_reflection = get_settings().config.model
        response_reflect = await self.self_reflect_on_suggestions(data["code_suggestions"],
-                                                                      patches_diff, model=model_turbo)
+                                                                  patches_diff, model=model_reflection)
        if response_reflect:
            response_reflect_yaml = load_yaml(response_reflect)
            code_suggestions_feedback = response_reflect_yaml["code_suggestions"]
@ -364,6 +371,25 @@ class PRCodeSuggestions:
                    try:
                        suggestion["score"] = code_suggestions_feedback[i]["suggestion_score"]
                        suggestion["score_why"] = code_suggestions_feedback[i]["why"]
+
+                        if 'relevant_lines_start' not in suggestion:
+                            relevant_lines_start = code_suggestions_feedback[i].get('relevant_lines_start', -1)
+                            relevant_lines_end = code_suggestions_feedback[i].get('relevant_lines_end', -1)
+                            suggestion['relevant_lines_start'] = relevant_lines_start
+                            suggestion['relevant_lines_end'] = relevant_lines_end
+                            if relevant_lines_start < 0 or relevant_lines_end < 0:
+                                suggestion["score"] = 0
+
+                        try:
+                            if get_settings().config.publish_output:
+                                suggestion_statistics_dict = {'score': int(suggestion["score"]),
+                                                              'label': suggestion["label"].lower().strip()}
+                                get_logger().info(f"PR-Agent suggestions statistics",
+                                                  statistics=suggestion_statistics_dict, analytics=True)
+                        except Exception as e:
+                            get_logger().error(f"Failed to log suggestion statistics, error: {e}")
+                            pass
+
                    except Exception as e:  #
                        get_logger().error(f"Error processing suggestion score {i}",
                                           artifact={"suggestion": suggestion,
@ -396,10 +422,10 @@ class PRCodeSuggestions:
        suggestion_truncation_message = get_settings().get("PR_CODE_SUGGESTIONS.SUGGESTION_TRUNCATION_MESSAGE", "")
        if max_code_suggestion_length > 0:
            if len(suggestion['improved_code']) > max_code_suggestion_length:
-                suggestion['improved_code'] = suggestion['improved_code'][:max_code_suggestion_length]
-                suggestion['improved_code'] += f"\n{suggestion_truncation_message}"
                get_logger().info(f"Truncated suggestion from {len(suggestion['improved_code'])} "
                                  f"characters to {max_code_suggestion_length} characters")
+                suggestion['improved_code'] = suggestion['improved_code'][:max_code_suggestion_length]
+                suggestion['improved_code'] += f"\n{suggestion_truncation_message}"
        return suggestion

    def _prepare_pr_code_suggestions(self, predictions: str) -> Dict:
@ -414,8 +440,7 @@ class PRCodeSuggestions:
        one_sentence_summary_list = []
        for i, suggestion in enumerate(data['code_suggestions']):
            try:
-                needed_keys = ['one_sentence_summary', 'label', 'relevant_file', 'relevant_lines_start',
-                               'relevant_lines_end']
+                needed_keys = ['one_sentence_summary', 'label', 'relevant_file']
                is_valid_keys = True
                for key in needed_keys:
                    if key not in suggestion:
@ -539,9 +564,33 @@ class PRCodeSuggestions:
            return True
        return False

+    def remove_line_numbers(self, patches_diff_list: List[str]) -> List[str]:
+        # build a copy of the diffs with leading line numbers stripped; NOTE: iterates self.patches_diff_list, not the patches_diff_list argument
+        try:
+            self.patches_diff_list_no_line_numbers = []
+            for patches_diff in self.patches_diff_list:
+                patches_diff_lines = patches_diff.splitlines()
+                for i, line in enumerate(patches_diff_lines):
+                    if line.strip():
+                        if line[0].isdigit():
+                            # find the first non-digit character; drop the leading line number and that one separator character
+                            for j, char in enumerate(line):
+                                if not char.isdigit():
+                                    patches_diff_lines[i] = line[j + 1:]
+                                    break
+                self.patches_diff_list_no_line_numbers.append('\n'.join(patches_diff_lines))
+            return self.patches_diff_list_no_line_numbers
+        except Exception as e:
+            get_logger().error(f"Error removing line numbers from patches_diff_list, error: {e}")
+            return patches_diff_list
+
    async def _prepare_prediction_extended(self, model: str) -> dict:
        self.patches_diff_list = get_pr_multi_diffs(self.git_provider, self.token_handler, model,
                                                    max_calls=get_settings().pr_code_suggestions.max_number_of_calls)
+
+        # create a copy of the patches_diff_list, without line numbers for '__new hunk__' sections
+        self.patches_diff_list_no_line_numbers = self.remove_line_numbers(self.patches_diff_list)
+
        if self.patches_diff_list:
            get_logger().info(f"Number of PR chunk calls: {len(self.patches_diff_list)}")
            get_logger().debug(f"PR diff:", artifact=self.patches_diff_list)
@ -549,12 +598,14 @@ class PRCodeSuggestions:
            # parallelize calls to AI:
            if get_settings().pr_code_suggestions.parallel_calls:
                prediction_list = await asyncio.gather(
-                    *[self._get_prediction(model, patches_diff) for patches_diff in self.patches_diff_list])
+                    *[self._get_prediction(model, patches_diff, patches_diff_no_line_numbers) for
+                      patches_diff, patches_diff_no_line_numbers in
+                      zip(self.patches_diff_list, self.patches_diff_list_no_line_numbers)])
                self.prediction_list = prediction_list
            else:
                prediction_list = []
-                for i, patches_diff in enumerate(self.patches_diff_list):
-                    prediction = await self._get_prediction(model, patches_diff)
+                for patches_diff, patches_diff_no_line_numbers in zip(self.patches_diff_list, self.patches_diff_list_no_line_numbers):
+                    prediction = await self._get_prediction(model, patches_diff, patches_diff_no_line_numbers)
                    prediction_list.append(prediction)

            data = {"code_suggestions": []}
@ -563,7 +614,6 @@ class PRCodeSuggestions:
                    score_threshold = max(1, int(get_settings().pr_code_suggestions.suggestions_score_threshold))
                    for i, prediction in enumerate(predictions["code_suggestions"]):
                        try:
-                            if get_settings().pr_code_suggestions.self_reflect_on_suggestions:
                            score = int(prediction.get("score", 1))
                            if score >= score_threshold:
                                data["code_suggestions"].append(prediction)
@ -571,10 +621,9 @@ class PRCodeSuggestions:
                                get_logger().info(
                                    f"Removing suggestions {i} from call {j}, because score is {score}, and score_threshold is {score_threshold}",
                                    artifact=prediction)
-                            else:
-                                data["code_suggestions"].append(prediction)
                        except Exception as e:
-                            get_logger().error(f"Error getting PR diff for suggestion {i} in call {j}, error: {e}")
+                            get_logger().error(f"Error getting PR diff for suggestion {i} in call {j}, error: {e}",
+                                               artifact={"prediction": prediction})
            self.data = data
        else:
            get_logger().warning(f"Empty PR diff list")
@ -625,7 +674,7 @@ class PRCodeSuggestions:
            if get_settings().pr_code_suggestions.final_clip_factor != 1:
                max_len = max(
                    len(data_sorted),
-                    get_settings().pr_code_suggestions.num_code_suggestions_per_chunk,
+                    int(get_settings().pr_code_suggestions.num_code_suggestions_per_chunk),
                )
                new_len = int(0.5 + max_len * get_settings().pr_code_suggestions.final_clip_factor)
                if new_len < len(data_sorted):
@ -658,10 +707,7 @@ class PRCodeSuggestions:
            header = f"Suggestion"
            delta = 66
            header += "&nbsp; " * delta
-            if get_settings().pr_code_suggestions.self_reflect_on_suggestions:
            pr_body += f"""<thead><tr><td>Category</td><td align=left>{header}</td><td align=center>Score</td></tr>"""
-            else:
-                pr_body += f"""<thead><tr><td>Category</td><td align=left>{header}</td></tr>"""
            pr_body += """<tbody>"""
            suggestions_labels = dict()
            # add all suggestions related to each label
@ -672,7 +718,6 @@ class PRCodeSuggestions:
                suggestions_labels[label].append(suggestion)

            # sort suggestions_labels by the suggestion with the highest score
-            if get_settings().pr_code_suggestions.self_reflect_on_suggestions:
            suggestions_labels = dict(
                sorted(suggestions_labels.items(), key=lambda x: max([s['score'] for s in x[1]]), reverse=True))
            # sort the suggestions inside each label group by score
@ -736,7 +781,6 @@ class PRCodeSuggestions:

 {example_code.rstrip()}                   
 """
-                    if get_settings().pr_code_suggestions.self_reflect_on_suggestions:
                    pr_body += f"<details><summary>Suggestion importance[1-10]: {suggestion['score']}</summary>\n\n"
                    pr_body += f"Why: {suggestion['score_why']}\n\n"
                    pr_body += f"</details>"
@ -744,7 +788,6 @@ class PRCodeSuggestions:
                    pr_body += f"</details>"

                    # # add another column for 'score'
-                    if get_settings().pr_code_suggestions.self_reflect_on_suggestions:
                    pr_body += f"</td><td align=center>{suggestion['score']}\n\n"

                    pr_body += f"</td></tr>"