From f76f7507579612ff450ad57e34518f54f700c250 Mon Sep 17 00:00:00 2001
From: mrT23 <tal.r@codium.ai>
Date: Tue, 9 Jul 2024 07:49:30 +0300
Subject: [PATCH] best_practices

---
 pr_agent/settings/configuration.toml          |  4 ++
 .../settings/pr_code_suggestions_prompts.toml |  7 ++--
 .../pr_code_suggestions_reflect_prompts.toml  | 11 +++---
 pr_agent/tools/pr_code_suggestions.py         | 38 ++++++++++---------
 4 files changed, 35 insertions(+), 25 deletions(-)

diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
index 4fc6ae5c..c718edde 100644
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@@ -276,3 +276,7 @@ number_of_results = 5
 
 [lancedb]
 uri = "./lancedb"
+
+[best_practices]
+content = ""
+max_lines_allowed = 800
\ No newline at end of file
diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml
index eebd9d27..e7fbca6e 100644
--- a/pr_agent/settings/pr_code_suggestions_prompts.toml
+++ b/pr_agent/settings/pr_code_suggestions_prompts.toml
@@ -34,7 +34,7 @@ Suggestions should always focus on ways to improve the new code lines introduced
 
 
 Specific instructions for generating code suggestions:
-- Provide up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful.
+- Provide in total up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful.
 - The suggestions should focus on improving the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers).
 - Prioritize suggestions that address possible issues, major problems, and bugs in the PR code.
 - Don't suggest to add docstring, type hints, or comments, or to remove unused imports.
@@ -149,7 +149,7 @@ Suggestions should always focus on ways to improve the new code lines introduced
 
 
 Specific instructions for generating code suggestions:
-- Provide up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful.
+- Provide in total up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful.
 - The suggestions should focus on improving the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers).
 - Prioritize suggestions that address possible issues, major problems, and bugs in the PR code.
 - Don't suggest to add docstring, type hints, or comments, or to remove unused imports.
@@ -179,7 +179,8 @@ class CodeSuggestion(BaseModel):
     one_sentence_summary: str = Field(description="a short summary of the suggestion action, in a single sentence. Focus on the 'what'. Be general, and avoid method or variable names.")
     relevant_lines_start: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion starts (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above")
     relevant_lines_end: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion ends (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above")
-    label: str = Field(description="a single label for the suggestion, to help understand the suggestion type. For example: 'security', 'possible bug', 'possible issue', 'performance', 'enhancement', 'best practice', 'maintainability', etc. Other labels are also allowed")
+    label: str = Field(description="a single label for the suggestion, to help the user understand the suggestion type. For example: 'security', 'possible bug', 'possible issue', 'performance', 'enhancement', 'best practice', 'maintainability', etc. Other labels are also allowed")
+
 
 class PRCodeSuggestions(BaseModel):
     code_suggestions: List[CodeSuggestion]
diff --git a/pr_agent/settings/pr_code_suggestions_reflect_prompts.toml b/pr_agent/settings/pr_code_suggestions_reflect_prompts.toml
index 0fbdb862..265116dd 100644
--- a/pr_agent/settings/pr_code_suggestions_reflect_prompts.toml
+++ b/pr_agent/settings/pr_code_suggestions_reflect_prompts.toml
@@ -6,10 +6,10 @@ Your goal is to inspect, review and score the suggestsions.
 Be aware - the suggestions may not always be correct or accurate, and you should evaluate them in relation to the actual PR code diff presented. Sometimes the suggestion may ignore parts of the actual code diff, and in that case, you should give it a score of 0.
 
 Specific instructions:
-- Carefully review both the suggestion content, and the related PR code diff. Mistakes in the suggestions can occur. Make sure the suggestions are correct, and properly derived from the PR code diff.
+- Carefully review both the suggestion content, and the related PR code diff. Mistakes in the suggestions can occur. Make sure the suggestions are logical and correct, and properly derived from the PR code diff.
 - In addition to the exact code lines mentioned in each suggestion, review the code around them, to ensure that the suggestions are contextually accurate.
-- Also check that the 'existing_code' and 'improved_code' fields correctly reflect the suggested changes.
-- Make sure the suggestions focus on new code introduced in the PR, and not on existing code that was not changed.
+- Check that the 'existing_code' field is valid. The 'existing_code' content should match, or be derived from, code lines in a '__new hunk__' section of the PR code diff.
+- Check that the 'improved_code' field correctly reflects the suggestion content.
 - High scores (8 to 10) should be given to correct suggestions that address major bugs and issues, or security concerns. Lower scores (3 to 7) should be for correct suggestions addressing minor issues, code style, code readability, maintainability, etc. Don't give high scores to suggestions that are not crucial, and bring only small improvement or optimization.
 - Order the feedback the same way the suggestions are ordered in the input.
 
@@ -39,8 +39,9 @@ __old hunk__
 ...
 ======
 - In this format, we separated each hunk of code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code that was removed.
+- We added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.
 - Code lines are prefixed symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code.
-- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.
+
 
 
 The output must be a YAML object equivalent to type $PRCodeSuggestionsFeedback, according to the following Pydantic definitions:
@@ -48,7 +49,7 @@ The output must be a YAML object equivalent to type $PRCodeSuggestionsFeedback,
 class CodeSuggestionFeedback(BaseModel):
     suggestion_summary: str = Field(description="repeated from the input")
     relevant_file: str = Field(description="repeated from the input")
-    suggestion_score: int = Field(description="The actual output - the score of the suggestion, from 0 to 10. Give 0 if the suggestion is plain wrong. Otherwise, give a score from 1 to 10 (inclusive), where 1 is the lowest and 10 is the highest.")
+    suggestion_score: int = Field(description="The actual output - the score of the suggestion, from 0 to 10. Give 0 if the suggestion is wrong. Otherwise, give a score from 1 to 10 (inclusive), where 1 is the lowest and 10 is the highest.")
     why: str = Field(description="Short and concise explanation of why the suggestion received the score (one to two sentences).")
 
 class PRCodeSuggestionsFeedback(BaseModel):
diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py
index 6e10b54f..c028b457 100644
--- a/pr_agent/tools/pr_code_suggestions.py
+++ b/pr_agent/tools/pr_code_suggestions.py
@@ -34,6 +34,7 @@ class PRCodeSuggestions:
             MAX_CONTEXT_TOKENS_IMPROVE = get_settings().pr_code_suggestions.max_context_tokens
             if get_settings().config.max_model_tokens > MAX_CONTEXT_TOKENS_IMPROVE:
                 get_logger().info(f"Setting max_model_tokens to {MAX_CONTEXT_TOKENS_IMPROVE} for PR improve")
+                get_settings().config.max_model_tokens_original = get_settings().config.max_model_tokens
                 get_settings().config.max_model_tokens = MAX_CONTEXT_TOKENS_IMPROVE
 
         # extended mode
@@ -60,6 +61,7 @@ class PRCodeSuggestions:
             "num_code_suggestions": num_code_suggestions,
             "extra_instructions": get_settings().pr_code_suggestions.extra_instructions,
             "commit_messages_str": self.git_provider.get_commit_messages(),
+            "relevant_best_practices": "",
         }
         if 'claude' in get_settings().config.model:
             # prompt for Claude, with minor adjustments
@@ -168,12 +170,12 @@ class PRCodeSuggestions:
                     pass
 
     def publish_persistent_comment_with_history(self, pr_comment: str,
-                                   initial_header: str,
-                                   update_header: bool = True,
-                                   name='review',
-                                   final_update_message=True,
-                                   max_previous_comments=4,
-                                   progress_response=None):
+                                                initial_header: str,
+                                                update_header: bool = True,
+                                                name='review',
+                                                final_update_message=True,
+                                                max_previous_comments=4,
+                                                progress_response=None):
         history_header = f"#### Previous suggestions\n"
         last_commit_num = self.git_provider.get_latest_commit_url().split('/')[-1][:7]
         latest_suggestion_header = f"Latest suggestions up to {last_commit_num}"
@@ -198,7 +200,8 @@ class PRCodeSuggestions:
                                 continue
                             # find http link from comment.body[:table_index]
                             up_to_commit_txt = self.extract_link(comment.body[:table_index])
-                            prev_suggestion_table = comment.body[table_index:comment.body.rfind("</table>") + len("</table>")]
+                            prev_suggestion_table = comment.body[
+                                                    table_index:comment.body.rfind("</table>") + len("</table>")]
 
                             tick = "✅ " if "✅" in prev_suggestion_table else ""
                             # surround with details tag
@@ -225,7 +228,8 @@ class PRCodeSuggestions:
                             count += prev_suggestions.count(f"\n<details><summary>✅ {name.capitalize()}")
                             if count >= max_previous_comments:
                                 # remove the oldest suggestion
-                                prev_suggestion_table = prev_suggestion_table[:prev_suggestion_table.rfind(f"<details><summary>{name.capitalize()} up to commit")]
+                                prev_suggestion_table = prev_suggestion_table[:prev_suggestion_table.rfind(
+                                    f"<details><summary>{name.capitalize()} up to commit")]
 
                             tick = "✅ " if "✅" in latest_table else ""
                             # Add to the prev_suggestions section
@@ -242,7 +246,7 @@ class PRCodeSuggestions:
                             pr_comment_updated += f"{prev_suggestion_table}\n"
 
                         get_logger().info(f"Persistent mode - updating comment {comment_url} to latest {name} message")
-                        if progress_response: # publish to 'progress_response' comment, because it refreshes immediately
+                        if progress_response:  # publish to 'progress_response' comment, because it refreshes immediately
                             self.git_provider.edit_comment(progress_response, pr_comment_updated)
                             comment.delete()
                         else:
@@ -300,9 +304,9 @@ class PRCodeSuggestions:
 
         # self-reflect on suggestions
         if get_settings().pr_code_suggestions.self_reflect_on_suggestions:
-            model = get_settings().config.model_turbo  # use turbo model for self-reflection, since it is an easier task
-            response_reflect = await self.self_reflect_on_suggestions(data["code_suggestions"], patches_diff,
-                                                                      model=model)
+            model_turbo = get_settings().config.model_turbo  # use turbo model for self-reflection, since it is an easier task
+            response_reflect = await self.self_reflect_on_suggestions(data["code_suggestions"],
+                                                                      patches_diff, model=model_turbo)
             if response_reflect:
                 response_reflect_yaml = load_yaml(response_reflect)
                 code_suggestions_feedback = response_reflect_yaml["code_suggestions"]
@@ -340,7 +344,7 @@ class PRCodeSuggestions:
     def _prepare_pr_code_suggestions(self, predictions: str) -> Dict:
         data = load_yaml(predictions.strip(),
                          keys_fix_yaml=["relevant_file", "suggestion_content", "existing_code", "improved_code"],
-                         first_key="code_suggestions",last_key="label")
+                         first_key="code_suggestions", last_key="label")
         if isinstance(data, list):
             data = {'code_suggestions': data}
 
@@ -458,7 +462,7 @@ class PRCodeSuggestions:
             get_logger().info("Extended mode is enabled by the `--extended` flag")
             return True
         if get_settings().pr_code_suggestions.auto_extended_mode:
-            get_logger().info("Extended mode is enabled automatically based on the configuration toggle")
+            # get_logger().info("Extended mode is enabled automatically based on the configuration toggle")
             return True
         return False
 
@@ -624,9 +628,9 @@ class PRCodeSuggestions:
                         code_snippet_link = ""
                     # add html table for each suggestion
 
-                    suggestion_content = suggestion['suggestion_content'].rstrip().rstrip()
-
-                    suggestion_content = insert_br_after_x_chars(suggestion_content, 90)
+                    suggestion_content = suggestion['suggestion_content'].rstrip()
+                    CHAR_LIMIT_PER_LINE = 84
+                    suggestion_content = insert_br_after_x_chars(suggestion_content, CHAR_LIMIT_PER_LINE)
                     # pr_body += f"<tr><td><details><summary>{suggestion_content}</summary>"
                     existing_code = suggestion['existing_code'].rstrip() + "\n"
                     improved_code = suggestion['improved_code'].rstrip() + "\n"