Add truncation and summarization features to PR code suggestions

Author: mrT23
Date: 2024-03-04 08:16:05 +02:00
Parent: 248c6b13be
Commit: eed23a7aaa
2 changed files with 37 additions and 9 deletions


@@ -40,12 +40,14 @@ Specific instructions:
{%- if extra_instructions %}
Extra instructions from the user:
======
{{ extra_instructions }}
======
{%- endif %}
The output must be a YAML object equivalent to type $PRCodeSuggestions, according to the following Pydantic definitions:
=====
class CodeSuggestion(BaseModel):
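
The hunk cuts the Pydantic definitions off after the class header. A hedged reconstruction of what CodeSuggestion and PRCodeSuggestions plausibly contain, inferred only from the fields that appear in the example output and in the validation loop further down; the real definitions in the prompt file may differ:

# Hypothetical reconstruction -- inferred fields, not copied from the repository.
from typing import List
from pydantic import BaseModel, Field

class CodeSuggestion(BaseModel):
    relevant_file: str = Field(description="full path of the relevant file")
    suggestion_content: str = Field(description="the suggestion text")
    existing_code: str = Field(description="code snippet from the PR to improve")
    improved_code: str = Field(description="the improved replacement snippet")
    one_sentence_summary: str = Field(description="one-line summary (summarize mode)")
    relevant_lines_start: int = Field(description="first relevant line in the hunk")
    relevant_lines_end: int = Field(description="last relevant line in the hunk")
    label: str = Field(description="a single label for the suggestion type")

class PRCodeSuggestions(BaseModel):
    code_suggestions: List[CodeSuggestion]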
@@ -80,18 +82,18 @@ code_suggestions:
    ...
{%- if summarize_mode %}
  existing_code: |
    def func1():
    ...
  improved_code: |
    ...
  one_sentence_summary: |
    ...
  relevant_lines_start: 12
-  relevant_lines_end: 12
+  relevant_lines_end: 13
{%- else %}
  existing_code: |
    def func1():
    ...
  relevant_lines_start: 12
-  relevant_lines_end: 12
+  relevant_lines_end: 13
  improved_code: |
    ...
{%- endif %}
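
The prompt is a Jinja2 template, rendered with StrictUndefined in _get_prediction below, so the summarize_mode toggle selects between the two example layouts above at render time. A minimal sketch of that toggle, using an illustrative fragment in place of the real prompt:

# Toy demonstration of the summarize_mode toggle; the template string here
# is illustrative, not the actual prompt file content.
from jinja2 import Environment, StrictUndefined

fragment = """\
{%- if summarize_mode %}
  one_sentence_summary: |
    ...
{%- else %}
  improved_code: |
    ...
{%- endif %}"""

env = Environment(undefined=StrictUndefined)
print(env.from_string(fragment).render(summarize_mode=True))   # summary-style example
print(env.from_string(fragment).render(summarize_mode=False))  # classic example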


@@ -137,6 +137,7 @@ class PRCodeSuggestions:
                                        model,
                                        add_line_numbers_to_hunks=True,
                                        disable_extra_lines=True)
        if self.patches_diff:
            get_logger().debug(f"PR diff", artifact=self.patches_diff)
            self.prediction = await self._get_prediction(model, self.patches_diff)
@@ -150,12 +151,23 @@ class PRCodeSuggestions:
        environment = Environment(undefined=StrictUndefined)
        system_prompt = environment.from_string(get_settings().pr_code_suggestions_prompt.system).render(variables)
        user_prompt = environment.from_string(get_settings().pr_code_suggestions_prompt.user).render(variables)
        response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
                                                                        system=system_prompt, user=user_prompt)
        return response
+    @staticmethod
+    def _truncate_if_needed(suggestion):
+        max_code_suggestion_length = get_settings().get("PR_CODE_SUGGESTIONS.MAX_CODE_SUGGESTION_LENGTH", 0)
+        suggestion_truncation_message = get_settings().get("PR_CODE_SUGGESTIONS.SUGGESTION_TRUNCATION_MESSAGE", "")
+        if max_code_suggestion_length > 0:
+            if len(suggestion['improved_code']) > max_code_suggestion_length:
+                original_length = len(suggestion['improved_code'])  # capture before truncating so the log is accurate
+                suggestion['improved_code'] = suggestion['improved_code'][:max_code_suggestion_length]
+                suggestion['improved_code'] += f"\n{suggestion_truncation_message}"
+                get_logger().info(f"Truncated suggestion from {original_length} "
+                                  f"characters to {max_code_suggestion_length} characters")
+        return suggestion
    def _prepare_pr_code_suggestions(self) -> Dict:
        review = self.prediction.strip()
        data = load_yaml(review,
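
_truncate_if_needed is driven by two settings, PR_CODE_SUGGESTIONS.MAX_CODE_SUGGESTION_LENGTH (0 disables truncation) and PR_CODE_SUGGESTIONS.SUGGESTION_TRUNCATION_MESSAGE. A self-contained sketch of the same semantics with the settings passed as plain arguments; the limit and message values are illustrative, not project defaults:

# Minimal sketch of the truncation rule: cut improved_code at the limit,
# then append a marker message. A limit of 0 means "no truncation".
def truncate_if_needed(improved_code: str, max_length: int = 0,
                       truncation_message: str = "... (truncated)") -> str:
    if 0 < max_length < len(improved_code):
        return improved_code[:max_length] + f"\n{truncation_message}"
    return improved_code

code = "x" * 5000
print(len(truncate_if_needed(code, max_length=3000)))  # 3016: 3000 chars + marker line
print(len(truncate_if_needed(code, max_length=0)))     # 5000: limit 0 leaves it untouched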
@@ -165,8 +177,22 @@
        # remove invalid suggestions
        suggestion_list = []
        one_sentence_summary_list = []
        for i, suggestion in enumerate(data['code_suggestions']):
-            if suggestion['existing_code'] != suggestion['improved_code']:
+            if get_settings().pr_code_suggestions.summarize:
+                if not suggestion or 'one_sentence_summary' not in suggestion or 'label' not in suggestion or 'relevant_file' not in suggestion:
+                    get_logger().debug(f"Skipping suggestion {i + 1}, because it is invalid: {suggestion}")
+                    continue
+
+                if suggestion['one_sentence_summary'] in one_sentence_summary_list:
+                    get_logger().debug(f"Skipping suggestion {i + 1}, because it is a duplicate: {suggestion}")
+                    continue
+
+            if ('existing_code' in suggestion) and ('improved_code' in suggestion) and (
+                    suggestion['existing_code'] != suggestion['improved_code']):
+                suggestion = self._truncate_if_needed(suggestion)
+                if get_settings().pr_code_suggestions.summarize:
+                    one_sentence_summary_list.append(suggestion['one_sentence_summary'])
                suggestion_list.append(suggestion)
            else:
                get_logger().debug(
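
In summarize mode the loop above drops structurally invalid suggestions (missing one_sentence_summary, label, or relevant_file) and duplicates by one_sentence_summary; in all modes it keeps only suggestions whose improved_code actually differs from existing_code. A distilled, dependency-free sketch of that filter over plain dicts:

# Distilled sketch of the filtering loop above, without project dependencies.
REQUIRED_KEYS = ("one_sentence_summary", "label", "relevant_file")

def filter_suggestions(code_suggestions: list, summarize: bool) -> list:
    kept, seen_summaries = [], []
    for suggestion in code_suggestions:
        if not suggestion:
            continue  # skip empty/None entries defensively
        if summarize:
            if any(key not in suggestion for key in REQUIRED_KEYS):
                continue  # structurally invalid in summarize mode
            if suggestion["one_sentence_summary"] in seen_summaries:
                continue  # duplicate of an earlier suggestion
        if ("existing_code" in suggestion and "improved_code" in suggestion
                and suggestion["existing_code"] != suggestion["improved_code"]):
            if summarize:
                seen_summaries.append(suggestion["one_sentence_summary"])
            kept.append(suggestion)
    return kept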
@@ -250,7 +276,8 @@ class PRCodeSuggestions:
        # parallelize calls to AI:
        if get_settings().pr_code_suggestions.parallel_calls:
-            prediction_list = await asyncio.gather(*[self._get_prediction(model, patches_diff) for patches_diff in self.patches_diff_list])
+            prediction_list = await asyncio.gather(
+                *[self._get_prediction(model, patches_diff) for patches_diff in self.patches_diff_list])
            self.prediction_list = prediction_list
        else:
            prediction_list = []
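
When parallel_calls is enabled, one completion is requested per diff chunk and the calls run concurrently via asyncio.gather; otherwise the chunks are processed one at a time. A toy sketch of that fan-out, with a sleeping fake_prediction standing in for the real model call:

import asyncio

async def fake_prediction(model: str, patches_diff: str) -> str:
    await asyncio.sleep(0.1)  # stand-in for one model round trip
    return f"prediction for {patches_diff}"

async def main():
    chunks = ["chunk-1", "chunk-2", "chunk-3"]
    # gather fans out all three calls at once, so total wall time is ~0.1s
    results = await asyncio.gather(*[fake_prediction("some-model", c) for c in chunks])
    print(results)

asyncio.run(main())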
@@ -304,7 +331,6 @@ class PRCodeSuggestions:
        system_prompt = environment.from_string(get_settings().pr_sort_code_suggestions_prompt.system).render(
            variables)
        user_prompt = environment.from_string(get_settings().pr_sort_code_suggestions_prompt.user).render(variables)
        response, finish_reason = await self.ai_handler.chat_completion(model=model, system=system_prompt,
                                                                        user=user_prompt)
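
Both the suggestion prompt and the sort prompt end in the same call shape: ai_handler.chat_completion(model=..., system=..., user=...) returning a (response, finish_reason) pair. A stub illustrating that contract; FakeAIHandler is a placeholder, not the project's AI handler:

import asyncio

class FakeAIHandler:
    # Mimics the (response, finish_reason) return contract used above.
    async def chat_completion(self, model: str, system: str, user: str,
                              temperature: float = 0.2) -> tuple:
        return f"[{model}] reply to: {user[:40]}", "stop"

async def demo():
    response, finish_reason = await FakeAIHandler().chat_completion(
        model="some-model", system="You sort code suggestions.", user="sort these: ...")
    print(response, finish_reason)

asyncio.run(demo())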