Author: mrT23
Date: 2024-06-27 09:07:19 +03:00
parent 056eb3a954
commit 15f854336a


@@ -168,7 +168,8 @@ class PRDescription:
             return None
         large_pr_handling = get_settings().pr_description.enable_large_pr_handling and "pr_description_only_files_prompts" in get_settings()
-        patches_diff = get_pr_diff(self.git_provider, self.token_handler, model, large_pr_handling=large_pr_handling)
+        patches_diff = get_pr_diff(self.git_provider, self.token_handler, model,
+                                   large_pr_handling=large_pr_handling)
         if not large_pr_handling or patches_diff:
             self.patches_diff = patches_diff
             if patches_diff:
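Aside (not part of the commit): the unchanged guard in this hunk is what routes a PR to either the single-prompt flow or the large-PR flow. A minimal sketch of that routing, assuming — as the call site implies — that `get_pr_diff` returns an empty string when `large_pr_handling` is enabled and the full diff does not fit the token budget:

```python
# Sketch only; mirrors the guard above, not pr-agent's actual implementation.
def choose_path(large_pr_handling: bool, patches_diff: str) -> str:
    if not large_pr_handling or patches_diff:
        # Single-prompt path: the whole (possibly clipped) diff goes into one request.
        return "single_prompt"
    # Large-PR path: describe files in batches, then infer the PR header separately.
    return "per_file_batches"

assert choose_path(False, "some diff") == "single_prompt"   # feature disabled
assert choose_path(True, "some diff") == "single_prompt"    # enabled, but diff fits
assert choose_path(True, "") == "per_file_batches"          # enabled and diff too large
```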
@@ -211,29 +212,32 @@ class PRDescription:
                 get_settings().pr_description_only_description_prompts.system,
                 get_settings().pr_description_only_description_prompts.user)
             files_walkthrough = "\n".join(file_description_str_list)
+            files_walkthrough_prompt = copy.deepcopy(files_walkthrough)
             if remaining_files_list:
-                files_walkthrough += "\n\nNo more token budget. Additional unprocessed files:"
+                files_walkthrough_prompt += "\n\nNo more token budget. Additional unprocessed files:"
                 for file in remaining_files_list:
-                    files_walkthrough += f"\n- {file}"
+                    files_walkthrough_prompt += f"\n- {file}"
             if deleted_files_list:
-                files_walkthrough += "\n\nAdditional deleted files:"
+                files_walkthrough_prompt += "\n\nAdditional deleted files:"
                 for file in deleted_files_list:
-                    files_walkthrough += f"\n- {file}"
-            tokens_files_walkthrough = len(token_handler_only_description_prompt.encoder.encode(files_walkthrough))
+                    files_walkthrough_prompt += f"\n- {file}"
+            tokens_files_walkthrough = len(
+                token_handler_only_description_prompt.encoder.encode(files_walkthrough_prompt))
             total_tokens = token_handler_only_description_prompt.prompt_tokens + tokens_files_walkthrough
             max_tokens_model = get_max_tokens(model)
             if total_tokens > max_tokens_model - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD:
                 # clip files_walkthrough to git the tokens within the limit
-                files_walkthrough = clip_tokens(files_walkthrough,
-                                                max_tokens_model - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD - token_handler_only_description_prompt.prompt_tokens,
-                                                num_input_tokens=tokens_files_walkthrough)
+                files_walkthrough_prompt = clip_tokens(files_walkthrough_prompt,
+                                                       max_tokens_model - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD - token_handler_only_description_prompt.prompt_tokens,
+                                                       num_input_tokens=tokens_files_walkthrough)
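A hedged sketch of the budget check this hunk adjusts: count the prompt tokens plus the walkthrough tokens, and clip the walkthrough if the total would crowd out the reserved output buffer. The encoder, the constants, and the clipping helper below are stand-ins (shown with `tiktoken` directly), not the project's `TokenHandler` / `clip_tokens`:

```python
# Sketch, not the project's implementation: keep prompt + walkthrough + output
# buffer under the model's context size by truncating the walkthrough.
import tiktoken

OUTPUT_BUFFER_TOKENS = 1000          # assumed reserve for the model's answer
MAX_TOKENS_MODEL = 32_000            # assumed context window for the chosen model

def clip_to_budget(prompt_tokens: int, walkthrough: str) -> str:
    enc = tiktoken.get_encoding("cl100k_base")
    tokens = enc.encode(walkthrough)
    budget = MAX_TOKENS_MODEL - OUTPUT_BUFFER_TOKENS - prompt_tokens
    if len(tokens) <= budget:
        return walkthrough                 # already within the limit
    # keep only the first `budget` tokens, mirroring clip_tokens in spirit
    return enc.decode(tokens[:budget])
```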
             # PR header inference
             # toDo - add deleted and unprocessed files to the prompt ('files_walkthrough'), as extra data
-            get_logger().debug(f"PR diff only description", artifact=files_walkthrough)
-            prediction_headers = await self._get_prediction(model, patches_diff=files_walkthrough,
+            get_logger().debug(f"PR diff only description", artifact=files_walkthrough_prompt)
+            prediction_headers = await self._get_prediction(model, patches_diff=files_walkthrough_prompt,
                                                             prompt="pr_description_only_description_prompts")
             prediction_headers = prediction_headers.strip().removeprefix('```yaml').strip('`').strip()
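The unchanged cleanup line above strips a Markdown fence that the model sometimes wraps around its YAML reply; a quick illustration on a made-up reply:

```python
# Example of the cleanup chain on a fabricated model reply wrapped in a ```yaml fence.
raw = "```yaml\ntype: enhancement\ntitle: Improve token budgeting\n```"
clean = raw.strip().removeprefix("```yaml").strip("`").strip()
print(clean)  # -> "type: enhancement\ntitle: Improve token budgeting"
```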
             # manually add extra files to final prediction
             if get_settings().pr_description.mention_extra_files:
                 for file in remaining_files_list:
                     extra_file_yaml = f"""\
@@ -247,7 +251,6 @@ class PRDescription:
     not processed (token-limit)
 """
                     files_walkthrough = files_walkthrough.strip() + "\n" + extra_file_yaml.strip()
             # final processing
             self.prediction = prediction_headers + "\n" + "pr_files:\n" + files_walkthrough
             if not load_yaml(self.prediction):
@@ -256,6 +259,7 @@ class PRDescription:
                     get_logger().debug(f"Using only headers for describe {self.pr_id}")
                     self.prediction = prediction_headers
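For reference, a rough sketch of the assemble-and-validate step these context lines belong to: the header YAML and the per-file walkthrough are concatenated into one document, and if the result is not valid YAML the code falls back to the headers alone. `yaml.safe_load` stands in for pr-agent's own `load_yaml` helper here:

```python
# Sketch: combine the header YAML with the files walkthrough and keep the
# combined text only if it still parses; otherwise fall back to headers only.
import yaml

def assemble_prediction(prediction_headers: str, files_walkthrough: str) -> str:
    combined = prediction_headers + "\n" + "pr_files:\n" + files_walkthrough
    try:
        yaml.safe_load(combined)       # stand-in for pr-agent's load_yaml()
        return combined
    except yaml.YAMLError:
        # walkthrough broke the YAML (e.g. clipping cut mid-item) -> headers only
        return prediction_headers
```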
     async def _get_prediction(self, model: str, patches_diff: str, prompt="pr_description_prompt") -> str:
         variables = copy.deepcopy(self.vars)
         variables["diff"] = patches_diff  # update diff
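Finally, a hedged sketch of the variable-injection pattern whose first lines the last hunk shows: the diff is placed into a copy of the prompt variables and rendered into system/user templates before the model call. Jinja2 and the template strings below are illustrative assumptions, not lifted from the repo:

```python
# Rough sketch of prompt-variable injection; template names/contents are made up.
from jinja2 import Environment, StrictUndefined

def render_prompts(variables: dict, system_tpl: str, user_tpl: str) -> tuple[str, str]:
    env = Environment(undefined=StrictUndefined)   # fail loudly on missing variables
    system = env.from_string(system_tpl).render(variables)
    user = env.from_string(user_tpl).render(variables)
    return system, user

system, user = render_prompts({"diff": "<patches_diff here>"},
                              "You describe pull requests.",
                              "PR diff:\n{{ diff }}")
```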