diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
index a21e3a71..63e9aaa1 100644
--- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py
+++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -359,7 +359,7 @@ class LiteLLMAIHandler(BaseAiHandler):
                 get_logger().info(f"\nUser prompt:\n{user}")
 
             # Get completion with automatic streaming detection
-            resp, finish_reason, response_obj = await self._get_completion(model, **kwargs)
+            resp, finish_reason, response_obj = await self._get_completion(**kwargs)
 
         except openai.RateLimitError as e:
             get_logger().error(f"Rate limit error during LLM inference: {e}")
@@ -383,10 +383,11 @@ class LiteLLMAIHandler(BaseAiHandler):
 
         return resp, finish_reason
 
-    async def _get_completion(self, model, **kwargs):
+    async def _get_completion(self, **kwargs):
         """
         Wrapper that automatically handles streaming for required models.
         """
+        model = kwargs["model"]
         if model in self.streaming_required_models:
             kwargs["stream"] = True
             get_logger().info(f"Using streaming mode for model {model}")
diff --git a/pr_agent/algo/ai_handlers/litellm_helpers.py b/pr_agent/algo/ai_handlers/litellm_helpers.py
index 5f30655d..7324301e 100644
--- a/pr_agent/algo/ai_handlers/litellm_helpers.py
+++ b/pr_agent/algo/ai_handlers/litellm_helpers.py
@@ -1,7 +1,6 @@
 import json
 
 import openai
-from azure.identity import ClientSecretCredential
 
 from pr_agent.config_loader import get_settings
 from pr_agent.log import get_logger
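
For context: the refactor routes the model name through `kwargs` instead of a positional argument, so `_get_completion` can recover it itself and decide whether to force `stream=True` for models that only support streaming responses. Below is a minimal, self-contained sketch of that pattern; the class name, constructor argument, and chunk-accumulation loop are illustrative assumptions, not the PR's actual implementation (only `litellm.acompletion` and the OpenAI-style streaming chunk shape are real APIs).

```python
import litellm


class StreamingAwareHandler:
    """Illustrative stand-in for LiteLLMAIHandler's streaming detection (sketch)."""

    def __init__(self, streaming_required_models=None):
        # Assumed: a configured list of models that must be called with stream=True.
        self.streaming_required_models = streaming_required_models or []

    async def _get_completion(self, **kwargs):
        # The model now travels inside kwargs rather than as a positional
        # argument, matching the signature change in the diff above.
        model = kwargs["model"]
        if model in self.streaming_required_models:
            kwargs["stream"] = True
            response = await litellm.acompletion(**kwargs)
            # Accumulate OpenAI-style streamed chunks into one string
            # (sketch; the real handler may post-process differently).
            text, finish_reason = "", None
            async for chunk in response:
                choice = chunk.choices[0]
                if getattr(choice.delta, "content", None):
                    text += choice.delta.content
                if choice.finish_reason:
                    finish_reason = choice.finish_reason
            return text, finish_reason, response
        # Non-streaming path: a single complete response object.
        response = await litellm.acompletion(**kwargs)
        choice = response.choices[0]
        return choice.message.content, choice.finish_reason, response
```

One trade-off worth noting: passing the model only through `kwargs` keeps the caller's argument list uniform, but it assumes `kwargs["model"]` is always present; a caller that omits it now gets a `KeyError` inside the wrapper rather than a `TypeError` at the call site.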