diff --git a/docs/docs/usage-guide/changing_a_model.md b/docs/docs/usage-guide/changing_a_model.md
index 9648e6cf..361abbca 100644
--- a/docs/docs/usage-guide/changing_a_model.md
+++ b/docs/docs/usage-guide/changing_a_model.md
@@ -32,6 +32,16 @@ OPENAI__API_BASE=https://api.openai.com/v1
 OPENAI__KEY=sk-...
 ```
 
+### OpenAI Flex Processing
+
+To reduce costs for non-urgent/background tasks, enable Flex Processing:
+
+```toml
+[litellm]
+extra_body='{"processing_mode": "flex"}'
+```
+
+See [OpenAI Flex Processing docs](https://platform.openai.com/docs/guides/flex-processing) for details.
 
 ### Azure
 
diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
index ec96d952..cbfe37da 100644
--- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py
+++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -175,6 +175,37 @@ class LiteLLMAIHandler(BaseAiHandler):
             response_log['main_pr_language'] = 'unknown'
         return response_log
 
+    def _process_litellm_extra_body(self, kwargs: dict) -> dict:
+        """
+        Process LITELLM.EXTRA_BODY configuration and update kwargs accordingly.
+
+        Args:
+            kwargs: The current kwargs dictionary to update
+
+        Returns:
+            Updated kwargs dictionary
+
+        Raises:
+            ValueError: If extra_body contains invalid JSON, unsupported keys, or colliding keys
+        """
+        allowed_extra_body_keys = {"processing_mode", "service_tier"}
+        extra_body = getattr(getattr(get_settings(), "litellm", None), "extra_body", None)
+        if extra_body:
+            try:
+                litellm_extra_body = json.loads(extra_body)
+                if not isinstance(litellm_extra_body, dict):
+                    raise ValueError("LITELLM.EXTRA_BODY must be a JSON object")
+                unsupported_keys = set(litellm_extra_body.keys()) - allowed_extra_body_keys
+                if unsupported_keys:
+                    raise ValueError(f"LITELLM.EXTRA_BODY contains unsupported keys: {', '.join(unsupported_keys)}. Allowed keys: {', '.join(allowed_extra_body_keys)}")
+                colliding_keys = kwargs.keys() & litellm_extra_body.keys()
+                if colliding_keys:
+                    raise ValueError(f"LITELLM.EXTRA_BODY cannot override existing parameters: {', '.join(colliding_keys)}")
+                kwargs.update(litellm_extra_body)
+            except json.JSONDecodeError as e:
+                raise ValueError(f"LITELLM.EXTRA_BODY contains invalid JSON: {str(e)}")
+        return kwargs
+
     def _configure_claude_extended_thinking(self, model: str, kwargs: dict) -> dict:
         """
         Configure Claude extended thinking parameters if applicable.
@@ -364,6 +395,9 @@ class LiteLLMAIHandler(BaseAiHandler):
                 raise ValueError(f"LITELLM.EXTRA_HEADERS contains invalid JSON: {str(e)}")
             kwargs["extra_headers"] = litellm_extra_headers
 
+        # Support for custom OpenAI body fields (e.g., Flex Processing)
+        kwargs = self._process_litellm_extra_body(kwargs)
+
         get_logger().debug("Prompts", artifact={"system": system, "user": user})
 
         if get_settings().config.verbosity_level >= 2:
diff --git a/pr_agent/settings/.secrets_template.toml b/pr_agent/settings/.secrets_template.toml
index 350abe5c..c3d7a3f9 100644
--- a/pr_agent/settings/.secrets_template.toml
+++ b/pr_agent/settings/.secrets_template.toml
@@ -16,6 +16,10 @@ key = "" # Acquire through https://platform.openai.com
 #deployment_id = "" # The deployment name you chose when you deployed the engine
 #fallback_deployments = [] # For each fallback model specified in configuration.toml in the [config] section, specify the appropriate deployment_id
 
+# OpenAI Flex Processing (optional, for cost savings)
+# [litellm]
+# extra_body='{"processing_mode": "flex"}'
+
 [pinecone]
 api_key = "..."
 environment = "gcp-starter"
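For reference, below is a minimal standalone sketch of the validation flow that the new `_process_litellm_extra_body` method performs: parse the configured JSON string, accept only `processing_mode`/`service_tier`, refuse keys that would collide with existing request parameters, then merge the rest into the kwargs passed to litellm. The `merge_extra_body` name, the plain-string argument, and the example kwargs are illustrative stand-ins; the real handler reads the value via `get_settings().litellm.extra_body`, so this is a behavioral sketch, not the patch itself.

```python
import json
from typing import Optional

# Mirrors the whitelist used in the patch; anything else is rejected up front
# rather than silently forwarded to the provider.
ALLOWED_EXTRA_BODY_KEYS = {"processing_mode", "service_tier"}


def merge_extra_body(kwargs: dict, extra_body: Optional[str]) -> dict:
    """Validate a JSON string of extra body fields and merge it into kwargs."""
    if not extra_body:
        return kwargs
    try:
        parsed = json.loads(extra_body)
    except json.JSONDecodeError as e:
        raise ValueError(f"extra_body contains invalid JSON: {e}")
    if not isinstance(parsed, dict):
        raise ValueError("extra_body must be a JSON object")
    unsupported = set(parsed) - ALLOWED_EXTRA_BODY_KEYS
    if unsupported:
        raise ValueError(f"unsupported keys: {', '.join(sorted(unsupported))}")
    colliding = kwargs.keys() & parsed.keys()
    if colliding:
        raise ValueError(f"cannot override existing parameters: {', '.join(sorted(colliding))}")
    kwargs.update(parsed)
    return kwargs


if __name__ == "__main__":
    # A valid Flex Processing setting is merged into the request kwargs.
    print(merge_extra_body({"model": "gpt-4o", "temperature": 0.2},
                           '{"processing_mode": "flex"}'))
    # An unsupported key (e.g. "timeout") is rejected before any API call.
    try:
        merge_extra_body({"model": "gpt-4o"}, '{"timeout": 30}')
    except ValueError as err:
        print(f"rejected: {err}")
```

The key-whitelist plus collision check is the design point worth noting: user-supplied body fields can never override parameters the handler already sets (model, messages, temperature, etc.), and misconfigured values fail fast with a descriptive error instead of producing a confusing provider response.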