feat: support OpenAI Flex Processing via [litellm] extra_body config

Abhinav Kumar
2025-07-07 21:14:45 +05:30
parent 17a90c536f
commit 12af211c13
3 changed files with 24 additions and 0 deletions


@@ -32,6 +32,16 @@ OPENAI__API_BASE=https://api.openai.com/v1
OPENAI__KEY=sk-...
```
### OpenAI Flex Processing
To reduce costs for non-urgent or background tasks, enable Flex Processing:
```toml
[litellm]
extra_body='{"service_tier": "flex"}'
```
See [OpenAI Flex Processing docs](https://platform.openai.com/docs/guides/flex-processing) for details.
### Azure


@@ -364,6 +364,16 @@ class LiteLLMAIHandler(BaseAiHandler):
                raise ValueError(f"LITELLM.EXTRA_HEADERS contains invalid JSON: {str(e)}")
            kwargs["extra_headers"] = litellm_extra_headers
        # Support for custom OpenAI body fields (e.g., Flex Processing)
        if get_settings().get("LITELLM.EXTRA_BODY", None):
            try:
                litellm_extra_body = json.loads(get_settings().litellm.extra_body)
                if not isinstance(litellm_extra_body, dict):
                    raise ValueError("LITELLM.EXTRA_BODY must be a JSON object")
                kwargs.update(litellm_extra_body)
            except json.JSONDecodeError as e:
                raise ValueError(f"LITELLM.EXTRA_BODY contains invalid JSON: {str(e)}")
        get_logger().debug("Prompts", artifact={"system": system, "user": user})
        if get_settings().config.verbosity_level >= 2:


@@ -16,6 +16,10 @@ key = "" # Acquire through https://platform.openai.com
#deployment_id = "" # The deployment name you chose when you deployed the engine
#fallback_deployments = [] # For each fallback model specified in configuration.toml in the [config] section, specify the appropriate deployment_id
# OpenAI Flex Processing (optional, for cost savings)
# [litellm]
# extra_body='{"service_tier": "flex"}'
[pinecone]
api_key = "..."
environment = "gcp-starter"