feat: support OpenAI Flex Processing via [litellm] extra_body config

Abhinav Kumar
2025-07-07 21:14:45 +05:30
parent 17a90c536f
commit 12af211c13
3 changed files with 24 additions and 0 deletions


@@ -32,6 +32,16 @@ OPENAI__API_BASE=https://api.openai.com/v1
OPENAI__KEY=sk-...
```
### OpenAI Flex Processing
To reduce costs for non-urgent or background tasks, enable Flex Processing:
```toml
[litellm]
extra_body='{"service_tier": "flex"}'
```
See [OpenAI Flex Processing docs](https://platform.openai.com/docs/guides/flex-processing) for details.
### Azure


@@ -364,6 +364,16 @@ class LiteLLMAIHandler(BaseAiHandler):
                raise ValueError(f"LITELLM.EXTRA_HEADERS contains invalid JSON: {str(e)}")
            kwargs["extra_headers"] = litellm_extra_headers
        # Support for custom OpenAI body fields (e.g., Flex Processing)
        if get_settings().get("LITELLM.EXTRA_BODY", None):
            try:
                litellm_extra_body = json.loads(get_settings().litellm.extra_body)
                if not isinstance(litellm_extra_body, dict):
                    raise ValueError("LITELLM.EXTRA_BODY must be a JSON object")
                kwargs.update(litellm_extra_body)
            except json.JSONDecodeError as e:
                raise ValueError(f"LITELLM.EXTRA_BODY contains invalid JSON: {str(e)}")
        get_logger().debug("Prompts", artifact={"system": system, "user": user})
        if get_settings().config.verbosity_level >= 2:


@@ -16,6 +16,10 @@ key = "" # Acquire through https://platform.openai.com
#deployment_id = "" # The deployment name you chose when you deployed the engine
#fallback_deployments = [] # For each fallback model specified in configuration.toml in the [config] section, specify the appropriate deployment_id
# OpenAI Flex Processing (optional, for cost savings)
# [litellm]
# extra_body='{"service_tier": "flex"}'
[pinecone]
api_key = "..."
environment = "gcp-starter"