mirror of
https://github.com/qodo-ai/pr-agent.git
synced 2025-07-13 17:20:38 +08:00
Merge pull request #1921 from abhinav-1305/add-flex-processing
feat: support OpenAI Flex Processing via [litellm] extra_body config
This commit is contained in:
@ -32,6 +32,16 @@ OPENAI__API_BASE=https://api.openai.com/v1
|
|||||||
OPENAI__KEY=sk-...
|
OPENAI__KEY=sk-...
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### OpenAI Flex Processing
|
||||||
|
|
||||||
|
To reduce costs for non-urgent/background tasks, enable Flex Processing:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[litellm]
|
||||||
|
extra_body='{"processing_mode": "flex"}'
|
||||||
|
```
|
||||||
|
|
||||||
|
See [OpenAI Flex Processing docs](https://platform.openai.com/docs/guides/flex-processing) for details.
|
||||||
|
|
||||||
### Azure
|
### Azure
|
||||||
|
|
||||||
|
@ -175,6 +175,37 @@ class LiteLLMAIHandler(BaseAiHandler):
|
|||||||
response_log['main_pr_language'] = 'unknown'
|
response_log['main_pr_language'] = 'unknown'
|
||||||
return response_log
|
return response_log
|
||||||
|
|
||||||
|
def _process_litellm_extra_body(self, kwargs: dict) -> dict:
    """
    Merge the optional LITELLM.EXTRA_BODY setting into the completion kwargs.

    The setting is read from ``get_settings().litellm.extra_body`` and is
    expected to be a JSON-encoded object. Its keys are validated against an
    allow-list and then added as top-level entries of ``kwargs``.

    Args:
        kwargs: The current kwargs dictionary. It is updated in place.

    Returns:
        The same ``kwargs`` dictionary, with the extra body keys merged in
        (unchanged when no extra_body is configured).

    Raises:
        ValueError: If extra_body contains invalid JSON, is not a JSON
            object, contains unsupported keys, or collides with keys that
            are already present in ``kwargs``.
    """
    allowed_extra_body_keys = {"processing_mode", "service_tier"}
    extra_body = getattr(getattr(get_settings(), "litellm", None), "extra_body", None)
    if not extra_body:
        # Nothing configured (missing section, missing key, or empty value).
        return kwargs

    # Keep the try body minimal: only the parse can raise JSONDecodeError.
    try:
        litellm_extra_body = json.loads(extra_body)
    except json.JSONDecodeError as e:
        raise ValueError(f"LITELLM.EXTRA_BODY contains invalid JSON: {str(e)}") from e

    if not isinstance(litellm_extra_body, dict):
        raise ValueError("LITELLM.EXTRA_BODY must be a JSON object")

    # Sets are unordered; sort the keys so error messages are deterministic.
    unsupported_keys = litellm_extra_body.keys() - allowed_extra_body_keys
    if unsupported_keys:
        raise ValueError(
            f"LITELLM.EXTRA_BODY contains unsupported keys: {', '.join(sorted(unsupported_keys))}. "
            f"Allowed keys: {', '.join(sorted(allowed_extra_body_keys))}"
        )

    # Refuse to silently override parameters that were already set.
    colliding_keys = kwargs.keys() & litellm_extra_body.keys()
    if colliding_keys:
        raise ValueError(
            f"LITELLM.EXTRA_BODY cannot override existing parameters: {', '.join(sorted(colliding_keys))}"
        )

    kwargs.update(litellm_extra_body)
    return kwargs
|
||||||
|
|
||||||
def _configure_claude_extended_thinking(self, model: str, kwargs: dict) -> dict:
|
def _configure_claude_extended_thinking(self, model: str, kwargs: dict) -> dict:
|
||||||
"""
|
"""
|
||||||
Configure Claude extended thinking parameters if applicable.
|
Configure Claude extended thinking parameters if applicable.
|
||||||
@ -364,6 +395,9 @@ class LiteLLMAIHandler(BaseAiHandler):
|
|||||||
raise ValueError(f"LITELLM.EXTRA_HEADERS contains invalid JSON: {str(e)}")
|
raise ValueError(f"LITELLM.EXTRA_HEADERS contains invalid JSON: {str(e)}")
|
||||||
kwargs["extra_headers"] = litellm_extra_headers
|
kwargs["extra_headers"] = litellm_extra_headers
|
||||||
|
|
||||||
|
# Support for custom OpenAI body fields (e.g., Flex Processing)
|
||||||
|
kwargs = self._process_litellm_extra_body(kwargs)
|
||||||
|
|
||||||
get_logger().debug("Prompts", artifact={"system": system, "user": user})
|
get_logger().debug("Prompts", artifact={"system": system, "user": user})
|
||||||
|
|
||||||
if get_settings().config.verbosity_level >= 2:
|
if get_settings().config.verbosity_level >= 2:
|
||||||
|
@ -16,6 +16,10 @@ key = "" # Acquire through https://platform.openai.com
|
|||||||
#deployment_id = "" # The deployment name you chose when you deployed the engine
|
#deployment_id = "" # The deployment name you chose when you deployed the engine
|
||||||
#fallback_deployments = [] # For each fallback model specified in configuration.toml in the [config] section, specify the appropriate deployment_id
|
#fallback_deployments = [] # For each fallback model specified in configuration.toml in the [config] section, specify the appropriate deployment_id
|
||||||
|
|
||||||
|
# OpenAI Flex Processing (optional, for cost savings)
|
||||||
|
# [litellm]
|
||||||
|
# extra_body='{"processing_mode": "flex"}'
|
||||||
|
|
||||||
[pinecone]
|
[pinecone]
|
||||||
api_key = "..."
|
api_key = "..."
|
||||||
environment = "gcp-starter"
|
environment = "gcp-starter"
|
||||||
|
Reference in New Issue
Block a user