diff --git a/docs/docs/usage-guide/changing_a_model.md b/docs/docs/usage-guide/changing_a_model.md index 9648e6cf..361abbca 100644 --- a/docs/docs/usage-guide/changing_a_model.md +++ b/docs/docs/usage-guide/changing_a_model.md @@ -32,6 +32,16 @@ OPENAI__API_BASE=https://api.openai.com/v1 OPENAI__KEY=sk-... ``` +### OpenAI Flex Processing + +To reduce costs for non-urgent/background tasks, enable Flex Processing: + +```toml +[litellm] +extra_body='{"service_tier": "flex"}' +``` + +See [OpenAI Flex Processing docs](https://platform.openai.com/docs/guides/flex-processing) for details. ### Azure diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py index ec96d952..ecb84ea7 100644 --- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py +++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py @@ -364,6 +364,16 @@ class LiteLLMAIHandler(BaseAiHandler): raise ValueError(f"LITELLM.EXTRA_HEADERS contains invalid JSON: {str(e)}") kwargs["extra_headers"] = litellm_extra_headers + # Support for custom OpenAI body fields (e.g., Flex Processing) + if get_settings().get("LITELLM.EXTRA_BODY", None): + try: + litellm_extra_body = json.loads(get_settings().litellm.extra_body) + if not isinstance(litellm_extra_body, dict): + raise ValueError("LITELLM.EXTRA_BODY must be a JSON object") + kwargs.update(litellm_extra_body) + except json.JSONDecodeError as e: + raise ValueError(f"LITELLM.EXTRA_BODY contains invalid JSON: {str(e)}") + get_logger().debug("Prompts", artifact={"system": system, "user": user}) if get_settings().config.verbosity_level >= 2: diff --git a/pr_agent/settings/.secrets_template.toml b/pr_agent/settings/.secrets_template.toml index 350abe5c..c3d7a3f9 100644 --- a/pr_agent/settings/.secrets_template.toml +++ b/pr_agent/settings/.secrets_template.toml @@ -16,6 +16,10 @@ key = "" # Acquire through https://platform.openai.com #deployment_id = "" # The deployment name you chose when you deployed the engine 
#fallback_deployments = [] # For each fallback model specified in configuration.toml in the [config] section, specify the appropriate deployment_id +# OpenAI Flex Processing (optional, for cost savings) +# [litellm] +# extra_body='{"service_tier": "flex"}' + [pinecone] api_key = "..." environment = "gcp-starter"