Merge pull request #183 from zmeir/zmeir-fallback_deployments

Support fallback deployments to accompany fallback models
2025-07-21 04:50:39 +08:00 · 2023-08-14 14:51:14 +03:00
parent 230d684cd3 6ca0655517
commit 23a79bc8fe
3 changed files with 50 additions and 10 deletions
--- a/pr_agent/algo/ai_handler.py
+++ b/pr_agent/algo/ai_handler.py
@ -29,7 +29,6 @@ class AiHandler:
            self.azure = False
            if get_settings().get("OPENAI.ORG", None):
                litellm.organization = get_settings().openai.org
-            self.deployment_id = get_settings().get("OPENAI.DEPLOYMENT_ID", None)
            if get_settings().get("OPENAI.API_TYPE", None):
                if get_settings().openai.api_type == "azure":
                    self.azure = True
@ -47,6 +46,13 @@ class AiHandler:
        except AttributeError as e:
            raise ValueError("OpenAI key is required") from e

+    @property
+    def deployment_id(self):
+        """
+        Return the deployment ID to use for the OpenAI API.
+
+        The value is looked up in the "OPENAI.DEPLOYMENT_ID" setting on every
+        access instead of being stored on the instance, so a change made to
+        that setting after the handler was constructed is seen by the next
+        access.
+
+        Returns:
+            The configured deployment ID, or None when the setting is absent.
+        """
+        return get_settings().get("OPENAI.DEPLOYMENT_ID", None)
+
    @retry(exceptions=(APIError, Timeout, TryAgain, AttributeError, RateLimitError),
           tries=OPENAI_RETRIES, delay=2, backoff=2, jitter=(1, 3))
    async def chat_completion(self, model: str, temperature: float, system: str, user: str):
@ -70,9 +76,15 @@ class AiHandler:
            TryAgain: If there is an attribute error during OpenAI inference.
        """
        try:
+            deployment_id = self.deployment_id
+            if get_settings().config.verbosity_level >= 2:
+                logging.debug(
+                    f"Generating completion with {model}"
+                    f"{(' from deployment ' + deployment_id) if deployment_id else ''}"
+                )
            response = await acompletion(
                model=model,
-                deployment_id=self.deployment_id,
+                deployment_id=deployment_id,
                messages=[
                    {"role": "system", "content": system},
                    {"role": "user", "content": user}
--- a/pr_agent/algo/pr_processing.py
+++ b/pr_agent/algo/pr_processing.py
@ -208,18 +208,45 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo


 async def retry_with_fallback_models(f: Callable):
+    """
+    Call `f` with each configured model in turn until one call succeeds.
+
+    Before every attempt the "openai.deployment_id" setting is overwritten with
+    the deployment paired with the model being tried.
+
+    Args:
+        f: A callable that accepts a model name and returns an awaitable.
+
+    Returns:
+        Whatever the first successful `await f(model)` returns.
+
+    Raises:
+        Exception: The exception from the last model tried, when every attempt
+            failed. Exceptions raised while building the model and deployment
+            lists are not caught here and propagate unchanged.
+    """
+    all_models = _get_all_models()
+    all_deployments = _get_all_deployments(all_models)
+    # try each (model, deployment_id) pair until one is successful, otherwise raise exception
+    # zip() stops at the shorter list, so deployments beyond the number of models are never tried
+    for i, (model, deployment_id) in enumerate(zip(all_models, all_deployments)):
+        try:
+            # NOTE(review): this setting is not restored afterwards, and
+            # _get_all_deployments() reads the same key as the primary deployment,
+            # so a later call appears to start from the last deployment tried
+            # here - confirm this is intended.
+            get_settings().set("openai.deployment_id", deployment_id)
+            return await f(model)
+        except Exception as e:
+            # Log the full traceback and move on to the next (model, deployment) pair
+            logging.warning(
+                f"Failed to generate prediction with {model}"
+                f"{(' from deployment ' + deployment_id) if deployment_id else ''}: "
+                f"{traceback.format_exc()}"
+            )
+            if i == len(all_models) - 1:  # If it's the last iteration
+                raise  # Re-raise the last exception
+
+
+def _get_all_models() -> List[str]:
+    """
+    Build the ordered list of models to try: the primary model first, followed
+    by the fallback models.
+
+    Reads `config.model` and `config.fallback_models` from the settings. A
+    `fallback_models` value that is not a list is split on commas and each
+    piece is stripped of surrounding whitespace.
+
+    Returns:
+        A list whose first element is the primary model.
+    """
    model = get_settings().config.model
    fallback_models = get_settings().config.fallback_models
    if not isinstance(fallback_models, list):
-        fallback_models = [fallback_models]
+        # NOTE(review): assumes any non-list value is a string; a value without
+        # .split() (for example None) would fail here - verify against the
+        # settings defaults.
+        fallback_models = [m.strip() for m in fallback_models.split(",")]
    all_models = [model] + fallback_models
-    for i, model in enumerate(all_models):
-        try:
-            return await f(model)
-        except Exception as e:
-            logging.warning(f"Failed to generate prediction with {model}: {traceback.format_exc()}")
-            if i == len(all_models) - 1:  # If it's the last iteration
-                raise  # Re-raise the last exception
+    return all_models
+
+
+def _get_all_deployments(all_models: List[str]) -> List[str]:
+    """
+    Build the list of deployment IDs that is paired, by position, with `all_models`.
+
+    Reads "openai.deployment_id" (the primary deployment) and
+    "openai.fallback_deployments" from the settings. A non-empty
+    `fallback_deployments` value that is not a list is split on commas and each
+    piece is stripped of surrounding whitespace.
+
+    Args:
+        all_models: The models that will be tried, primary model first.
+
+    Returns:
+        The primary deployment followed by the fallback deployments when any
+        fallback deployments are configured; otherwise the primary deployment
+        repeated once per model. Entries are None when "openai.deployment_id"
+        is not set, despite the `List[str]` annotation.
+
+    Raises:
+        ValueError: If fallback deployments are configured but, together with
+            the primary deployment, there are fewer deployments than models.
+    """
+    deployment_id = get_settings().get("openai.deployment_id", None)
+    fallback_deployments = get_settings().get("openai.fallback_deployments", [])
+    # Only a truthy non-list value is split; an empty or missing value falls through to the else branch below
+    if not isinstance(fallback_deployments, list) and fallback_deployments:
+        fallback_deployments = [d.strip() for d in fallback_deployments.split(",")]
+    if fallback_deployments:
+        all_deployments = [deployment_id] + fallback_deployments
+        # Only a shortfall is rejected; a surplus of deployments passes this check
+        if len(all_deployments) < len(all_models):
+            raise ValueError(f"The number of deployments ({len(all_deployments)}) "
+                             f"is less than the number of models ({len(all_models)})")
+    else:
+        # No fallback deployments configured: every model uses the primary deployment
+        all_deployments = [deployment_id] * len(all_models)
+    return all_deployments


 def find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo],
--- a/pr_agent/settings/.secrets_template.toml
+++ b/pr_agent/settings/.secrets_template.toml
@ -14,6 +14,7 @@ key = ""  # Acquire through https://platform.openai.com
 #api_version = '2023-05-15'  # Check Azure documentation for the current API version
 #api_base = ""  # The base URL for your Azure OpenAI resource. e.g. "https://<your resource name>.openai.azure.com"
 #deployment_id = ""  # The deployment name you chose when you deployed the engine
+#fallback_deployments = []  # For each fallback model specified in configuration.toml in the [config] section, specify the matching deployment_id, in the same order as the fallback models

 [anthropic]
 key = "" # Optional, uncomment if you want to use Anthropic. Acquire through https://www.anthropic.com/