From 6c4a5bae52f969ee1fedb4bd7e855d7c333b8ee0 Mon Sep 17 00:00:00 2001
From: zmeir
Date: Mon, 7 Aug 2023 16:17:06 +0300
Subject: [PATCH 1/5] Support fallback deployments to accompany fallback models

This is useful, for example, with Azure OpenAI, where each model has its own
deployment, so the current fallback implementation doesn't work there (it still
uses the same deployment for each fallback attempt).
---
 pr_agent/algo/ai_handler.py              |  8 +++++++-
 pr_agent/algo/pr_processing.py           | 17 +++++++++++++++--
 pr_agent/settings/.secrets_template.toml |  1 +
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/pr_agent/algo/ai_handler.py b/pr_agent/algo/ai_handler.py
index 57221518..cfca63f6 100644
--- a/pr_agent/algo/ai_handler.py
+++ b/pr_agent/algo/ai_handler.py
@@ -27,7 +27,6 @@ class AiHandler:
             self.azure = False
             if get_settings().get("OPENAI.ORG", None):
                 litellm.organization = get_settings().openai.org
-            self.deployment_id = get_settings().get("OPENAI.DEPLOYMENT_ID", None)
             if get_settings().get("OPENAI.API_TYPE", None):
                 if get_settings().openai.api_type == "azure":
                     self.azure = True
@@ -45,6 +44,13 @@ class AiHandler:
         except AttributeError as e:
             raise ValueError("OpenAI key is required") from e
 
+    @property
+    def deployment_id(self):
+        """
+        Returns the deployment ID for the OpenAI API.
+        """
+        return get_settings().get("OPENAI.DEPLOYMENT_ID", None)
+
     @retry(exceptions=(APIError, Timeout, TryAgain, AttributeError, RateLimitError),
            tries=OPENAI_RETRIES, delay=2, backoff=2, jitter=(1, 3))
     async def chat_completion(self, model: str, temperature: float, system: str, user: str):
diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py
index 8b319446..fae2535a 100644
--- a/pr_agent/algo/pr_processing.py
+++ b/pr_agent/algo/pr_processing.py
@@ -208,13 +208,26 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
 
 
 async def retry_with_fallback_models(f: Callable):
+    # getting all models
     model = get_settings().config.model
     fallback_models = get_settings().config.fallback_models
     if not isinstance(fallback_models, list):
-        fallback_models = [fallback_models]
+        fallback_models = [m.strip() for m in fallback_models.split(",")]
     all_models = [model] + fallback_models
-    for i, model in enumerate(all_models):
+
+    # getting all deployments
+    deployment_id = get_settings().get("openai.deployment_id", None)
+    fallback_deployments = get_settings().get("openai.fallback_deployments", [])
+    if not isinstance(fallback_deployments, list) and fallback_deployments:
+        fallback_deployments = [d.strip() for d in fallback_deployments.split(",")]
+    if fallback_deployments:
+        all_deployments = [deployment_id] + fallback_deployments
+    else:
+        all_deployments = [deployment_id] * len(all_models)
+    # try each (model, deployment_id) pair until one is successful, otherwise raise exception
+    for i, (model, deployment_id) in enumerate(zip(all_models, all_deployments)):
         try:
+            get_settings().set("openai.deployment_id", deployment_id)
             return await f(model)
         except Exception as e:
             logging.warning(f"Failed to generate prediction with {model}: {traceback.format_exc()}")
diff --git a/pr_agent/settings/.secrets_template.toml b/pr_agent/settings/.secrets_template.toml
index 36b529a6..25a6562f 100644
--- a/pr_agent/settings/.secrets_template.toml
+++ b/pr_agent/settings/.secrets_template.toml
@@ -14,6 +14,7 @@ key = "" # Acquire through https://platform.openai.com
 #api_version = '2023-05-15' # Check Azure documentation for the current API version
 #api_base = "" # The base URL for your Azure OpenAI resource. e.g. "https://.openai.azure.com"
 #deployment_id = "" # The deployment name you chose when you deployed the engine
+#fallback_deployments = [] # Match your fallback models from configuration.toml with the appropriate deployment_id
 
 [anthropic]
 key = "" # Optional, uncomment if you want to use Anthropic. Acquire through https://www.anthropic.com/
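
A minimal sketch (not part of the patches) of the model/deployment pairing that PATCH 1 introduces, using hypothetical model and deployment names; in the real code the values come from config.model and config.fallback_models in configuration.toml and from openai.deployment_id and openai.fallback_deployments in .secrets_template.toml:

# Hypothetical stand-ins for the configured settings.
model = "gpt-4"                                   # config.model
fallback_models = ["gpt-3.5-turbo-16k"]           # config.fallback_models
deployment_id = "my-gpt4-deployment"              # openai.deployment_id (hypothetical Azure name)
fallback_deployments = ["my-gpt35-deployment"]    # openai.fallback_deployments (hypothetical Azure name)

all_models = [model] + fallback_models
if fallback_deployments:
    all_deployments = [deployment_id] + fallback_deployments
else:
    # without fallback deployments, every attempt reuses the same deployment
    all_deployments = [deployment_id] * len(all_models)

for model, deployment in zip(all_models, all_deployments):
    print(f"attempt: model={model}, deployment_id={deployment}")
# attempt: model=gpt-4, deployment_id=my-gpt4-deployment
# attempt: model=gpt-3.5-turbo-16k, deployment_id=my-gpt35-deployment
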
From 7f1849a86712e117465faf573485169d175872c1 Mon Sep 17 00:00:00 2001
From: zmeir
Date: Mon, 7 Aug 2023 22:42:53 +0300
Subject: [PATCH 2/5] Logging

---
 pr_agent/algo/ai_handler.py    | 8 +++++++-
 pr_agent/algo/pr_processing.py | 7 +++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/pr_agent/algo/ai_handler.py b/pr_agent/algo/ai_handler.py
index cfca63f6..5fcb13c7 100644
--- a/pr_agent/algo/ai_handler.py
+++ b/pr_agent/algo/ai_handler.py
@@ -74,9 +74,15 @@ class AiHandler:
             TryAgain: If there is an attribute error during OpenAI inference.
         """
         try:
+            deployment_id = self.deployment_id
+            if get_settings().config.verbosity_level >= 2:
+                logging.debug(
+                    f"Generating completion with {model}"
+                    f"{(' from deployment ' + deployment_id) if deployment_id else ''}"
+                )
             response = await acompletion(
                 model=model,
-                deployment_id=self.deployment_id,
+                deployment_id=deployment_id,
                 messages=[
                     {"role": "system", "content": system},
                     {"role": "user", "content": user}
diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py
index fae2535a..be4f4d5f 100644
--- a/pr_agent/algo/pr_processing.py
+++ b/pr_agent/algo/pr_processing.py
@@ -214,7 +214,6 @@ async def retry_with_fallback_models(f: Callable):
     if not isinstance(fallback_models, list):
         fallback_models = [m.strip() for m in fallback_models.split(",")]
     all_models = [model] + fallback_models
-
     # getting all deployments
     deployment_id = get_settings().get("openai.deployment_id", None)
     fallback_deployments = get_settings().get("openai.fallback_deployments", [])
@@ -230,7 +229,11 @@ async def retry_with_fallback_models(f: Callable):
             get_settings().set("openai.deployment_id", deployment_id)
             return await f(model)
         except Exception as e:
-            logging.warning(f"Failed to generate prediction with {model}: {traceback.format_exc()}")
+            logging.warning(
+                f"Failed to generate prediction with {model}"
+                f"{(' from deployment ' + deployment_id) if deployment_id else ''}: "
+                f"{traceback.format_exc()}"
+            )
             if i == len(all_models) - 1:  # If it's the last iteration
                 raise  # Re-raise the last exception
 

From 7762a672508f3aef10e034bcbf83126f102b5908 Mon Sep 17 00:00:00 2001
From: zmeir
Date: Sun, 13 Aug 2023 10:55:44 +0300
Subject: [PATCH 3/5] Fail if not enough fallback deployments

---
 pr_agent/algo/pr_processing.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py
index 140046a3..db311dac 100644
--- a/pr_agent/algo/pr_processing.py
+++ b/pr_agent/algo/pr_processing.py
@@ -221,6 +221,9 @@ async def retry_with_fallback_models(f: Callable):
         fallback_deployments = [d.strip() for d in fallback_deployments.split(",")]
     if fallback_deployments:
         all_deployments = [deployment_id] + fallback_deployments
+        if len(fallback_deployments) < len(fallback_models):
+            raise ValueError(f"The number of fallback deployments ({len(all_deployments)}) "
+                             f"is less than the number of fallback models ({len(all_models)})")
     else:
         all_deployments = [deployment_id] * len(all_models)
     # try each (model, deployment_id) pair until one is successful, otherwise raise exception
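
A minimal sketch of when the check added in PATCH 3 fires, again with hypothetical settings values (the error message below is paraphrased, not the one from the patch):

# Hypothetical values: two fallback models configured, but only one fallback deployment.
fallback_models = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo"]
fallback_deployments = ["my-gpt35-16k-deployment"]

if fallback_deployments and len(fallback_deployments) < len(fallback_models):
    # Fail fast instead of silently pairing a fallback model with the wrong deployment.
    raise ValueError("Not enough fallback deployments for the configured fallback models")
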
From edcf89a45664fa4e407f529d4f9aa58972fc7e91 Mon Sep 17 00:00:00 2001
From: zmeir
Date: Sun, 13 Aug 2023 10:56:16 +0300
Subject: [PATCH 4/5] Improve comment

---
 pr_agent/settings/.secrets_template.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pr_agent/settings/.secrets_template.toml b/pr_agent/settings/.secrets_template.toml
index 25a6562f..0ac75519 100644
--- a/pr_agent/settings/.secrets_template.toml
+++ b/pr_agent/settings/.secrets_template.toml
@@ -14,7 +14,7 @@ key = "" # Acquire through https://platform.openai.com
 #api_version = '2023-05-15' # Check Azure documentation for the current API version
 #api_base = "" # The base URL for your Azure OpenAI resource. e.g. "https://.openai.azure.com"
 #deployment_id = "" # The deployment name you chose when you deployed the engine
-#fallback_deployments = [] # Match your fallback models from configuration.toml with the appropriate deployment_id
+#fallback_deployments = [] # For each fallback model specified in configuration.toml in the [config] section, specify the appropriate deployment_id
 
 [anthropic]
 key = "" # Optional, uncomment if you want to use Anthropic. Acquire through https://www.anthropic.com/

From 6ca0655517dbb72e02d97494d912a220362f1210 Mon Sep 17 00:00:00 2001
From: zmeir
Date: Sun, 13 Aug 2023 11:03:10 +0300
Subject: [PATCH 5/5] Extracted to helper functions

---
 pr_agent/algo/pr_processing.py | 44 ++++++++++++++++++++--------------
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py
index db311dac..adab9506 100644
--- a/pr_agent/algo/pr_processing.py
+++ b/pr_agent/algo/pr_processing.py
@@ -208,24 +208,8 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
 
 
 async def retry_with_fallback_models(f: Callable):
-    # getting all models
-    model = get_settings().config.model
-    fallback_models = get_settings().config.fallback_models
-    if not isinstance(fallback_models, list):
-        fallback_models = [m.strip() for m in fallback_models.split(",")]
-    all_models = [model] + fallback_models
-    # getting all deployments
-    deployment_id = get_settings().get("openai.deployment_id", None)
-    fallback_deployments = get_settings().get("openai.fallback_deployments", [])
-    if not isinstance(fallback_deployments, list) and fallback_deployments:
-        fallback_deployments = [d.strip() for d in fallback_deployments.split(",")]
-    if fallback_deployments:
-        all_deployments = [deployment_id] + fallback_deployments
-        if len(fallback_deployments) < len(fallback_models):
-            raise ValueError(f"The number of fallback deployments ({len(all_deployments)}) "
-                             f"is less than the number of fallback models ({len(all_models)})")
-    else:
-        all_deployments = [deployment_id] * len(all_models)
+    all_models = _get_all_models()
+    all_deployments = _get_all_deployments(all_models)
     # try each (model, deployment_id) pair until one is successful, otherwise raise exception
     for i, (model, deployment_id) in enumerate(zip(all_models, all_deployments)):
         try:
@@ -241,6 +225,30 @@ async def retry_with_fallback_models(f: Callable):
                 raise  # Re-raise the last exception
 
 
+def _get_all_models() -> List[str]:
+    model = get_settings().config.model
+    fallback_models = get_settings().config.fallback_models
+    if not isinstance(fallback_models, list):
+        fallback_models = [m.strip() for m in fallback_models.split(",")]
+    all_models = [model] + fallback_models
+    return all_models
+
+
+def _get_all_deployments(all_models: List[str]) -> List[str]:
+    deployment_id = get_settings().get("openai.deployment_id", None)
+    fallback_deployments = get_settings().get("openai.fallback_deployments", [])
+    if not isinstance(fallback_deployments, list) and fallback_deployments:
+        fallback_deployments = [d.strip() for d in fallback_deployments.split(",")]
+    if fallback_deployments:
+        all_deployments = [deployment_id] + fallback_deployments
+        if len(all_deployments) < len(all_models):
+            raise ValueError(f"The number of deployments ({len(all_deployments)}) "
+                             f"is less than the number of models ({len(all_models)})")
+    else:
+        all_deployments = [deployment_id] * len(all_models)
+    return all_deployments
+
+
 def find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo],
                                               relevant_file: str,
                                               relevant_line_in_file: str) -> Tuple[int, int]:
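
To close out the series, a minimal sketch of the behaviour the extracted _get_all_deployments helper preserves when no fallback deployments are configured (e.g. plain OpenAI rather than Azure); the values below are hypothetical:

# Hypothetical values: neither openai.deployment_id nor openai.fallback_deployments is set.
deployment_id = None
all_models = ["gpt-4", "gpt-3.5-turbo-16k"]   # stand-in for the result of _get_all_models()

# Mirrors the else-branch: the single (possibly None) deployment_id is replicated
# so that zip(all_models, all_deployments) still yields one pair per model.
all_deployments = [deployment_id] * len(all_models)

for model, deployment in zip(all_models, all_deployments):
    print(f"model={model}, deployment_id={deployment}")
# model=gpt-4, deployment_id=None
# model=gpt-3.5-turbo-16k, deployment_id=None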