# pr-agent/pr_agent/algo/ai_handlers/litellm_ai_handler.py
import os

import boto3
import litellm
import openai
import requests
from litellm import acompletion
from tenacity import retry, retry_if_exception_type, stop_after_attempt

from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.config_loader import get_settings
from pr_agent.log import get_logger

OPENAI_RETRIES = 5


class LiteLLMAIHandler(BaseAiHandler):
"""
This class handles interactions with the OpenAI API for chat completions.
It initializes the API key and other settings from a configuration file,
and provides a method for performing chat completions using the OpenAI ChatCompletion API.
"""

    def __init__(self):
        """
        Initializes API keys and provider settings from the configuration.
        If no OpenAI key is configured, a placeholder key is set so that
        non-OpenAI providers can still be used.
        """
        self.azure = False
        self.api_base = None
        self.repetition_penalty = None
        if get_settings().get("OPENAI.KEY", None):
            openai.api_key = get_settings().openai.key
            litellm.openai_key = get_settings().openai.key
        elif 'OPENAI_API_KEY' not in os.environ:
            litellm.api_key = "dummy_key"
        if get_settings().get("aws.AWS_ACCESS_KEY_ID"):
            os.environ["AWS_ACCESS_KEY_ID"] = get_settings().aws.AWS_ACCESS_KEY_ID
            os.environ["AWS_SECRET_ACCESS_KEY"] = get_settings().aws.AWS_SECRET_ACCESS_KEY
            os.environ["AWS_REGION_NAME"] = get_settings().aws.AWS_REGION_NAME
        if get_settings().get("litellm.use_client"):
            litellm_token = get_settings().get("litellm.LITELLM_TOKEN")
            assert litellm_token, "LITELLM_TOKEN is required"
            os.environ["LITELLM_TOKEN"] = litellm_token
            litellm.use_client = True
        if get_settings().get("LITELLM.DROP_PARAMS", None):
            litellm.drop_params = get_settings().litellm.drop_params
        if get_settings().get("OPENAI.ORG", None):
            litellm.organization = get_settings().openai.org
        if get_settings().get("OPENAI.API_TYPE", None):
            if get_settings().openai.api_type == "azure":
                self.azure = True
                litellm.azure_key = get_settings().openai.key
        if get_settings().get("OPENAI.API_VERSION", None):
            litellm.api_version = get_settings().openai.api_version
        if get_settings().get("OPENAI.API_BASE", None):
            litellm.api_base = get_settings().openai.api_base
        if get_settings().get("ANTHROPIC.KEY", None):
            litellm.anthropic_key = get_settings().anthropic.key
        if get_settings().get("COHERE.KEY", None):
            litellm.cohere_key = get_settings().cohere.key
        if get_settings().get("GROQ.KEY", None):
            litellm.api_key = get_settings().groq.key
        if get_settings().get("REPLICATE.KEY", None):
            litellm.replicate_key = get_settings().replicate.key
        if get_settings().get("HUGGINGFACE.KEY", None):
            litellm.huggingface_key = get_settings().huggingface.key
        if get_settings().get("HUGGINGFACE.API_BASE", None) and 'huggingface' in get_settings().config.model:
            litellm.api_base = get_settings().huggingface.api_base
            self.api_base = get_settings().huggingface.api_base
        if get_settings().get("OLLAMA.API_BASE", None):
            litellm.api_base = get_settings().ollama.api_base
            self.api_base = get_settings().ollama.api_base
        if get_settings().get("HUGGINGFACE.REPETITION_PENALTY", None):
            self.repetition_penalty = float(get_settings().huggingface.repetition_penalty)
        if get_settings().get("VERTEXAI.VERTEX_PROJECT", None):
            litellm.vertex_project = get_settings().vertexai.vertex_project
            litellm.vertex_location = get_settings().get(
                "VERTEXAI.VERTEX_LOCATION", None
            )

    def prepare_logs(self, response, system, user, resp, finish_reason):
        response_log = response.dict().copy()
        response_log['system'] = system
        response_log['user'] = user
        response_log['output'] = resp
        response_log['finish_reason'] = finish_reason
        if hasattr(self, 'main_pr_language'):
            response_log['main_pr_language'] = self.main_pr_language
        else:
            response_log['main_pr_language'] = 'unknown'
        return response_log

    @property
    def deployment_id(self):
        """
        Returns the deployment ID for the OpenAI API.
        """
        return get_settings().get("OPENAI.DEPLOYMENT_ID", None)

    @retry(
        retry=retry_if_exception_type((openai.APIError, openai.APIConnectionError, openai.APITimeoutError)),  # No retry on RateLimitError
        stop=stop_after_attempt(OPENAI_RETRIES)
    )
    async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2, img_path: str = None):
        try:
            resp, finish_reason = None, None
            deployment_id = self.deployment_id
            if self.azure:
                model = 'azure/' + model
            messages = [{"role": "system", "content": system}, {"role": "user", "content": user}]
            if img_path:
                try:
                    # check if the image link is alive
                    r = requests.head(img_path, allow_redirects=True)
                    if r.status_code == 404:
                        error_msg = f"The image link is not [alive]({img_path}).\nPlease repost the original image as a comment, and send the question again with 'quote reply' (see [instructions](https://pr-agent-docs.codium.ai/tools/ask/#ask-on-images-using-the-pr-code-as-context))."
                        get_logger().error(error_msg)
                        return f"{error_msg}", "error"
                except Exception as e:
                    get_logger().error(f"Error fetching image: {img_path}", e)
                    return f"Error fetching image: {img_path}", "error"
                messages[1]["content"] = [{"type": "text", "text": messages[1]["content"]},
                                          {"type": "image_url", "image_url": {"url": img_path}}]

            kwargs = {
                "model": model,
                "deployment_id": deployment_id,
                "messages": messages,
                "temperature": temperature,
                "force_timeout": get_settings().config.ai_timeout,
                "api_base": self.api_base,
            }
            if self.repetition_penalty:
                kwargs["repetition_penalty"] = self.repetition_penalty

            get_logger().debug("Prompts", artifact={"system": system, "user": user})
            if get_settings().config.verbosity_level >= 2:
                get_logger().info(f"\nSystem prompt:\n{system}")
                get_logger().info(f"\nUser prompt:\n{user}")

            response = await acompletion(**kwargs)
        except (openai.APIError, openai.APITimeoutError) as e:
            get_logger().error("Error during OpenAI inference: ", e)
            raise
        except openai.RateLimitError as e:
            get_logger().error("Rate limit error during OpenAI inference: ", e)
            raise
        except Exception as e:
            get_logger().error("Unknown error during OpenAI inference: ", e)
            raise openai.APIError from e
        if response is None or len(response["choices"]) == 0:
            raise openai.APIError
        else:
            resp = response["choices"][0]['message']['content']
            finish_reason = response["choices"][0]["finish_reason"]
            get_logger().debug(f"\nAI response:\n{resp}")

            # log the full response for debugging
            response_log = self.prepare_logs(response, system, user, resp, finish_reason)
            get_logger().debug("Full_response", artifact=response_log)

            # for CLI debugging
            if get_settings().config.verbosity_level >= 2:
                get_logger().info(f"\nAI response:\n{resp}")

        return resp, finish_reason
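

# Usage sketch (not part of the original module): a minimal, hypothetical
# driver showing how chat_completion might be called once pr-agent's
# configuration is in place. The model name and prompts are placeholders.
if __name__ == "__main__":
    import asyncio

    async def _demo():
        handler = LiteLLMAIHandler()
        resp, finish_reason = await handler.chat_completion(
            model="gpt-4o",
            system="You are a helpful code reviewer.",
            user="Summarize the main risk in this diff: ...",
        )
        print(f"finish_reason={finish_reason}\n{resp}")

    asyncio.run(_demo())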