pr-agent/pr_agent/algo/ai_handlers/litellm_ai_handler.py

import os

import boto3
import litellm
import openai
from litellm import acompletion
from tenacity import (
    retry,
    retry_if_exception_type,
    retry_if_not_exception_type,
    stop_after_attempt,
)

from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.config_loader import get_settings
from pr_agent.log import get_logger

# Upper bound on attempts for a single chat_completion call; passed to tenacity's
# stop_after_attempt in the @retry decorator on LiteLLMAIHandler.chat_completion.
OPENAI_RETRIES = 5


class LiteLLMAIHandler(BaseAiHandler):
    """
    AI handler that performs chat completions through LiteLLM.

    On construction, provider credentials and endpoints found in the settings
    (OpenAI/Azure, Anthropic, Cohere, Replicate, HuggingFace, Vertex AI, AWS Bedrock)
    are copied onto the `litellm` module. `chat_completion` then sends a
    system + user prompt pair via `litellm.acompletion`.
    """

    def __init__(self):
        """
        Configures LiteLLM from the settings.

        Every provider section is optional: a section is applied only when its
        key setting is present and truthy.

        Raises:
            AssertionError: if `litellm.use_client` is enabled but `litellm.LITELLM_TOKEN` is not set.
        """
        settings = get_settings()

        self.azure = False
        self.aws_bedrock_client = None
        self.api_base = None
        self.repetition_penalty = None

        # --- OpenAI / Azure OpenAI ---
        if settings.get("OPENAI.KEY", None):
            openai.api_key = settings.openai.key
            litellm.openai_key = settings.openai.key
        if settings.get("litellm.use_client"):
            litellm_token = settings.get("litellm.LITELLM_TOKEN")
            assert litellm_token, "LITELLM_TOKEN is required"
            os.environ["LITELLM_TOKEN"] = litellm_token
            litellm.use_client = True
        if settings.get("OPENAI.ORG", None):
            litellm.organization = settings.openai.org
        if settings.get("OPENAI.API_TYPE", None):
            if settings.openai.api_type == "azure":
                self.azure = True
                litellm.azure_key = settings.openai.key
        if settings.get("OPENAI.API_VERSION", None):
            litellm.api_version = settings.openai.api_version
        if settings.get("OPENAI.API_BASE", None):
            litellm.api_base = settings.openai.api_base

        # --- Other hosted providers ---
        if settings.get("ANTHROPIC.KEY", None):
            litellm.anthropic_key = settings.anthropic.key
        if settings.get("COHERE.KEY", None):
            litellm.cohere_key = settings.cohere.key
        if settings.get("REPLICATE.KEY", None):
            litellm.replicate_key = settings.replicate.key

        # --- HuggingFace ---
        if settings.get("HUGGINGFACE.KEY", None):
            litellm.huggingface_key = settings.huggingface.key
        if settings.get("HUGGINGFACE.API_BASE", None) and 'huggingface' in settings.config.model:
            # The HuggingFace endpoint overrides any OpenAI api_base, but only when a
            # HuggingFace model is the configured model.
            litellm.api_base = settings.huggingface.api_base
            self.api_base = settings.huggingface.api_base
        # Read the correctly spelled key first; fall back to the historical misspelling
        # ("REPITITION") so configurations written against the old check keep working.
        repetition_penalty = settings.get("HUGGINGFACE.REPETITION_PENALTY", None) or settings.get(
            "HUGGINGFACE.REPITITION_PENALTY", None
        )
        if repetition_penalty:
            self.repetition_penalty = float(repetition_penalty)

        # --- Google Vertex AI ---
        if settings.get("VERTEXAI.VERTEX_PROJECT", None):
            litellm.vertex_project = settings.vertexai.vertex_project
            litellm.vertex_location = settings.get("VERTEXAI.VERTEX_LOCATION", None)

        # --- AWS Bedrock ---
        if settings.get("AWS.BEDROCK_REGION", None):
            litellm.AmazonAnthropicConfig.max_tokens_to_sample = 2000
            self.aws_bedrock_client = boto3.client(
                service_name="bedrock-runtime",
                region_name=settings.aws.bedrock_region,
            )

    @property
    def deployment_id(self):
        """
        Returns the `OPENAI.DEPLOYMENT_ID` setting, or None when it is not configured.
        """
        return get_settings().get("OPENAI.DEPLOYMENT_ID", None)

    @retry(
        # RateLimitError is a subclass of APIError in the openai SDK, so it has to be
        # excluded explicitly to honour "no retry on RateLimitError".
        retry=(
            retry_if_exception_type((openai.APIError, openai.APIConnectionError, openai.APITimeoutError))
            & retry_if_not_exception_type(openai.RateLimitError)
        ),
        stop=stop_after_attempt(OPENAI_RETRIES),
    )
    async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2):
        """
        Sends a system + user prompt pair to the model and returns its reply.

        Args:
            model: Model name; prefixed with 'azure/' when the handler is configured for Azure.
            system: Content of the system message.
            user: Content of the user message.
            temperature: Sampling temperature forwarded to the completion call.

        Returns:
            A `(content, finish_reason)` tuple taken from the first choice of the response.

        Raises:
            openai.RateLimitError: re-raised as-is and not retried.
            openai.APIError: for API/timeout errors, for any other exception raised while
                calling the model (wrapped, with the original as `__cause__`), and for a
                response without choices. These are retried up to OPENAI_RETRIES attempts;
                since the decorator does not set `reraise`, tenacity raises
                `tenacity.RetryError` once the attempts are exhausted.
        """
        try:
            if self.azure:
                model = 'azure/' + model
            kwargs = {
                "model": model,
                "deployment_id": self.deployment_id,
                "messages": [{"role": "system", "content": system}, {"role": "user", "content": user}],
                "temperature": temperature,
                "force_timeout": get_settings().config.ai_timeout,
                "api_base": self.api_base,
            }
            if self.aws_bedrock_client:
                kwargs["aws_bedrock_client"] = self.aws_bedrock_client
            if self.repetition_penalty:
                kwargs["repetition_penalty"] = self.repetition_penalty

            get_logger().debug("Prompts", artifact={"system": system, "user": user})

            if get_settings().config.verbosity_level >= 2:
                get_logger().info(f"\nSystem prompt:\n{system}")
                get_logger().info(f"\nUser prompt:\n{user}")

            response = await acompletion(**kwargs)
        except openai.RateLimitError as e:
            # Must precede the APIError handler: RateLimitError is an APIError subclass.
            get_logger().error(f"Rate limit error during OpenAI inference: {e}")
            raise
        except (openai.APIError, openai.APITimeoutError) as e:
            get_logger().error(f"Error during OpenAI inference: {e}")
            raise
        except Exception as e:
            get_logger().error(f"Unknown error during OpenAI inference: {e}")
            # Wrap in APIError so the retry policy applies. The SDK constructor requires a
            # message and a request; no HTTP request object exists here, hence request=None.
            raise openai.APIError(
                f"Unknown error during OpenAI inference: {e}", request=None, body=None
            ) from e

        if response is None or not response["choices"]:
            raise openai.APIError("Empty response from the model", request=None, body=None)

        first_choice = response["choices"][0]
        resp = first_choice['message']['content']
        finish_reason = first_choice["finish_reason"]
        # usage = response.get("usage")
        get_logger().debug(f"\nAI response:\n{resp}")
        get_logger().debug("Full_response", artifact=response)

        if get_settings().config.verbosity_level >= 2:
            get_logger().info(f"\nAI response:\n{resp}")

        return resp, finish_reason
litellm client 2023-09-09 17:35:45 +03:00			`import os`
Initial commit - PR-Agent OSS release 2023-07-06 00:21:08 +03:00
Update ai_handler.py 2023-11-28 23:07:46 +09:00			`import boto3`
Default timeout for AI is now 180s, configurable 2023-08-07 13:26:28 +03:00			`import litellm`
Initial commit - PR-Agent OSS release 2023-07-06 00:21:08 +03:00			`import openai`
Default timeout for AI is now 180s, configurable 2023-08-07 13:26:28 +03:00			`from litellm import acompletion`
Refactor litellm_ai_handler.py and update requirements.txt - Replace retry library with tenacity for better exception handling - Add verbosity level checks for logging prompts and AI responses - Add support for HuggingFace API base and repetition penalty in chat completion - Update requirements.txt with tenacity library 2024-03-06 12:13:54 +02:00			`from tenacity import retry, retry_if_exception_type, stop_after_attempt`
Merge branch 'base-ai-handler' into abstract-BaseAiHandler 2023-12-14 07:44:13 +08:00			`from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler`
Support context aware settings (for each incoming request), support override of settings, refactor CLI to use pr_agent.py 2023-08-01 14:43:26 +03:00			`from pr_agent.config_loader import get_settings`
Refactor logging system to use custom logger across the codebase 2023-10-16 14:56:00 +03:00			`from pr_agent.log import get_logger`

Default timeout for AI is now 180s, configurable 2023-08-07 13:26:28 +03:00			`OPENAI_RETRIES = 5`

Initial commit - PR-Agent OSS release 2023-07-06 00:21:08 +03:00
Merge branch 'base-ai-handler' into abstract-BaseAiHandler 2023-12-14 07:44:13 +08:00			`class LiteLLMAIHandler(BaseAiHandler):`
docstring 2023-07-20 10:51:21 +03:00			`"""`
			`This class handles interactions with the OpenAI API for chat completions.`
			`It initializes the API key and other settings from a configuration file,`
			`and provides a method for performing chat completions using the OpenAI ChatCompletion API.`
			`"""`

Initial commit - PR-Agent OSS release 2023-07-06 00:21:08 +03:00			`def __init__(self):`
docstring 2023-07-20 10:51:21 +03:00			`"""`
			`Initializes the OpenAI API key and other settings from a configuration file.`
			`Raises a ValueError if the OpenAI key is missing.`
			`"""`
Support Google's Vertex AI 2023-11-07 09:13:08 +00:00			`self.azure = False`
support Amazon Bedrock 2023-11-28 20:11:40 +09:00			`self.aws_bedrock_client = None`
Refactor litellm_ai_handler.py and update requirements.txt - Replace retry library with tenacity for better exception handling - Add verbosity level checks for logging prompts and AI responses - Add support for HuggingFace API base and repetition penalty in chat completion - Update requirements.txt with tenacity library 2024-03-06 12:13:54 +02:00			`self.api_base = None`
			`self.repetition_penalty = None`
Support Google's Vertex AI 2023-11-07 09:13:08 +00:00			`if get_settings().get("OPENAI.KEY", None):`
Support context aware settings (for each incoming request), support override of settings, refactor CLI to use pr_agent.py 2023-08-01 14:43:26 +03:00			`openai.api_key = get_settings().openai.key`
bug fixes and updates 2023-08-03 16:05:46 -07:00			`litellm.openai_key = get_settings().openai.key`
Support Google's Vertex AI 2023-11-07 09:13:08 +00:00			`if get_settings().get("litellm.use_client"):`
			`litellm_token = get_settings().get("litellm.LITELLM_TOKEN")`
			`assert litellm_token, "LITELLM_TOKEN is required"`
			`os.environ["LITELLM_TOKEN"] = litellm_token`
			`litellm.use_client = True`
			`if get_settings().get("OPENAI.ORG", None):`
			`litellm.organization = get_settings().openai.org`
			`if get_settings().get("OPENAI.API_TYPE", None):`
			`if get_settings().openai.api_type == "azure":`
			`self.azure = True`
			`litellm.azure_key = get_settings().openai.key`
			`if get_settings().get("OPENAI.API_VERSION", None):`
			`litellm.api_version = get_settings().openai.api_version`
			`if get_settings().get("OPENAI.API_BASE", None):`
			`litellm.api_base = get_settings().openai.api_base`
			`if get_settings().get("ANTHROPIC.KEY", None):`
			`litellm.anthropic_key = get_settings().anthropic.key`
			`if get_settings().get("COHERE.KEY", None):`
			`litellm.cohere_key = get_settings().cohere.key`
			`if get_settings().get("REPLICATE.KEY", None):`
			`litellm.replicate_key = get_settings().replicate.key`
			`if get_settings().get("REPLICATE.KEY", None):`
			`litellm.replicate_key = get_settings().replicate.key`
			`if get_settings().get("HUGGINGFACE.KEY", None):`
			`litellm.huggingface_key = get_settings().huggingface.key`
Refactor litellm_ai_handler.py and update requirements.txt - Replace retry library with tenacity for better exception handling - Add verbosity level checks for logging prompts and AI responses - Add support for HuggingFace API base and repetition penalty in chat completion - Update requirements.txt with tenacity library 2024-03-06 12:13:54 +02:00			`if get_settings().get("HUGGINGFACE.API_BASE", None) and 'huggingface' in get_settings().config.model:`
			`litellm.api_base = get_settings().huggingface.api_base`
			`self.api_base = get_settings().huggingface.api_base`
			`if get_settings().get("HUGGINGFACE.REPITITION_PENALTY", None):`
			`self.repetition_penalty = float(get_settings().huggingface.repetition_penalty)`
Support Google's Vertex AI 2023-11-07 09:13:08 +00:00			`if get_settings().get("VERTEXAI.VERTEX_PROJECT", None):`
			`litellm.vertex_project = get_settings().vertexai.vertex_project`
			`litellm.vertex_location = get_settings().get(`
			`"VERTEXAI.VERTEX_LOCATION", None`
			`)`
support Amazon Bedrock 2023-11-28 20:11:40 +09:00			`if get_settings().get("AWS.BEDROCK_REGION", None):`
hard code value 2023-11-28 20:59:21 +09:00			`litellm.AmazonAnthropicConfig.max_tokens_to_sample = 2000`
support Amazon Bedrock 2023-11-28 20:11:40 +09:00			`self.aws_bedrock_client = boto3.client(`
			`service_name="bedrock-runtime",`
			`region_name=get_settings().aws.bedrock_region,`
			`)`
Initial commit - PR-Agent OSS release 2023-07-06 00:21:08 +03:00
Support fallback deployments to accompany fallback models This is useful for example in Azure OpenAI deployments where you have a different deployment per model, so the current fallback implementation doesn't work (still uses the same deployment for each fallback attempt) 2023-08-07 16:17:06 +03:00			`@property`
			`def deployment_id(self):`
			`"""`
			`Returns the deployment ID for the OpenAI API.`
			`"""`
			`return get_settings().get("OPENAI.DEPLOYMENT_ID", None)`

Refactor litellm_ai_handler.py and update requirements.txt - Replace retry library with tenacity for better exception handling - Add verbosity level checks for logging prompts and AI responses - Add support for HuggingFace API base and repetition penalty in chat completion - Update requirements.txt with tenacity library 2024-03-06 12:13:54 +02:00			`@retry(`
			`retry=retry_if_exception_type((openai.APIError, openai.APIConnectionError, openai.Timeout)), # No retry on RateLimitError`
			`stop=stop_after_attempt(OPENAI_RETRIES)`
			`)`
extended improve 2023-08-21 09:07:21 +03:00			`async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2):`
Initial commit - PR-Agent OSS release 2023-07-06 00:21:08 +03:00			`try:`
Refactor logging statements for better readability and debugging 2024-02-25 09:58:58 +02:00			`resp, finish_reason = None, None`
Logging 2023-08-07 22:42:53 +03:00			`deployment_id = self.deployment_id`
revert azure 2023-10-06 08:12:11 +03:00			`if self.azure:`
azure 2023-10-06 08:31:31 +03:00			`model = 'azure/' + model`
Refactor logging system to use custom logger across the codebase 2023-10-16 14:56:00 +03:00			`messages = [{"role": "system", "content": system}, {"role": "user", "content": user}]`
support Amazon Bedrock 2023-11-28 20:11:40 +09:00			`kwargs = {`
			`"model": model,`
			`"deployment_id": deployment_id,`
			`"messages": messages,`
			`"temperature": temperature,`
			`"force_timeout": get_settings().config.ai_timeout,`
Refactor litellm_ai_handler.py and update requirements.txt - Replace retry library with tenacity for better exception handling - Add verbosity level checks for logging prompts and AI responses - Add support for HuggingFace API base and repetition penalty in chat completion - Update requirements.txt with tenacity library 2024-03-06 12:13:54 +02:00			`"api_base" : self.api_base,`
support Amazon Bedrock 2023-11-28 20:11:40 +09:00			`}`
			`if self.aws_bedrock_client:`
			`kwargs["aws_bedrock_client"] = self.aws_bedrock_client`
Refactor litellm_ai_handler.py and update requirements.txt - Replace retry library with tenacity for better exception handling - Add verbosity level checks for logging prompts and AI responses - Add support for HuggingFace API base and repetition penalty in chat completion - Update requirements.txt with tenacity library 2024-03-06 12:13:54 +02:00			`if self.repetition_penalty:`
			`kwargs["repetition_penalty"] = self.repetition_penalty`
Refactor logging statements for better readability and debugging 2024-02-24 16:47:23 +02:00
artifact 2024-02-25 10:45:15 +02:00			`get_logger().debug("Prompts", artifact={"system": system, "user": user})`
Refactor litellm_ai_handler.py and update requirements.txt - Replace retry library with tenacity for better exception handling - Add verbosity level checks for logging prompts and AI responses - Add support for HuggingFace API base and repetition penalty in chat completion - Update requirements.txt with tenacity library 2024-03-06 12:13:54 +02:00
			`if get_settings().config.verbosity_level >= 2:`
			`get_logger().info(f"\nSystem prompt:\n{system}")`
			`get_logger().info(f"\nUser prompt:\n{user}")`

support Amazon Bedrock 2023-11-28 20:11:40 +09:00			`response = await acompletion(**kwargs)`
Refactor litellm_ai_handler.py and update requirements.txt - Replace retry library with tenacity for better exception handling - Add verbosity level checks for logging prompts and AI responses - Add support for HuggingFace API base and repetition penalty in chat completion - Update requirements.txt with tenacity library 2024-03-06 12:13:54 +02:00			`except (openai.APIError, openai.Timeout) as e:`
Refactor logging system to use custom logger across the codebase 2023-10-16 14:56:00 +03:00			`get_logger().error("Error during OpenAI inference: ", e)`
Initial commit - PR-Agent OSS release 2023-07-06 00:21:08 +03:00			`raise`
Refactor litellm_ai_handler.py and update requirements.txt - Replace retry library with tenacity for better exception handling - Add verbosity level checks for logging prompts and AI responses - Add support for HuggingFace API base and repetition penalty in chat completion - Update requirements.txt with tenacity library 2024-03-06 12:13:54 +02:00			`except (openai.RateLimitError) as e:`
Refactor logging system to use custom logger across the codebase 2023-10-16 14:56:00 +03:00			`get_logger().error("Rate limit error during OpenAI inference: ", e)`
Retry on rate limit error on OpenAI calls 2023-07-20 15:01:12 +03:00			`raise`
Retry on rate limit error on OpenAI calls 2023-07-20 15:02:34 +03:00			`except (Exception) as e:`
Refactor logging system to use custom logger across the codebase 2023-10-16 14:56:00 +03:00			`get_logger().error("Unknown error during OpenAI inference: ", e)`
Refactor litellm_ai_handler.py and update requirements.txt - Replace retry library with tenacity for better exception handling - Add verbosity level checks for logging prompts and AI responses - Add support for HuggingFace API base and repetition penalty in chat completion - Update requirements.txt with tenacity library 2024-03-06 12:13:54 +02:00			`raise openai.APIError from e`
bug fixes and updates 2023-08-03 16:05:46 -07:00			`if response is None or len(response["choices"]) == 0:`
Refactor litellm_ai_handler.py and update requirements.txt - Replace retry library with tenacity for better exception handling - Add verbosity level checks for logging prompts and AI responses - Add support for HuggingFace API base and repetition penalty in chat completion - Update requirements.txt with tenacity library 2024-03-06 12:13:54 +02:00			`raise openai.APIError`
Refactor logging statements for better readability and debugging 2024-02-25 09:58:58 +02:00			`else:`
			`resp = response["choices"][0]['message']['content']`
			`finish_reason = response["choices"][0]["finish_reason"]`
			`# usage = response.get("usage")`
			`get_logger().debug(f"\nAI response:\n{resp}")`
artifact 2024-02-25 10:45:15 +02:00			`get_logger().debug("Full_response", artifact=response)`
Refactor logging statements for better readability and debugging 2024-02-24 16:47:23 +02:00
Refactor litellm_ai_handler.py and update requirements.txt - Replace retry library with tenacity for better exception handling - Add verbosity level checks for logging prompts and AI responses - Add support for HuggingFace API base and repetition penalty in chat completion - Update requirements.txt with tenacity library 2024-03-06 12:13:54 +02:00			`if get_settings().config.verbosity_level >= 2:`
			`get_logger().info(f"\nAI response:\n{resp}")`

Merge branch 'base-ai-handler' into abstract-BaseAiHandler 2023-12-14 07:44:13 +08:00			`return resp, finish_reason`