Merge pull request #747 from Codium-ai/tr/claude3

Refactor litellm_ai_handler.py and update requirements.txt
This commit is contained in:
Ori Kotek
2024-03-06 12:31:18 +02:00
committed by GitHub
2 changed files with 28 additions and 12 deletions

View File

@@ -4,9 +4,7 @@ import boto3
import litellm import litellm
import openai import openai
from litellm import acompletion from litellm import acompletion
# from openai.error import APIError, RateLimitError, Timeout, TryAgain from tenacity import retry, retry_if_exception_type, stop_after_attempt
from openai import APIError, RateLimitError, Timeout
from retry import retry
from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.config_loader import get_settings from pr_agent.config_loader import get_settings
from pr_agent.log import get_logger from pr_agent.log import get_logger
@@ -28,7 +26,8 @@ class LiteLLMAIHandler(BaseAiHandler):
""" """
self.azure = False self.azure = False
self.aws_bedrock_client = None self.aws_bedrock_client = None
self.api_base = None
self.repetition_penalty = None
if get_settings().get("OPENAI.KEY", None): if get_settings().get("OPENAI.KEY", None):
openai.api_key = get_settings().openai.key openai.api_key = get_settings().openai.key
litellm.openai_key = get_settings().openai.key litellm.openai_key = get_settings().openai.key
@@ -57,8 +56,11 @@ class LiteLLMAIHandler(BaseAiHandler):
litellm.replicate_key = get_settings().replicate.key litellm.replicate_key = get_settings().replicate.key
if get_settings().get("HUGGINGFACE.KEY", None): if get_settings().get("HUGGINGFACE.KEY", None):
litellm.huggingface_key = get_settings().huggingface.key litellm.huggingface_key = get_settings().huggingface.key
if get_settings().get("HUGGINGFACE.API_BASE", None): if get_settings().get("HUGGINGFACE.API_BASE", None) and 'huggingface' in get_settings().config.model:
litellm.api_base = get_settings().huggingface.api_base litellm.api_base = get_settings().huggingface.api_base
self.api_base = get_settings().huggingface.api_base
if get_settings().get("HUGGINGFACE.REPITITION_PENALTY", None):
self.repetition_penalty = float(get_settings().huggingface.repetition_penalty)
if get_settings().get("VERTEXAI.VERTEX_PROJECT", None): if get_settings().get("VERTEXAI.VERTEX_PROJECT", None):
litellm.vertex_project = get_settings().vertexai.vertex_project litellm.vertex_project = get_settings().vertexai.vertex_project
litellm.vertex_location = get_settings().get( litellm.vertex_location = get_settings().get(
@@ -78,8 +80,10 @@ class LiteLLMAIHandler(BaseAiHandler):
""" """
return get_settings().get("OPENAI.DEPLOYMENT_ID", None) return get_settings().get("OPENAI.DEPLOYMENT_ID", None)
@retry(exceptions=(APIError, Timeout, AttributeError, RateLimitError), @retry(
tries=OPENAI_RETRIES, delay=2, backoff=2, jitter=(1, 3)) retry=retry_if_exception_type((openai.APIError, openai.APIConnectionError, openai.Timeout)), # No retry on RateLimitError
stop=stop_after_attempt(OPENAI_RETRIES)
)
async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2): async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2):
try: try:
resp, finish_reason = None, None resp, finish_reason = None, None
@@ -93,23 +97,31 @@ class LiteLLMAIHandler(BaseAiHandler):
"messages": messages, "messages": messages,
"temperature": temperature, "temperature": temperature,
"force_timeout": get_settings().config.ai_timeout, "force_timeout": get_settings().config.ai_timeout,
"api_base" : self.api_base,
} }
if self.aws_bedrock_client: if self.aws_bedrock_client:
kwargs["aws_bedrock_client"] = self.aws_bedrock_client kwargs["aws_bedrock_client"] = self.aws_bedrock_client
if self.repetition_penalty:
kwargs["repetition_penalty"] = self.repetition_penalty
get_logger().debug("Prompts", artifact={"system": system, "user": user}) get_logger().debug("Prompts", artifact={"system": system, "user": user})
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"\nSystem prompt:\n{system}")
get_logger().info(f"\nUser prompt:\n{user}")
response = await acompletion(**kwargs) response = await acompletion(**kwargs)
except (APIError, Timeout) as e: except (openai.APIError, openai.Timeout) as e:
get_logger().error("Error during OpenAI inference: ", e) get_logger().error("Error during OpenAI inference: ", e)
raise raise
except (RateLimitError) as e: except (openai.RateLimitError) as e:
get_logger().error("Rate limit error during OpenAI inference: ", e) get_logger().error("Rate limit error during OpenAI inference: ", e)
raise raise
except (Exception) as e: except (Exception) as e:
get_logger().error("Unknown error during OpenAI inference: ", e) get_logger().error("Unknown error during OpenAI inference: ", e)
raise APIError from e raise openai.APIError from e
if response is None or len(response["choices"]) == 0: if response is None or len(response["choices"]) == 0:
raise APIError raise openai.APIError
else: else:
resp = response["choices"][0]['message']['content'] resp = response["choices"][0]['message']['content']
finish_reason = response["choices"][0]["finish_reason"] finish_reason = response["choices"][0]["finish_reason"]
@@ -117,4 +129,7 @@ class LiteLLMAIHandler(BaseAiHandler):
get_logger().debug(f"\nAI response:\n{resp}") get_logger().debug(f"\nAI response:\n{resp}")
get_logger().debug("Full_response", artifact=response) get_logger().debug("Full_response", artifact=response)
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"\nAI response:\n{resp}")
return resp, finish_reason return resp, finish_reason

View File

@@ -25,4 +25,5 @@ starlette-context==0.3.6
tiktoken==0.5.2 tiktoken==0.5.2
ujson==5.8.0 ujson==5.8.0
uvicorn==0.22.0 uvicorn==0.22.0
tenacity==8.2.3
# langchain==0.0.349 # uncomment this to support language LangChainOpenAIHandler # langchain==0.0.349 # uncomment this to support language LangChainOpenAIHandler