diff --git a/Usage.md b/Usage.md index 9cf774e7..4371ca6b 100644 --- a/Usage.md +++ b/Usage.md @@ -149,6 +149,7 @@ TBD #### Changing a model See [here](pr_agent/algo/__init__.py) for the list of available models. +#### Azure To use Azure, set: ``` api_key = "" # your azure api key @@ -166,6 +167,30 @@ model="" # the OpenAI model you've deployed on Azure (e.g. gpt-3.5-turbo) ``` in the configuration.toml +#### Huggingface + +To use a new model with Huggingface Inference Endpoints, for example, set: +``` +[__init__.py] +MAX_TOKENS = { + "model-name-on-huggingface": <max_tokens> +} +e.g. +MAX_TOKENS={ + ..., + "meta-llama/Llama-2-7b-chat-hf": 4096 +} +[config] # in configuration.toml +model = "huggingface/meta-llama/Llama-2-7b-chat-hf" + +[huggingface] # in .secrets.toml +key = ... # your huggingface api key +api_base = ... # the base url for your huggingface inference endpoint +``` +(you can obtain a Huggingface API key from [here](https://huggingface.co/settings/tokens)) + +#### Replicate + To use Llama2 model with Replicate, for example, set: ``` [config] # in configuration.toml @@ -175,6 +200,7 @@ key = ... ``` (you can obtain a Llama2 key from [here](https://replicate.com/replicate/llama-2-70b-chat/api)) + Also review the [AiHandler](pr_agent/algo/ai_handler.py) file for instruction how to set keys for other models. 
#### Extra instructions diff --git a/pr_agent/algo/__init__.py b/pr_agent/algo/__init__.py index 798fc6c5..f7865250 100644 --- a/pr_agent/algo/__init__.py +++ b/pr_agent/algo/__init__.py @@ -11,4 +11,5 @@ MAX_TOKENS = { 'claude-2': 100000, 'command-nightly': 4096, 'replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1': 4096, + 'meta-llama/Llama-2-7b-chat-hf': 4096 } diff --git a/pr_agent/algo/ai_handler.py b/pr_agent/algo/ai_handler.py index f5fc6722..b48924d6 100644 --- a/pr_agent/algo/ai_handler.py +++ b/pr_agent/algo/ai_handler.py @@ -6,7 +6,6 @@ from litellm import acompletion from openai.error import APIError, RateLimitError, Timeout, TryAgain from retry import retry from pr_agent.config_loader import get_settings - OPENAI_RETRIES = 5 @@ -46,6 +45,8 @@ class AiHandler: litellm.replicate_key = get_settings().replicate.key if get_settings().get("HUGGINGFACE.KEY", None): litellm.huggingface_key = get_settings().huggingface.key + if get_settings().get("HUGGINGFACE.API_BASE", None): + litellm.api_base = get_settings().huggingface.api_base except AttributeError as e: raise ValueError("OpenAI key is required") from e diff --git a/pr_agent/settings/.secrets_template.toml b/pr_agent/settings/.secrets_template.toml index 0ac75519..d4fef551 100644 --- a/pr_agent/settings/.secrets_template.toml +++ b/pr_agent/settings/.secrets_template.toml @@ -24,6 +24,11 @@ key = "" # Optional, uncomment if you want to use Cohere. Acquire through https: [replicate] key = "" # Optional, uncomment if you want to use Replicate. Acquire through https://replicate.com/ + +[huggingface] +key = "" # Optional, uncomment if you want to use Huggingface Inference API. Acquire through https://huggingface.co/docs/api-inference/quicktour +api_base = "" # the base url for your huggingface inference endpoint + [github] # ---- Set the following only for deployment type == "user" user_token = "" # A GitHub personal access token with 'repo' scope.