mirror of
https://github.com/qodo-ai/pr-agent.git
synced 2025-07-02 11:50:37 +08:00
showing how to use huggingface models
This commit is contained in:
26
Usage.md
26
Usage.md
@ -149,6 +149,7 @@ TBD
|
|||||||
#### Changing a model
|
#### Changing a model
|
||||||
See [here](pr_agent/algo/__init__.py) for the list of available models.
|
See [here](pr_agent/algo/__init__.py) for the list of available models.
|
||||||
|
|
||||||
|
#### Azure
|
||||||
To use Azure, set:
|
To use Azure, set:
|
||||||
```
|
```
|
||||||
api_key = "" # your azure api key
|
api_key = "" # your azure api key
|
||||||
@ -166,6 +167,30 @@ model="" # the OpenAI model you've deployed on Azure (e.g. gpt-3.5-turbo)
|
|||||||
```
|
```
|
||||||
in the configuration.toml
|
in the configuration.toml
|
||||||
|
|
||||||
|
#### Huggingface
|
||||||
|
|
||||||
|
To use a new model with Huggingface Inference Endpoints, for example, set:
|
||||||
|
```
|
||||||
|
[__init__.py]
|
||||||
|
MAX_TOKENS = {
|
||||||
|
"model-name-on-huggingface": <max_tokens>
|
||||||
|
}
|
||||||
|
e.g.
|
||||||
|
MAX_TOKENS={
|
||||||
|
...,
|
||||||
|
"meta-llama/Llama-2-7b-chat-hf": 4096
|
||||||
|
}
|
||||||
|
[config] # in configuration.toml
|
||||||
|
model = "huggingface/meta-llama/Llama-2-7b-chat-hf"
|
||||||
|
|
||||||
|
[huggingface] # in .secrets.toml
|
||||||
|
key = ... # your huggingface api key
|
||||||
|
api_base = ... # the base url for your huggingface inference endpoint
|
||||||
|
```
|
||||||
|
(you can obtain a Huggingface API key by following the instructions [here](https://huggingface.co/docs/api-inference/quicktour))
|
||||||
|
|
||||||
|
#### Replicate
|
||||||
|
|
||||||
To use the Llama2 model with Replicate, for example, set:
|
To use the Llama2 model with Replicate, for example, set:
|
||||||
```
|
```
|
||||||
[config] # in configuration.toml
|
[config] # in configuration.toml
|
||||||
@ -175,6 +200,7 @@ key = ...
|
|||||||
```
|
```
|
||||||
(you can obtain a Llama2 key from [here](https://replicate.com/replicate/llama-2-70b-chat/api))
|
(you can obtain a Llama2 key from [here](https://replicate.com/replicate/llama-2-70b-chat/api))
|
||||||
|
|
||||||
|
|
||||||
Also review the [AiHandler](pr_agent/algo/ai_handler.py) file for instructions on how to set keys for other models.
|
Also review the [AiHandler](pr_agent/algo/ai_handler.py) file for instructions on how to set keys for other models.
|
||||||
|
|
||||||
#### Extra instructions
|
#### Extra instructions
|
||||||
|
@ -11,4 +11,5 @@ MAX_TOKENS = {
|
|||||||
'claude-2': 100000,
|
'claude-2': 100000,
|
||||||
'command-nightly': 4096,
|
'command-nightly': 4096,
|
||||||
'replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1': 4096,
|
'replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1': 4096,
|
||||||
|
'meta-llama/Llama-2-7b-chat-hf': 4096
|
||||||
}
|
}
|
||||||
|
@ -6,7 +6,6 @@ from litellm import acompletion
|
|||||||
from openai.error import APIError, RateLimitError, Timeout, TryAgain
|
from openai.error import APIError, RateLimitError, Timeout, TryAgain
|
||||||
from retry import retry
|
from retry import retry
|
||||||
from pr_agent.config_loader import get_settings
|
from pr_agent.config_loader import get_settings
|
||||||
|
|
||||||
OPENAI_RETRIES = 5
|
OPENAI_RETRIES = 5
|
||||||
|
|
||||||
|
|
||||||
@ -46,6 +45,8 @@ class AiHandler:
|
|||||||
litellm.replicate_key = get_settings().replicate.key
|
litellm.replicate_key = get_settings().replicate.key
|
||||||
if get_settings().get("HUGGINGFACE.KEY", None):
|
if get_settings().get("HUGGINGFACE.KEY", None):
|
||||||
litellm.huggingface_key = get_settings().huggingface.key
|
litellm.huggingface_key = get_settings().huggingface.key
|
||||||
|
if get_settings().get("HUGGINGFACE.API_BASE", None):
|
||||||
|
litellm.api_base = get_settings().huggingface.api_base
|
||||||
except AttributeError as e:
|
except AttributeError as e:
|
||||||
raise ValueError("OpenAI key is required") from e
|
raise ValueError("OpenAI key is required") from e
|
||||||
|
|
||||||
|
@ -24,6 +24,11 @@ key = "" # Optional, uncomment if you want to use Cohere. Acquire through https:
|
|||||||
|
|
||||||
[replicate]
|
[replicate]
|
||||||
key = "" # Optional, uncomment if you want to use Replicate. Acquire through https://replicate.com/
|
key = "" # Optional, uncomment if you want to use Replicate. Acquire through https://replicate.com/
|
||||||
|
|
||||||
|
[huggingface]
|
||||||
|
key = "" # Optional, uncomment if you want to use Huggingface Inference API. Acquire through https://huggingface.co/docs/api-inference/quicktour
|
||||||
|
api_base = "" # the base url for your huggingface inference endpoint
|
||||||
|
|
||||||
[github]
|
[github]
|
||||||
# ---- Set the following only for deployment type == "user"
|
# ---- Set the following only for deployment type == "user"
|
||||||
user_token = "" # A GitHub personal access token with 'repo' scope.
|
user_token = "" # A GitHub personal access token with 'repo' scope.
|
||||||
|
Reference in New Issue
Block a user