docs: improve Ollama and Hugging Face model configuration docs
@@ -30,50 +30,30 @@ model="" # the OpenAI model you've deployed on Azure (e.g. gpt-4o)
 fallback_models=["..."]
 ```
 
-### Hugging Face
+### Ollama
 
 **Local**
 
 You can run Hugging Face models locally through either [VLLM](https://docs.litellm.ai/docs/providers/vllm) or [Ollama](https://docs.litellm.ai/docs/providers/ollama)
 
 E.g. to use a new Hugging Face model locally via Ollama, set:
 ```
-[__init__.py]
-MAX_TOKENS = {
-    "model-name-on-ollama": <max_tokens>
-}
-e.g.
-MAX_TOKENS={
-    ...,
-    "ollama/llama2": 4096
-}
-
-
 [config] # in configuration.toml
 model = "ollama/llama2"
 fallback_models=["ollama/llama2"]
+custom_model_max_tokens=... # set the maximal input tokens for the model
 
 [ollama] # in .secrets.toml
-api_base = ... # the base url for your Hugging Face inference endpoint
-# e.g. if running Ollama locally, you may use:
-api_base = "http://localhost:11434/"
+api_base = "http://localhost:11434" # or whatever port you're running Ollama on
 ```
 
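The new Ollama snippet maps directly onto a LiteLLM call. Below is a minimal sketch of what that configuration resolves to, assuming pr-agent passes the `ollama/llama2` model name and the `[ollama]` `api_base` through to LiteLLM unchanged; the prompt and printout are illustrative, not pr-agent internals.

```python
# Hedged sketch: the [config]/[ollama] values above expressed as a direct
# LiteLLM call. Not pr-agent's actual handler code.
import litellm

response = litellm.completion(
    model="ollama/llama2",              # model from [config]
    api_base="http://localhost:11434",  # api_base from [ollama]
    messages=[{"role": "user", "content": "Review this diff: ..."}],
)
print(response.choices[0].message.content)
```

The same hunk continues with the Hugging Face Inference Endpoints section: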
-### Inference Endpoints
+### Hugging Face Inference Endpoints
 
 To use a new model with Hugging Face Inference Endpoints, for example, set:
 ```
-[__init__.py]
-MAX_TOKENS = {
-    "model-name-on-huggingface": <max_tokens>
-}
-e.g.
-MAX_TOKENS={
-    ...,
-    "meta-llama/Llama-2-7b-chat-hf": 4096
-}
 [config] # in configuration.toml
 model = "huggingface/meta-llama/Llama-2-7b-chat-hf"
 fallback_models=["huggingface/meta-llama/Llama-2-7b-chat-hf"]
+custom_model_max_tokens=... # set the maximal input tokens for the model
 
 [huggingface] # in .secrets.toml
 key = ... # your Hugging Face api key
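Both doc sections now point users at `custom_model_max_tokens` instead of asking them to edit the `MAX_TOKENS` table in `__init__.py`. A rough sketch of the lookup this implies, with illustrative names rather than the actual pr-agent helpers:

```python
# Illustrative fallback logic (not the real pr-agent code): a model missing
# from the built-in MAX_TOKENS table uses custom_model_max_tokens instead.
MAX_TOKENS = {"gpt-4o-2024-11-20": 128000}  # abridged built-in table

def resolve_max_tokens(model: str, custom_model_max_tokens: int = -1) -> int:
    if model in MAX_TOKENS:
        return MAX_TOKENS[model]
    if custom_model_max_tokens > 0:
        return custom_model_max_tokens
    raise ValueError(f"Unknown model {model!r}: set custom_model_max_tokens in [config]")

print(resolve_max_tokens("ollama/llama2", custom_model_max_tokens=4096))  # 4096
```

The remaining hunks touch the settings loader and the default configuration: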
@@ -12,7 +12,6 @@ global_settings = Dynaconf(
     envvar_prefix=False,
     merge_enabled=True,
     settings_files=[join(current_dir, f) for f in [
-        "settings/.secrets.toml",
         "settings/configuration.toml",
         "settings/ignore.toml",
         "settings/language_extensions.toml",
@@ -29,6 +28,7 @@ global_settings = Dynaconf(
         "settings/pr_add_docs.toml",
         "settings/custom_labels.toml",
         "settings/pr_help_prompts.toml",
+        "settings/.secrets.toml",
         "settings_prod/.secrets.toml",
     ]]
 )
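The loader change moves `settings/.secrets.toml` to the end of `settings_files`. With Dynaconf, files later in the list override keys loaded from earlier files, so user-provided secrets and overrides now win over the shipped defaults. A small standalone sketch of that behavior, assuming both files define the same `[config] model` key:

```python
# Standalone sketch of Dynaconf's load order (file contents are illustrative):
# the later entry in settings_files overrides keys from the earlier one.
from dynaconf import Dynaconf

settings = Dynaconf(
    merge_enabled=True,
    settings_files=[
        "settings/configuration.toml",  # shipped defaults, loaded first
        "settings/.secrets.toml",       # user secrets/overrides, loaded last and so win
    ],
)
print(settings.config.model)  # value from .secrets.toml when both files set it
```

In the shipped configuration defaults, `custom_model_max_tokens` now sits with the other model settings: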
@@ -2,6 +2,7 @@
 # models
 model="gpt-4o-2024-11-20"
 fallback_models=["gpt-4o-2024-08-06"]
+custom_model_max_tokens=-1 # for models not in the default list
 #model_weak="gpt-4o-mini-2024-07-18" # optional, a weaker model to use for some easier tasks
 # CLI
 git_provider="github"
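For context on the surrounding keys, `fallback_models` lists models to try when the primary `model` fails. A hedged sketch of that pattern, not pr-agent's actual retry code:

```python
# Illustrative only: try the primary model, then each fallback in order,
# returning the first successful completion.
from typing import Callable, Optional, Sequence

def complete_with_fallbacks(call: Callable[[str], str], model: str,
                            fallback_models: Sequence[str]) -> str:
    last_error: Optional[Exception] = None
    for candidate in [model, *fallback_models]:
        try:
            return call(candidate)
        except Exception as exc:  # illustrative catch-all
            last_error = exc
    raise RuntimeError("All models failed") from last_error
```

The second configuration hunk removes the old copy of `custom_model_max_tokens` from the token-limit block: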
@@ -21,7 +22,6 @@ skip_keys = []
 max_description_tokens = 500
 max_commits_tokens = 500
 max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities.
-custom_model_max_tokens=-1 # for models not in the default list
 # patch extension logic
 patch_extension_skip_types =[".md",".txt"]
 allow_dynamic_context=true
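With `custom_model_max_tokens` grouped under `# models`, the two limits read together: `custom_model_max_tokens` supplies a size for models missing from the default list, while `max_model_tokens` caps whatever limit is in effect. A hedged sketch of that interplay; the helper name and clamping order are illustrative, not the actual implementation:

```python
# Illustrative only: combine the per-model limit with the global cap.
def effective_limit(default_max_tokens: int, custom_model_max_tokens: int,
                    max_model_tokens: int) -> int:
    limit = default_max_tokens if default_max_tokens > 0 else custom_model_max_tokens
    if max_model_tokens > 0:
        limit = min(limit, max_model_tokens)
    return limit

print(effective_limit(128000, -1, 32000))  # known model, capped by max_model_tokens -> 32000
print(effective_limit(-1, 4096, 32000))    # custom model -> 4096
```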