From 5971a06d739c9c70211c28c78290efa19cad150c Mon Sep 17 00:00:00 2001
From: mrT23
Date: Thu, 2 Jan 2025 11:16:21 +0200
Subject: [PATCH] docs: improve Ollama and Hugging Face model configuration docs

---
 docs/docs/usage-guide/changing_a_model.md | 30 ++++-------------------
 pr_agent/config_loader.py                 |  2 +-
 pr_agent/settings/configuration.toml      |  2 +-
 3 files changed, 7 insertions(+), 27 deletions(-)

diff --git a/docs/docs/usage-guide/changing_a_model.md b/docs/docs/usage-guide/changing_a_model.md
index c86af096..dc3efc9c 100644
--- a/docs/docs/usage-guide/changing_a_model.md
+++ b/docs/docs/usage-guide/changing_a_model.md
@@ -30,50 +30,30 @@ model="" # the OpenAI model you've deployed on Azure (e.g. gpt-4o)
 fallback_models=["..."]
 ```
 
-### Hugging Face
+### Ollama
 
 **Local**
 You can run Hugging Face models locally through either [VLLM](https://docs.litellm.ai/docs/providers/vllm) or [Ollama](https://docs.litellm.ai/docs/providers/ollama)
 
 E.g. to use a new Hugging Face model locally via Ollama, set:
 ```
-[__init__.py]
-MAX_TOKENS = {
-    "model-name-on-ollama": <max_tokens>
-}
-e.g.
-MAX_TOKENS={
-    ...,
-    "ollama/llama2": 4096
-}
-
-
 [config] # in configuration.toml
 model = "ollama/llama2"
 fallback_models=["ollama/llama2"]
+custom_model_max_tokens=... # set the maximal input tokens for the model
 
 [ollama] # in .secrets.toml
-api_base = ... # the base url for your Hugging Face inference endpoint
-# e.g. if running Ollama locally, you may use:
-api_base = "http://localhost:11434/"
+api_base = "http://localhost:11434" # or whatever port you're running Ollama on
 ```
 
-### Inference Endpoints
+### Hugging Face Inference Endpoints
 
 To use a new model with Hugging Face Inference Endpoints, for example, set:
 ```
-[__init__.py]
-MAX_TOKENS = {
-    "model-name-on-huggingface": <max_tokens>
-}
-e.g.
-MAX_TOKENS={
-    ...,
-    "meta-llama/Llama-2-7b-chat-hf": 4096
-}
 [config] # in configuration.toml
 model = "huggingface/meta-llama/Llama-2-7b-chat-hf"
 fallback_models=["huggingface/meta-llama/Llama-2-7b-chat-hf"]
+custom_model_max_tokens=... # set the maximal input tokens for the model
 
 [huggingface] # in .secrets.toml
 key = ... # your Hugging Face api key
diff --git a/pr_agent/config_loader.py b/pr_agent/config_loader.py
index b13a3ce7..9ae430ca 100644
--- a/pr_agent/config_loader.py
+++ b/pr_agent/config_loader.py
@@ -12,7 +12,6 @@ global_settings = Dynaconf(
     envvar_prefix=False,
     merge_enabled=True,
     settings_files=[join(current_dir, f) for f in [
-        "settings/.secrets.toml",
         "settings/configuration.toml",
         "settings/ignore.toml",
         "settings/language_extensions.toml",
@@ -29,6 +28,7 @@ global_settings = Dynaconf(
         "settings/pr_add_docs.toml",
         "settings/custom_labels.toml",
         "settings/pr_help_prompts.toml",
+        "settings/.secrets.toml",
         "settings_prod/.secrets.toml",
     ]]
 )
diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
index 3bc91099..90e10366 100644
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@@ -2,6 +2,7 @@
 # models
 model="gpt-4o-2024-11-20"
 fallback_models=["gpt-4o-2024-08-06"]
+custom_model_max_tokens=-1 # for models not in the default list
 #model_weak="gpt-4o-mini-2024-07-18" # optional, a weaker model to use for some easier tasks
 # CLI
 git_provider="github"
@@ -21,7 +22,6 @@ skip_keys = []
 max_description_tokens = 500
 max_commits_tokens = 500
 max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities.
-custom_model_max_tokens=-1 # for models not in the default list
 # patch extension logic
 patch_extension_skip_types =[".md",".txt"]
 allow_dynamic_context=true
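Note (not part of the patch): the updated docs point users at `model = "ollama/llama2"` plus an `[ollama].api_base`, and pr-agent resolves such models through LiteLLM (the provider docs linked above). A quick way to sanity-check those two values before putting them into `configuration.toml` and `.secrets.toml` is to call LiteLLM directly. This is a minimal sketch, assuming `litellm` is installed and an Ollama server with a pulled `llama2` model is listening on the configured port:

```
# Minimal sketch: confirm LiteLLM can reach a locally served Ollama model
# using the same identifiers the docs above put into pr-agent's config.
# Assumes `pip install litellm` and a running `ollama serve` with llama2 pulled;
# adjust the model name and port to match your setup.
import litellm

response = litellm.completion(
    model="ollama/llama2",              # same value as [config].model
    api_base="http://localhost:11434",  # same value as [ollama].api_base
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
)
print(response.choices[0].message.content)
```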
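On the `config_loader.py` change: Dynaconf loads `settings_files` in order, and later files override earlier ones for keys they share, so listing `settings/.secrets.toml` after the shipped settings files means any values a user keeps there now take precedence over the defaults. A minimal sketch of that precedence, using throwaway files in a temp directory rather than pr-agent's real settings tree:

```
# Minimal sketch: later entries in Dynaconf's settings_files win for duplicate keys.
# The file names and keys below are stand-ins for pr-agent's configuration.toml
# and .secrets.toml, written to a temp dir purely for illustration.
from pathlib import Path
from tempfile import TemporaryDirectory

from dynaconf import Dynaconf

with TemporaryDirectory() as tmp:
    base = Path(tmp)
    (base / "configuration.toml").write_text(
        '[config]\nmodel = "gpt-4o-2024-11-20"\ncustom_model_max_tokens = -1\n'
    )
    (base / "secrets.toml").write_text(
        '[config]\nmodel = "ollama/llama2"\ncustom_model_max_tokens = 4096\n'
    )

    settings = Dynaconf(
        envvar_prefix=False,
        merge_enabled=True,
        # Listing the secrets file last mirrors the reordering in the patch:
        # its values override the defaults loaded before it.
        settings_files=[str(base / "configuration.toml"), str(base / "secrets.toml")],
    )

    print(settings.config.model)                    # -> ollama/llama2
    print(settings.config.custom_model_max_tokens)  # -> 4096
```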
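The new `custom_model_max_tokens=-1` default gives models that are missing from pr-agent's built-in token table a user-supplied context size, which is why the docs above no longer ask users to edit `MAX_TOKENS` in `__init__.py`. The sketch below is a hypothetical illustration of that fallback pattern, not the actual pr-agent code; the table entries and the `get_max_tokens` helper are made up for the example:

```
# Hypothetical sketch of a token-limit lookup that falls back to
# custom_model_max_tokens for models missing from a built-in table.
# MAX_TOKENS entries and get_max_tokens() are illustrative, not pr-agent's code.
MAX_TOKENS = {
    "gpt-4o-2024-11-20": 128000,
    "ollama/llama2": 4096,
}

def get_max_tokens(model: str, custom_model_max_tokens: int = -1) -> int:
    """Return the usable context size for `model`."""
    if model in MAX_TOKENS:
        return MAX_TOKENS[model]
    if custom_model_max_tokens > 0:
        # Unknown model: trust the limit the user set in configuration.toml.
        return custom_model_max_tokens
    raise ValueError(
        f"Model '{model}' has no known token limit; "
        "set custom_model_max_tokens in configuration.toml"
    )

print(get_max_tokens("ollama/llama2"))                                 # 4096
print(get_max_tokens("ollama/mistral", custom_model_max_tokens=8192))  # 8192
```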