From 5971a06d739c9c70211c28c78290efa19cad150c Mon Sep 17 00:00:00 2001
From: mrT23
Date: Thu, 2 Jan 2025 11:16:21 +0200
Subject: [PATCH] docs: improve Ollama and Hugging Face model configuration docs

---
 docs/docs/usage-guide/changing_a_model.md | 30 ++++-------------------
 pr_agent/config_loader.py                 |  2 +-
 pr_agent/settings/configuration.toml      |  2 +-
 3 files changed, 7 insertions(+), 27 deletions(-)

diff --git a/docs/docs/usage-guide/changing_a_model.md b/docs/docs/usage-guide/changing_a_model.md
index c86af096..dc3efc9c 100644
--- a/docs/docs/usage-guide/changing_a_model.md
+++ b/docs/docs/usage-guide/changing_a_model.md
@@ -30,50 +30,30 @@ model="" # the OpenAI model you've deployed on Azure (e.g. gpt-4o)
 fallback_models=["..."]
 ```
 
-### Hugging Face
+### Ollama
 
 **Local**
 You can run Hugging Face models locally through either [VLLM](https://docs.litellm.ai/docs/providers/vllm) or [Ollama](https://docs.litellm.ai/docs/providers/ollama)
 
 E.g. to use a new Hugging Face model locally via Ollama, set:
 ```
-[__init__.py]
-MAX_TOKENS = {
-    "model-name-on-ollama": <max_tokens>
-}
-e.g.
-MAX_TOKENS={
-    ...,
-    "ollama/llama2": 4096
-}
-
-
 [config] # in configuration.toml
 model = "ollama/llama2"
 fallback_models=["ollama/llama2"]
+custom_model_max_tokens=... # set the maximal input tokens for the model
 
 [ollama] # in .secrets.toml
-api_base = ... # the base url for your Hugging Face inference endpoint
-# e.g. if running Ollama locally, you may use:
-api_base = "http://localhost:11434/"
+api_base = "http://localhost:11434" # or whatever port you're running Ollama on
 ```
 
-### Inference Endpoints
+### Hugging Face Inference Endpoints
 
 To use a new model with Hugging Face Inference Endpoints, for example, set:
 ```
-[__init__.py]
-MAX_TOKENS = {
-    "model-name-on-huggingface": <max_tokens>
-}
-e.g.
-MAX_TOKENS={
-    ...,
-    "meta-llama/Llama-2-7b-chat-hf": 4096
-}
 [config] # in configuration.toml
 model = "huggingface/meta-llama/Llama-2-7b-chat-hf"
 fallback_models=["huggingface/meta-llama/Llama-2-7b-chat-hf"]
+custom_model_max_tokens=... # set the maximal input tokens for the model
 
 [huggingface] # in .secrets.toml
 key = ... # your Hugging Face api key
diff --git a/pr_agent/config_loader.py b/pr_agent/config_loader.py
index b13a3ce7..9ae430ca 100644
--- a/pr_agent/config_loader.py
+++ b/pr_agent/config_loader.py
@@ -12,7 +12,6 @@ global_settings = Dynaconf(
     envvar_prefix=False,
     merge_enabled=True,
     settings_files=[join(current_dir, f) for f in [
-        "settings/.secrets.toml",
         "settings/configuration.toml",
         "settings/ignore.toml",
         "settings/language_extensions.toml",
@@ -29,6 +28,7 @@ global_settings = Dynaconf(
         "settings/pr_add_docs.toml",
         "settings/custom_labels.toml",
         "settings/pr_help_prompts.toml",
+        "settings/.secrets.toml",
         "settings_prod/.secrets.toml",
     ]]
 )
diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
index 3bc91099..90e10366 100644
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@@ -2,6 +2,7 @@
 # models
 model="gpt-4o-2024-11-20"
 fallback_models=["gpt-4o-2024-08-06"]
+custom_model_max_tokens=-1 # for models not in the default list
 #model_weak="gpt-4o-mini-2024-07-18" # optional, a weaker model to use for some easier tasks
 # CLI
 git_provider="github"
@@ -21,7 +22,6 @@ skip_keys = []
 max_description_tokens = 500
 max_commits_tokens = 500
 max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities.
-custom_model_max_tokens=-1 # for models not in the default list
 # patch extension logic
 patch_extension_skip_types =[".md",".txt"]
 allow_dynamic_context=true
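Note (not part of the patch): the updated docs point users at `model = "ollama/llama2"` plus an `[ollama].api_base`, and pr-agent resolves such models through LiteLLM (the provider docs linked above). A quick way to sanity-check those two values before putting them into `configuration.toml` and `.secrets.toml` is to call LiteLLM directly. This is a minimal sketch, assuming `litellm` is installed and an Ollama server with a pulled `llama2` model is listening on the configured port:

```
# Minimal sketch: confirm LiteLLM can reach a locally served Ollama model
# using the same identifiers the docs above put into pr-agent's config.
# Assumes `pip install litellm` and a running `ollama serve` with llama2 pulled;
# adjust the model name and port to match your setup.
import litellm

response = litellm.completion(
    model="ollama/llama2",              # same value as [config].model
    api_base="http://localhost:11434",  # same value as [ollama].api_base
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
)
print(response.choices[0].message.content)
```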
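On the `config_loader.py` change: Dynaconf loads `settings_files` in order, and later files override earlier ones for keys they share, so listing `settings/.secrets.toml` after the shipped settings files means any values a user keeps there now take precedence over the defaults. A minimal sketch of that precedence, using throwaway files in a temp directory rather than pr-agent's real settings tree:

```
# Minimal sketch: later entries in Dynaconf's settings_files win for duplicate keys.
# The file names and keys below are stand-ins for pr-agent's configuration.toml
# and .secrets.toml, written to a temp dir purely for illustration.
from pathlib import Path
from tempfile import TemporaryDirectory

from dynaconf import Dynaconf

with TemporaryDirectory() as tmp:
    base = Path(tmp)
    (base / "configuration.toml").write_text(
        '[config]\nmodel = "gpt-4o-2024-11-20"\ncustom_model_max_tokens = -1\n'
    )
    (base / "secrets.toml").write_text(
        '[config]\nmodel = "ollama/llama2"\ncustom_model_max_tokens = 4096\n'
    )

    settings = Dynaconf(
        envvar_prefix=False,
        merge_enabled=True,
        # Listing the secrets file last mirrors the reordering in the patch:
        # its values override the defaults loaded before it.
        settings_files=[str(base / "configuration.toml"), str(base / "secrets.toml")],
    )

    print(settings.config.model)                    # -> ollama/llama2
    print(settings.config.custom_model_max_tokens)  # -> 4096
```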
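The new `custom_model_max_tokens=-1` default gives models that are missing from pr-agent's built-in token table a user-supplied context size, which is why the docs above no longer ask users to edit `MAX_TOKENS` in `__init__.py`. The sketch below is a hypothetical illustration of that fallback pattern, not the actual pr-agent code; the table entries and the `get_max_tokens` helper are made up for the example:

```
# Hypothetical sketch of a token-limit lookup that falls back to
# custom_model_max_tokens for models missing from a built-in table.
# MAX_TOKENS entries and get_max_tokens() are illustrative, not pr-agent's code.
MAX_TOKENS = {
    "gpt-4o-2024-11-20": 128000,
    "ollama/llama2": 4096,
}

def get_max_tokens(model: str, custom_model_max_tokens: int = -1) -> int:
    """Return the usable context size for `model`."""
    if model in MAX_TOKENS:
        return MAX_TOKENS[model]
    if custom_model_max_tokens > 0:
        # Unknown model: trust the limit the user set in configuration.toml.
        return custom_model_max_tokens
    raise ValueError(
        f"Model '{model}' has no known token limit; "
        "set custom_model_max_tokens in configuration.toml"
    )

print(get_max_tokens("ollama/llama2"))                                 # 4096
print(get_max_tokens("ollama/mistral", custom_model_max_tokens=8192))  # 8192
```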