From 1b098aea13ba2042f05e2c3e95acdf29d58e709f Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Fri, 8 Sep 2023 09:59:44 -0700
Subject: [PATCH] adding documentation on how to call local hf models

---
 Usage.md                                 | 25 ++++++++++++++++++++++++
 pr_agent/settings/.secrets_template.toml |  3 +++
 2 files changed, 28 insertions(+)

diff --git a/Usage.md b/Usage.md
index 4371ca6b..ad176093 100644
--- a/Usage.md
+++ b/Usage.md
@@ -169,6 +169,31 @@ in the configuration.toml
 
 #### Huggingface
 
+**Local**
+You can run Huggingface models locally through either [VLLM](https://docs.litellm.ai/docs/providers/vllm) or [Ollama](https://docs.litellm.ai/docs/providers/ollama).
+
+E.g. to use a new Huggingface model locally via Ollama, set:
+```
+[__init__.py]
+MAX_TOKENS = {
+    "model-name-on-ollama": <max_tokens>
+}
+e.g.
+MAX_TOKENS = {
+    ...,
+    "llama2": 4096
+}
+
+
+[config] # in configuration.toml
+model = "ollama/llama2"
+
+[ollama] # in .secrets.toml
+api_base = ... # the base url for your local Ollama instance
+```
+
+**Inference Endpoints**
+
 To use a new model with Huggingface Inference Endpoints, for example, set:
 ```
 [__init__.py]
diff --git a/pr_agent/settings/.secrets_template.toml b/pr_agent/settings/.secrets_template.toml
index d4fef551..1fcf53c2 100644
--- a/pr_agent/settings/.secrets_template.toml
+++ b/pr_agent/settings/.secrets_template.toml
@@ -29,6 +29,9 @@ key = "" # Optional, uncomment if you want to use Replicate. Acquire through htt
 key = "" # Optional, uncomment if you want to use Huggingface Inference API. Acquire through https://huggingface.co/docs/api-inference/quicktour
 api_base = "" # the base url for your huggingface inference endpoint
 
+[ollama]
+api_base = "" # the base url for your local Ollama instance
+
 [github]
 # ---- Set the following only for deployment type == "user"
 user_token = "" # A GitHub personal access token with 'repo' scope.
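
For anyone trying this patch out, the three settings it documents fit together as one end-to-end Ollama configuration. The sketch below is illustrative rather than part of the patch: it assumes the `MAX_TOKENS` table lives in `pr_agent/algo/__init__.py` (adjust the path to your checkout) and that Ollama is serving on its default local address, `http://localhost:11434`.

```
[__init__.py]  # assumed path: pr_agent/algo/__init__.py
MAX_TOKENS = {
    ...,
    "llama2": 4096  # context window for the llama2 model served by Ollama
}

[config]  # in configuration.toml
model = "ollama/llama2"

[ollama]  # in .secrets.toml
api_base = "http://localhost:11434"  # Ollama's default local address; change if yours differs
```

With these three values aligned, requests for `ollama/llama2` should be routed to the local server rather than a hosted Huggingface endpoint.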
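Before pointing pr-agent at the server, it can be worth confirming that the endpoint answers on its own. pr-agent issues its model calls through LiteLLM, so a short standalone script exercises the same path. This is a sketch under the assumptions that `litellm` is installed (`pip install litellm`), that `ollama run llama2` has already pulled and started the model, and that the script name is hypothetical:

```python
# smoke_test_ollama.py -- hypothetical helper, not part of the patch.
# Sends one chat message through LiteLLM to the local Ollama server,
# using the same model name and api_base as configuration.toml / .secrets.toml.
from litellm import completion

response = completion(
    model="ollama/llama2",              # must match [config].model
    api_base="http://localhost:11434",  # must match [ollama].api_base (assumed default port)
    messages=[{"role": "user", "content": "Say hello in five words."}],
)

# LiteLLM returns an OpenAI-style response object.
print(response["choices"][0]["message"]["content"])
```

If this prints a completion, the same model name and base url should work when pr-agent makes the call.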