diff --git a/Usage.md b/Usage.md
index 4371ca6b..ad176093 100644
--- a/Usage.md
+++ b/Usage.md
@@ -169,6 +169,31 @@ in the configuration.toml
 
 #### Huggingface
 
+**Local**
+You can run Huggingface models locally through either [vLLM](https://docs.litellm.ai/docs/providers/vllm) or [Ollama](https://docs.litellm.ai/docs/providers/ollama).
+
+E.g., to use a new Huggingface model locally via Ollama, set:
+```
+[__init__.py]
+MAX_TOKENS = {
+    "model-name-on-ollama": <max_tokens>
+}
+e.g.
+MAX_TOKENS = {
+    ...,
+    "llama2": 4096
+}
+
+
+[config] # in configuration.toml
+model = "ollama/llama2"
+
+[ollama] # in .secrets.toml
+api_base = ... # the base url of your local Ollama server, e.g. http://localhost:11434
+```
+
+**Inference Endpoints**
+
 To use a new model with Huggingface Inference Endpoints, for example, set:
 ```
 [__init__.py]
diff --git a/pr_agent/settings/.secrets_template.toml b/pr_agent/settings/.secrets_template.toml
index d4fef551..1fcf53c2 100644
--- a/pr_agent/settings/.secrets_template.toml
+++ b/pr_agent/settings/.secrets_template.toml
@@ -29,6 +29,9 @@ key = "" # Optional, uncomment if you want to use Replicate. Acquire through htt
 key = "" # Optional, uncomment if you want to use Huggingface Inference API. Acquire through https://huggingface.co/docs/api-inference/quicktour
 api_base = "" # the base url for your huggingface inference endpoint
 
+[ollama]
+api_base = "" # the base url of your local Ollama server, e.g. http://localhost:11434
+
 [github]
 # ---- Set the following only for deployment type == "user"
 user_token = "" # A GitHub personal access token with 'repo' scope.
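
For reference, a minimal sanity check (not part of the patch) for the Ollama wiring the new docs describe. It is a sketch that assumes `litellm` is installed, `ollama serve` is running on Ollama's default port `11434`, and the `llama2` model has been pulled; the model string and `api_base` mirror the `[config]` and `[ollama]` values added above:

```python
# Sketch: confirm the local Ollama endpoint answers through litellm before
# pointing pr-agent at it. The model name and port are assumptions;
# substitute whichever model you pulled and the host you actually serve on.
import litellm

response = litellm.completion(
    model="ollama/llama2",              # same string as `model` in configuration.toml
    api_base="http://localhost:11434",  # same value as [ollama].api_base in .secrets.toml
    messages=[{"role": "user", "content": "Reply with one word: ready?"}],
)
print(response["choices"][0]["message"]["content"])
```

If this prints a completion, the same two values should work when placed in configuration.toml and .secrets.toml.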