showing how to use huggingface models

2025-07-17 11:00:39 +08:00 · 2023-09-05 16:23:22 -07:00
parent cd1ae55f4f
commit 5da6a0147c
4 changed files with 34 additions and 1 deletions
--- a/pr_agent/algo/__init__.py
+++ b/pr_agent/algo/__init__.py
@@ -11,4 +11,5 @@ MAX_TOKENS = {
    'claude-2': 100000,
    'command-nightly': 4096,
    'replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1': 4096,
+    'meta-llama/Llama-2-7b-chat-hf': 4096
 }
--- a/pr_agent/algo/ai_handler.py
+++ b/pr_agent/algo/ai_handler.py
@@ -6,7 +6,6 @@ from litellm import acompletion
 from openai.error import APIError, RateLimitError, Timeout, TryAgain
 from retry import retry
 from pr_agent.config_loader import get_settings
-
 OPENAI_RETRIES = 5


@@ -46,6 +45,8 @@ class AiHandler:
                litellm.replicate_key = get_settings().replicate.key
            if get_settings().get("HUGGINGFACE.KEY", None):
                litellm.huggingface_key = get_settings().huggingface.key
+                if get_settings().get("HUGGINGFACE.API_BASE", None):
+                    litellm.api_base = get_settings().huggingface.api_base
        except AttributeError as e:
            raise ValueError("OpenAI key is required") from e

--- a/pr_agent/settings/.secrets_template.toml
+++ b/pr_agent/settings/.secrets_template.toml
@@ -24,6 +24,11 @@ key = "" # Optional, uncomment if you want to use Cohere. Acquire through https:

 [replicate]
 key = "" # Optional, uncomment if you want to use Replicate. Acquire through https://replicate.com/
+
+[huggingface]
+key = "" # Optional, uncomment if you want to use the Hugging Face Inference API. Acquire through https://huggingface.co/docs/api-inference/quicktour
+api_base = "" # Optional, the base URL of your Hugging Face inference endpoint
+
 [github]
 # ---- Set the following only for deployment type == "user"
 user_token = ""  # A GitHub personal access token with 'repo' scope.