From 63f1d449ce29b13c9bcb841f36f403b25d7edd9b Mon Sep 17 00:00:00 2001 From: Trung Dinh Date: Wed, 22 Jan 2025 20:32:18 +0700 Subject: [PATCH 1/2] Add support for model deepseek/deepseek-reasoner --- pr_agent/algo/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pr_agent/algo/__init__.py b/pr_agent/algo/__init__.py index 7528be1f..40d4fc07 100644 --- a/pr_agent/algo/__init__.py +++ b/pr_agent/algo/__init__.py @@ -30,6 +30,7 @@ MAX_TOKENS = { 'claude-2': 100000, 'command-nightly': 4096, 'deepseek/deepseek-chat': 128000, # 128K, but may be limited by config.max_model_tokens + 'deepseek/deepseek-reasoner': 64000, # 64K, but may be limited by config.max_model_tokens 'replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1': 4096, 'meta-llama/Llama-2-7b-chat-hf': 4096, 'vertex_ai/codechat-bison': 6144, From c2ca79da0d88630848dbc56640ddb0332b15c3e4 Mon Sep 17 00:00:00 2001 From: Trung Dinh Date: Wed, 22 Jan 2025 20:33:43 +0700 Subject: [PATCH 2/2] Combining system and user prompts for o1 series and deepseek-reasoner models --- pr_agent/algo/ai_handlers/litellm_ai_handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py index 453973f6..50d14b21 100644 --- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py +++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py @@ -200,10 +200,10 @@ class LiteLLMAIHandler(BaseAiHandler): # Currently, model OpenAI o1 series does not support a separate system and user prompts O1_MODEL_PREFIX = 'o1' model_type = model.split('/')[-1] if '/' in model else model - if model_type.startswith(O1_MODEL_PREFIX): + if (model_type.startswith(O1_MODEL_PREFIX)) or ("deepseek-reasoner" in model): user = f"{system}\n\n\n{user}" system = "" - get_logger().info(f"Using O1 model, combining system and user prompts") + get_logger().info(f"Using model {model}, combining system and user prompts") 
messages = [{"role": "user", "content": user}] kwargs = { "model": model,