diff --git a/docs/docs/usage-guide/changing_a_model.md b/docs/docs/usage-guide/changing_a_model.md
index e28bf5e4..b0bde3fa 100644
--- a/docs/docs/usage-guide/changing_a_model.md
+++ b/docs/docs/usage-guide/changing_a_model.md
@@ -234,3 +234,10 @@ To bypass chat templates and temperature controls, set `config.custom_reasoning_
 reasoning_efffort= = "medium" # "low", "medium", "high"
 
 With the OpenAI models that support reasoning effort (eg: o3-mini), you can specify its reasoning effort via `config` section. The default value is `medium`. You can change it to `high` or `low` based on your usage.
+
+### Anthropic models
+
+[config]
+enable_claude_extended_thinking = false # Set to true to enable extended thinking feature
+extended_thinking_budget_tokens = 2048
+extended_thinking_max_output_tokens = 4096 # must be greater than extended_thinking_budget_tokens
diff --git a/pr_agent/algo/__init__.py b/pr_agent/algo/__init__.py
index 4487b04a..483ea2e0 100644
--- a/pr_agent/algo/__init__.py
+++ b/pr_agent/algo/__init__.py
@@ -60,6 +60,7 @@ MAX_TOKENS = {
     'anthropic/claude-3-5-sonnet-20240620': 100000,
     'anthropic/claude-3-5-sonnet-20241022': 100000,
     'anthropic/claude-3-7-sonnet-20250219': 200000,
+    'claude-3-7-sonnet-20250219': 200000,
     'anthropic/claude-3-5-haiku-20241022': 100000,
     'bedrock/anthropic.claude-instant-v1': 100000,
     'bedrock/anthropic.claude-v2': 100000,
@@ -113,3 +114,8 @@ SUPPORT_REASONING_EFFORT_MODELS = [
     "o3-mini",
     "o3-mini-2025-01-31"
 ]
+
+CLAUDE_EXTENDED_THINKING_MODELS = [
+    "anthropic/claude-3-7-sonnet-20250219",
+    "claude-3-7-sonnet-20250219"
+]
diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
index fe367c9e..af9efc87 100644
--- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py
+++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -6,7 +6,7 @@ import requests
 from litellm import acompletion
 from tenacity import retry, retry_if_exception_type, stop_after_attempt
 
-from pr_agent.algo import NO_SUPPORT_TEMPERATURE_MODELS, SUPPORT_REASONING_EFFORT_MODELS, USER_MESSAGE_ONLY_MODELS
+from pr_agent.algo import CLAUDE_EXTENDED_THINKING_MODELS, NO_SUPPORT_TEMPERATURE_MODELS, SUPPORT_REASONING_EFFORT_MODELS, USER_MESSAGE_ONLY_MODELS
 from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
 from pr_agent.algo.utils import ReasoningEffort, get_version
 from pr_agent.config_loader import get_settings
@@ -109,6 +109,9 @@ class LiteLLMAIHandler(BaseAiHandler):
         # Models that support reasoning effort
         self.support_reasoning_models = SUPPORT_REASONING_EFFORT_MODELS
 
+        # Models that support extended thinking
+        self.claude_extended_thinking_models = CLAUDE_EXTENDED_THINKING_MODELS
+
     def prepare_logs(self, response, system, user, resp, finish_reason):
         response_log = response.dict().copy()
         response_log['system'] = system
@@ -121,6 +124,49 @@ class LiteLLMAIHandler(BaseAiHandler):
             response_log['main_pr_language'] = 'unknown'
         return response_log
 
+    def _configure_claude_extended_thinking(self, model: str, kwargs: dict) -> dict:
+        """
+        Configure Claude extended thinking parameters if applicable.
+
+        Args:
+            model (str): The AI model being used
+            kwargs (dict): The keyword arguments for the model call
+
+        Returns:
+            dict: Updated kwargs with extended thinking configuration
+
+        Raises:
+            ValueError: If a token setting is not a positive integer, or if
+                extended_thinking_max_output_tokens does not exceed
+                extended_thinking_budget_tokens.
+        """
+        extended_thinking_budget_tokens = get_settings().config.get("extended_thinking_budget_tokens", 2048)
+        extended_thinking_max_output_tokens = get_settings().config.get("extended_thinking_max_output_tokens", 4096)
+
+        # Validate extended thinking parameters
+        if not isinstance(extended_thinking_budget_tokens, int) or extended_thinking_budget_tokens <= 0:
+            raise ValueError(f"extended_thinking_budget_tokens must be a positive integer, got {extended_thinking_budget_tokens}")
+        if not isinstance(extended_thinking_max_output_tokens, int) or extended_thinking_max_output_tokens <= 0:
+            raise ValueError(f"extended_thinking_max_output_tokens must be a positive integer, got {extended_thinking_max_output_tokens}")
+        # Anthropic requires max_tokens to be strictly greater than thinking.budget_tokens
+        if extended_thinking_max_output_tokens <= extended_thinking_budget_tokens:
+            raise ValueError(f"extended_thinking_max_output_tokens ({extended_thinking_max_output_tokens}) must be greater than extended_thinking_budget_tokens ({extended_thinking_budget_tokens})")
+
+        kwargs["thinking"] = {
+            "type": "enabled",
+            "budget_tokens": extended_thinking_budget_tokens
+        }
+        if get_settings().config.verbosity_level >= 2:
+            get_logger().info(f"Adding max output tokens {extended_thinking_max_output_tokens} to model {model}, extended thinking budget tokens: {extended_thinking_budget_tokens}")
+        kwargs["max_tokens"] = extended_thinking_max_output_tokens
+
+        # temperature may only be set to 1 when thinking is enabled
+        if get_settings().config.verbosity_level >= 2:
+            get_logger().info("Temperature may only be set to 1 when thinking is enabled with claude models.")
+        kwargs["temperature"] = 1
+
+        return kwargs
+
     def add_litellm_callbacks(selfs, kwargs) -> dict:
         captured_extra = []
 
@@ -246,6 +292,10 @@ class LiteLLMAIHandler(BaseAiHandler):
                 get_logger().info(f"Adding reasoning_effort with value {reasoning_effort} to model {model}.")
                 kwargs["reasoning_effort"] = reasoning_effort
 
+            # https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
+            if (model in self.claude_extended_thinking_models) and get_settings().config.get("enable_claude_extended_thinking", False):
+                kwargs = self._configure_claude_extended_thinking(model, kwargs)
+
             if get_settings().litellm.get("enable_callbacks", False):
                 kwargs = self.add_litellm_callbacks(kwargs)
 
@@ -268,13 +318,13 @@ class LiteLLMAIHandler(BaseAiHandler):
                 except json.JSONDecodeError as e:
                     raise ValueError(f"LITELLM.EXTRA_HEADERS contains invalid JSON: {str(e)}")
                 kwargs["extra_headers"] = litellm_extra_headers
-                
+
             get_logger().debug("Prompts", artifact={"system": system, "user": user})
-            
+
             if get_settings().config.verbosity_level >= 2:
                 get_logger().info(f"\nSystem prompt:\n{system}")
                 get_logger().info(f"\nUser prompt:\n{user}")
-            
+
             response = await acompletion(**kwargs)
         except (openai.APIError, openai.APITimeoutError) as e:
             get_logger().warning(f"Error during LLM inference: {e}")
diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
index 5bfb6488..7f8daedf 100644
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@@ -60,6 +60,10 @@ reasoning_effort = "medium" # "low", "medium", "high"
 enable_auto_approval=false # Set to true to enable auto-approval of PRs under certain conditions
 auto_approve_for_low_review_effort=-1 # -1 to disable, [1-5] to set the threshold for auto-approval
 auto_approve_for_no_suggestions=false # If true, the PR will be auto-approved if there are no suggestions
+# extended thinking for Claude reasoning models
+enable_claude_extended_thinking = false # Set to true to enable extended thinking feature
+extended_thinking_budget_tokens = 2048
+extended_thinking_max_output_tokens = 4096 # must be greater than extended_thinking_budget_tokens
 
 [pr_reviewer] # /review #