diff --git a/pr_agent/algo/token_handler.py b/pr_agent/algo/token_handler.py index aab94894..9bc801ed 100644 --- a/pr_agent/algo/token_handler.py +++ b/pr_agent/algo/token_handler.py @@ -1,6 +1,7 @@ from threading import Lock from jinja2 import Environment, StrictUndefined +from math import ceil from tiktoken import encoding_for_model, get_encoding from pr_agent.config_loader import get_settings @@ -114,6 +115,22 @@ class TokenHandler: Returns: The number of tokens in the patch string. """ - if force_accurate and 'claude' in get_settings().config.model.lower() and get_settings(use_context=False).get('anthropic.key'): + encoder_estimate = len(self.encoder.encode(patch, disallowed_special=())) + if not force_accurate: + return encoder_estimate + #else, need to provide an accurate estimation: + + model = get_settings().config.model.lower() + if force_accurate and 'claude' in model and get_settings(use_context=False).get('anthropic.key'): return self.calc_claude_tokens(patch) # API call to Anthropic for accurate token counting for Claude models - return len(self.encoder.encode(patch, disallowed_special=())) \ No newline at end of file + #else: Non Anthropic provided model + + import re + model_is_from_o_series = re.match(r"^o[1-9](-mini|-preview)?$", model) + if ('gpt' in get_settings().config.model.lower() or model_is_from_o_series) and get_settings(use_context=False).get('openai.key'): + return encoder_estimate + #else: Model is neither an OpenAI, nor an Anthropic model - therefore, cannot provide an accurate token count and instead, return a higher number as best effort. + + elbow_factor = 1 + get_settings().get('config.model_token_count_estimate_factor', 0) + get_logger().warning(f"{model}'s expected token count cannot be accurately estimated. Using {elbow_factor} of encoder output as best effort estimate") + return ceil(elbow_factor * encoder_estimate)