TokenEncoder

This commit is contained in:
mrT23
2024-04-03 08:42:50 +03:00
parent a13c6e964b
commit 9c3673209d
2 changed files with 20 additions and 7 deletions

View File

@ -1,12 +1,25 @@
from jinja2 import Environment, StrictUndefined
from tiktoken import encoding_for_model, get_encoding
from pr_agent.config_loader import get_settings
from threading import Lock
def get_token_encoder():
return encoding_for_model(get_settings().config.model) if "gpt" in get_settings().config.model else get_encoding(
"cl100k_base")
class TokenEncoder:
_encoder_instance = None
_model = None
_lock = Lock() # Create a lock object
@classmethod
def get_token_encoder(cls):
model = get_settings().config.model
if cls._encoder_instance is None or model != cls._model: # Check without acquiring the lock for performance
with cls._lock: # Lock acquisition to ensure thread safety
if cls._encoder_instance is None or model != cls._model:
cls._model = model
cls._encoder_instance = encoding_for_model(cls._model) if "gpt" in cls._model else get_encoding(
"cl100k_base")
return cls._encoder_instance
class TokenHandler:
"""
@ -31,7 +44,7 @@ class TokenHandler:
- system: The system string.
- user: The user string.
"""
self.encoder = get_token_encoder()
self.encoder = TokenEncoder.get_token_encoder()
if pr is not None:
self.prompt_tokens = self._get_system_user_tokens(pr, self.encoder, vars, system, user)