Fix tokenizer fallback to use o200k_base instead of cl100k_base

This commit is contained in:
mrT23
2025-04-14 21:15:19 +03:00
parent 4b58a5488f
commit 08bf9593b2

View File

@@ -19,8 +19,11 @@ class TokenEncoder:
with cls._lock: # Lock acquisition to ensure thread safety
if cls._encoder_instance is None or model != cls._model:
cls._model = model
cls._encoder_instance = encoding_for_model(cls._model) if "gpt" in cls._model else get_encoding(
"cl100k_base")
try:
cls._encoder_instance = encoding_for_model(cls._model) if "gpt" in cls._model else get_encoding(
"o200k_base")
except:
cls._encoder_instance = get_encoding("o200k_base")
return cls._encoder_instance