A (still) hacky way to clip description and commit messages

This commit is contained in:
Ori Kotek
2023-08-09 10:17:58 +03:00
parent ebbe655c40
commit e3274af831
2 changed files with 25 additions and 1 deletions

View File

@ -284,3 +284,25 @@ def find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo],
absolute_position = start2 + delta - 1 absolute_position = start2 + delta - 1
break break
return position, absolute_position return position, absolute_position
def clip_tokens(text: str, max_tokens: int) -> str:
    """
    Clip a string so that its estimated token count stays within a maximum.

    No real tokenizer is used: the token count is estimated heuristically by
    assuming 2.5 tokens per whitespace-delimited word, so at most
    ``max_tokens // 2.5`` words are kept. The cut is made at the start of the
    first word that exceeds the budget, which means whitespace preceding that
    word is preserved in the result.

    Args:
        text (str): The string to clip. Empty or None values are returned as-is.
        max_tokens (int): The maximum number of tokens allowed in the string.
            A non-positive budget keeps no words at all.

    Returns:
        str: The clipped string, or the original text when it already fits.
    """
    # Nothing to clip; this also guards against a missing (None) description,
    # which would otherwise make re.finditer raise a TypeError.
    if not text:
        return text

    # Floor division by a float yields a float; convert to int so the index
    # comparison is int-to-int, and clamp so a negative budget cannot silently
    # disable clipping (no word index ever equals a negative number).
    max_words = max(int(max_tokens // 2.5), 0)

    for word_index, word in enumerate(re.finditer(r'\S+', text)):
        if word_index == max_words:
            # First word over the budget: keep everything before it.
            return text[:word.start()]
    return text

View File

@ -8,7 +8,7 @@ from jinja2 import Environment, StrictUndefined
from pr_agent.algo.ai_handler import AiHandler from pr_agent.algo.ai_handler import AiHandler
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models, \ from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models, \
find_line_number_of_relevant_line_in_file find_line_number_of_relevant_line_in_file, clip_tokens
from pr_agent.algo.token_handler import TokenHandler from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import convert_to_markdown, try_fix_json from pr_agent.algo.utils import convert_to_markdown, try_fix_json
from pr_agent.config_loader import get_settings from pr_agent.config_loader import get_settings
@ -62,6 +62,8 @@ class PRReviewer:
"extra_instructions": get_settings().pr_reviewer.extra_instructions, "extra_instructions": get_settings().pr_reviewer.extra_instructions,
"commit_messages_str": self.git_provider.get_commit_messages(), "commit_messages_str": self.git_provider.get_commit_messages(),
} }
self.vars["description"] = clip_tokens(self.vars["description"], 500)
self.vars["commit_messages_str"] = clip_tokens(self.vars["commit_messages_str"], 500)
self.token_handler = TokenHandler( self.token_handler = TokenHandler(
self.git_provider.pr, self.git_provider.pr,