Mirror of https://github.com/qodo-ai/pr-agent.git (synced 2025-07-15 18:20:37 +08:00)
Merge branch 'main' into zmeir-fallback_deployments
@@ -11,7 +11,7 @@ from github import RateLimitExceededException
 from pr_agent.algo import MAX_TOKENS
 from pr_agent.algo.git_patch_processing import convert_to_hunks_with_lines_numbers, extend_patch, handle_patch_deletions
 from pr_agent.algo.language_handler import sort_files_by_main_languages
-from pr_agent.algo.token_handler import TokenHandler
+from pr_agent.algo.token_handler import TokenHandler, get_token_encoder
 from pr_agent.config_loader import get_settings
 from pr_agent.git_providers.git_provider import FilePatchInfo, GitProvider
 
@@ -300,3 +300,30 @@ def find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo],
                         absolute_position = start2 + delta - 1
                         break
     return position, absolute_position
+
+
+def clip_tokens(text: str, max_tokens: int) -> str:
+    """
+    Clip the number of tokens in a string to a maximum number of tokens.
+
+    Args:
+        text (str): The string to clip.
+        max_tokens (int): The maximum number of tokens allowed in the string.
+
+    Returns:
+        str: The clipped string.
+    """
+    # Heuristic: scale the character count by the ratio of allowed tokens to measured tokens
+    try:
+        encoder = get_token_encoder()
+        num_input_tokens = len(encoder.encode(text))
+        if num_input_tokens <= max_tokens:
+            return text
+        num_chars = len(text)
+        chars_per_token = num_chars / num_input_tokens
+        num_output_chars = int(chars_per_token * max_tokens)
+        clipped_text = text[:num_output_chars]
+        return clipped_text
+    except Exception as e:
+        logging.warning(f"Failed to clip tokens: {e}")
+        return text
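For illustration only (not part of this commit's diff): clip_tokens trims by characters in proportion to the measured token density rather than re-encoding, so the result lands near, not exactly at, the budget. The long_description value below is a made-up placeholder.

# Hypothetical usage of the new clip_tokens helper: keep a long PR description within a token budget.
long_description = "word " * 5000                        # placeholder text, far above the budget
clipped = clip_tokens(long_description, max_tokens=1000)
# The clip keeps int(chars_per_token * max_tokens) characters, so the result is
# approximately 1000 tokens; re-encode it if an exact bound is required.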
@@ -4,6 +4,10 @@ from tiktoken import encoding_for_model, get_encoding
 from pr_agent.config_loader import get_settings
 
 
+def get_token_encoder():
+    return encoding_for_model(get_settings().config.model) if "gpt" in get_settings().config.model else get_encoding(
+        "cl100k_base")
+
 class TokenHandler:
     """
     A class for handling tokens in the context of a pull request.
@@ -27,7 +31,7 @@ class TokenHandler:
         - system: The system string.
         - user: The user string.
         """
-        self.encoder = encoding_for_model(get_settings().config.model) if "gpt" in get_settings().config.model else get_encoding("cl100k_base")
+        self.encoder = get_token_encoder()
         self.prompt_tokens = self._get_system_user_tokens(pr, self.encoder, vars, system, user)
 
     def _get_system_user_tokens(self, pr, encoder, vars: dict, system, user):
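For illustration only (not part of this commit's diff): get_token_encoder() factors out the encoder choice so other code (e.g. clip_tokens above) can share it; any model name without "gpt" in it falls back to the cl100k_base encoding. The pick_encoder helper below is hypothetical and only mirrors that branch without the settings dependency.

from tiktoken import encoding_for_model, get_encoding

def pick_encoder(model_name: str):
    # Same selection logic as get_token_encoder(), with the model name passed in directly
    return encoding_for_model(model_name) if "gpt" in model_name else get_encoding("cl100k_base")

print(pick_encoder("gpt-4").name)      # model-specific encoding resolved by tiktoken
print(pick_encoder("claude-2").name)   # no "gpt" in the name, so the cl100k_base fallback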
@@ -8,8 +8,8 @@ import textwrap
 from datetime import datetime
 from typing import Any, List
 
+import yaml
 from starlette_context import context
 
 from pr_agent.config_loader import get_settings, global_settings
 
-
@@ -258,3 +258,26 @@ def update_settings_from_args(args: List[str]) -> List[str]:
             else:
                 other_args.append(arg)
     return other_args
+
+
+def load_yaml(review_text: str) -> dict:
+    review_text = review_text.removeprefix('```yaml').rstrip('`')
+    try:
+        data = yaml.load(review_text, Loader=yaml.SafeLoader)
+    except Exception as e:
+        logging.error(f"Failed to parse AI prediction: {e}")
+        data = try_fix_yaml(review_text)
+    return data
+
+def try_fix_yaml(review_text: str) -> dict:
+    review_text_lines = review_text.split('\n')
+    data = {}
+    for i in range(1, len(review_text_lines)):
+        review_text_lines_tmp = '\n'.join(review_text_lines[:-i])
+        try:
+            data = yaml.load(review_text_lines_tmp, Loader=yaml.SafeLoader)
+            logging.info(f"Successfully parsed AI prediction after removing {i} lines")
+            break
+        except:
+            pass
+    return data
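For illustration only (not part of this commit's diff): load_yaml strips a leading ```yaml fence and trailing backticks, and if parsing still fails, try_fix_yaml retries after dropping one trailing line at a time, which recovers predictions whose final lines are truncated. The broken_prediction sample below is made up.

# Hypothetical model output: fenced YAML whose final value is cut off mid-string.
broken_prediction = (
    "```yaml\n"
    "review:\n"
    "  estimated_effort: 2\n"
    "  summary: \"Adds token clipping and YAML-recovery helpers\"\n"
    "  truncated_field: \"this value never clo\n"
    "```"
)
data = load_yaml(broken_prediction)
# try_fix_yaml drops the broken trailing lines, so the intact fields survive:
print(data["review"]["summary"])   # -> Adds token clipping and YAML-recovery helpers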