2024-06-30 17:33:48 +03:00
import asyncio
2023-07-13 17:24:56 +03:00
import copy
2023-09-04 12:11:39 -04:00
import re
2024-12-29 11:37:05 +02:00
import traceback
2023-12-17 16:52:03 +02:00
from functools import partial
2023-08-01 14:43:26 +03:00
from typing import List , Tuple
2023-07-13 17:24:56 +03:00
2024-06-30 18:38:06 +03:00
import yaml
2023-07-13 17:24:56 +03:00
from jinja2 import Environment , StrictUndefined
2023-12-12 23:03:38 +08:00
from pr_agent . algo . ai_handlers . base_ai_handler import BaseAiHandler
2023-12-14 09:00:14 +02:00
from pr_agent . algo . ai_handlers . litellm_ai_handler import LiteLLMAIHandler
2024-10-30 09:56:03 +09:00
from pr_agent . algo . pr_processing import ( OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD ,
get_pr_diff ,
get_pr_diff_multiple_patchs ,
retry_with_fallback_models )
2023-07-13 17:24:56 +03:00
from pr_agent . algo . token_handler import TokenHandler
2024-10-30 09:56:03 +09:00
from pr_agent . algo . utils import ( ModelType , PRDescriptionHeader , clip_tokens ,
get_max_tokens , get_user_labels , load_yaml ,
set_custom_labels ,
show_relevant_configurations )
2023-08-01 14:43:26 +03:00
from pr_agent . config_loader import get_settings
2024-10-30 09:56:03 +09:00
from pr_agent . git_providers import ( GithubProvider , get_git_provider ,
get_git_provider_with_context )
2023-07-13 17:24:56 +03:00
from pr_agent . git_providers . git_provider import get_main_pr_language
2023-10-16 14:56:00 +03:00
from pr_agent . log import get_logger
2024-01-07 09:56:09 +02:00
from pr_agent . servers . help import HelpMessage
2024-10-30 09:56:03 +09:00
from pr_agent . tools . ticket_pr_compliance_check import (
extract_and_cache_pr_tickets , extract_ticket_links_from_pr_description ,
extract_tickets )
2023-07-13 17:24:56 +03:00
class PRDescription :
2023-12-14 09:00:14 +02:00
def __init__(self, pr_url: str, args: list = None,
             ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
    """
    Set up a PRDescription for one pull request: resolve the git provider, create the AI handler,
    collect the prompt variables and build the token handler.

    Args:
        pr_url (str): The URL of the pull request.
        args (list, optional): Arguments passed to the PRDescription class. Not read by this
            constructor. Defaults to None.
        ai_handler: Zero-argument callable returning the AI handler instance; it is called once here.
            Defaults to LiteLLMAIHandler.
    """
    # Git provider, main PR language and PR id
    provider = get_git_provider_with_context(pr_url)
    self.git_provider = provider
    self.main_pr_language = get_main_pr_language(provider.get_languages(), provider.get_files())
    self.pr_id = provider.get_pr_id()
    # Passed to load_yaml as `keys_fix_yaml` wherever a prediction is parsed
    self.keys_fix = ["filename:", "language:", "changes_summary:", "changes_title:", "description:", "title:"]

    # Semantic file types are switched off (in the settings object) when gfm_markdown is unsupported
    semantic_types_unsupported = (get_settings().pr_description.enable_semantic_files_types
                                  and not provider.is_supported("gfm_markdown"))
    if semantic_types_unsupported:
        get_logger().debug(f"Disabling semantic files types for {self.pr_id}, gfm_markdown not supported.")
        get_settings().pr_description.enable_semantic_files_types = False

    # AI handler
    self.ai_handler = ai_handler()
    self.ai_handler.main_pr_language = self.main_pr_language

    # Prompt variables
    self.COLLAPSIBLE_FILE_LIST_THRESHOLD = get_settings().pr_description.get("collapsible_file_list_threshold", 8)
    # The diagram is enabled only when configured and the provider supports gfm_markdown
    enable_pr_diagram = (get_settings().pr_description.get("enable_pr_diagram", False)
                         and provider.is_supported("gfm_markdown"))
    self.vars = {
        "title": provider.pr.title,
        "branch": provider.get_pr_branch(),
        "description": provider.get_pr_description(full=False),
        "language": self.main_pr_language,
        "diff": "",  # empty diff for initial calculation
        "extra_instructions": get_settings().pr_description.extra_instructions,
        "commit_messages_str": provider.get_commit_messages(),
        "enable_custom_labels": get_settings().config.enable_custom_labels,
        "custom_labels_class": "",  # will be filled if necessary in 'set_custom_labels' function
        "enable_semantic_files_types": get_settings().pr_description.enable_semantic_files_types,
        "related_tickets": "",
        "include_file_summary_changes": len(provider.get_diff_files()) <= self.COLLAPSIBLE_FILE_LIST_THRESHOLD,
        "duplicate_prompt_examples": get_settings().config.get("duplicate_prompt_examples", False),
        "enable_pr_diagram": enable_pr_diagram,
    }

    self.user_description = provider.get_user_description()

    # Token handler for the main description prompt
    self.token_handler = TokenHandler(
        provider.pr,
        self.vars,
        get_settings().pr_description_prompt.system,
        get_settings().pr_description_prompt.user,
    )

    # Filled in later by run() / _prepare_prediction()
    self.patches_diff = None
    self.prediction = None
    self.file_label_dict = None
2023-07-13 17:24:56 +03:00
2023-08-01 14:43:26 +03:00
async def run(self):
    """
    Generate the PR description and, when `config.publish_output` is set, publish labels and description.

    Returns:
        None when the prediction is empty or when publish_output is False (the body is then stored in
        `get_settings().data`); "" otherwise, including when an exception was caught and logged.
    """
    try:
        get_logger().info(f"Generating a PR description for pr_id: {self.pr_id}")
        relevant_configs = {
            'pr_description': dict(get_settings().pr_description),
            'config': dict(get_settings().config),
        }
        get_logger().debug("Relevant configs", artifact=relevant_configs)
        if get_settings().config.publish_output and not get_settings().config.get('is_auto_command', False):
            self.git_provider.publish_comment("Preparing PR description...", is_temporary=True)

        # Ticket extraction (if any), then the model call with fallback models
        await extract_and_cache_pr_tickets(self.git_provider, self.vars)
        await retry_with_fallback_models(self._prepare_prediction, ModelType.WEAK)

        if not self.prediction:
            get_logger().warning(f"Empty prediction, PR: {self.pr_id}")
            self.git_provider.remove_initial_comment()
            return None
        self._prepare_data()

        if get_settings().pr_description.enable_semantic_files_types:
            self.file_label_dict = self._prepare_file_labels()

        labels_to_publish, file_changes = [], []
        if get_settings().pr_description.publish_labels:
            labels_to_publish = self._prepare_labels()
        else:
            get_logger().debug("Publishing labels disabled")

        if get_settings().pr_description.use_description_markers:
            title, body, walkthrough, file_changes = self._prepare_pr_answer_with_markers()
        else:
            title, body, walkthrough, file_changes = self._prepare_pr_answer()
            # The walkthrough goes into the body unless inline file summaries are both supported and enabled
            inline_summary_active = (self.git_provider.is_supported("publish_file_comments")
                                     and get_settings().pr_description.inline_file_summary)
            if not inline_summary_active:
                body += "\n\n" + walkthrough
        get_logger().debug("PR output", artifact={"title": title, "body": body})

        # Help text / help comment, only when gfm_markdown is supported
        if self.git_provider.is_supported("gfm_markdown") and get_settings().pr_description.enable_help_text:
            body += "<hr>\n\n<details> <summary><strong>✨ Describe tool usage guide:</strong></summary><hr> \n\n"
            body += HelpMessage.get_describe_usage_guide()
            body += "\n</details>\n"
        elif get_settings().pr_description.enable_help_comment and self.git_provider.is_supported("gfm_markdown"):
            if isinstance(self.git_provider, GithubProvider):
                help_comment = (
                    '\n\n___\n\n> <details> <summary> Need help?</summary><li>Type <code>/help how to ...</code> '
                    'in the comments thread for any questions about PR-Agent usage.</li><li>Check out the '
                    '<a href="https://qodo-merge-docs.qodo.ai/usage-guide/">documentation</a> '
                    'for more information.</li></details>')
            else:  # gitlab
                help_comment = (
                    "\n\n___\n\n<details><summary>Need help?</summary>- Type <code>/help how to ...</code> in the comments "
                    "thread for any questions about PR-Agent usage.<br>- Check out the "
                    "<a href='https://qodo-merge-docs.qodo.ai/usage-guide/'>documentation</a> for more information.</details>")
            body += help_comment

        # Output the relevant configurations if enabled
        if get_settings().get('config', {}).get('output_relevant_configurations', False):
            body += show_relevant_configurations(relevant_section='pr_description')

        if not get_settings().config.publish_output:
            get_logger().info('PR description, but not published since publish_output is False.')
            get_settings().data = {"artifact": body}
            return

        # Publish labels, only when they differ from what is already on the PR
        if (get_settings().pr_description.publish_labels and labels_to_publish
                and self.git_provider.is_supported("get_labels")):
            current_labels = self.git_provider.get_pr_labels(update=True)
            get_logger().debug("original labels", artifact=current_labels)
            merged_labels = labels_to_publish + get_user_labels(current_labels)
            get_logger().debug("published labels", artifact=merged_labels)
            if sorted(merged_labels) == sorted(current_labels):
                get_logger().debug("Labels are the same, not updating")
            else:
                self.git_provider.publish_labels(merged_labels)

        # Publish the description, either as a comment or as the PR description itself
        if get_settings().pr_description.publish_description_as_comment:
            comment_markdown = f"## Title\n\n{title}\n\n___\n{body}"
            if get_settings().pr_description.publish_description_as_comment_persistent:
                self.git_provider.publish_persistent_comment(comment_markdown,
                                                             initial_header="## Title",
                                                             update_header=True,
                                                             name="describe",
                                                             final_update_message=False)
            else:
                self.git_provider.publish_comment(comment_markdown)
        else:
            self.git_provider.publish_description(title, body)

            # Final update message, skipped for automatic commands
            if (get_settings().pr_description.final_update_message
                    and not get_settings().config.get('is_auto_command', False)):
                latest_commit_url = self.git_provider.get_latest_commit_url()
                if latest_commit_url:
                    pr_url = self.git_provider.get_pr_url()
                    self.git_provider.publish_comment(
                        f"**[PR Description]({pr_url})** updated to latest commit ({latest_commit_url})")
        self.git_provider.remove_initial_comment()
    except Exception as e:
        get_logger().error(f"Error generating PR description {self.pr_id}: {e}",
                           artifact={"traceback": traceback.format_exc()})

    return ""
2023-07-24 12:14:53 +03:00
async def _prepare_prediction(self, model: str) -> None:
    """
    Compute `self.prediction` (YAML text, or None) for the PR using `model`.

    Regular flow: the PR diff is sent with the `pr_description_prompt` prompt and, when semantic file
    types are enabled, the result is extended with files missing from it.

    Large-PR flow (large PR handling enabled and `get_pr_diff` returned no diff): the diff is split
    into several patch groups, each group is described with `pr_description_only_files_prompts`, and
    the title/summary headers are then produced from the collected file descriptions with
    `pr_description_only_description_prompts`.

    Args:
        model (str): Model name passed to the AI handler and to the token-limit helpers.

    Returns:
        None. The outcome is stored on `self.prediction` (and `self.patches_diff` in the regular flow).
    """
    if get_settings().pr_description.use_description_markers and 'pr_agent:' not in self.user_description:
        get_logger().info("Markers were enabled, but user description does not contain markers. Skipping AI prediction")
        return None

    large_pr_handling = (get_settings().pr_description.enable_large_pr_handling
                         and "pr_description_only_files_prompts" in get_settings())
    output = get_pr_diff(self.git_provider, self.token_handler, model, large_pr_handling=large_pr_handling,
                         return_remaining_files=True)
    if isinstance(output, tuple):
        patches_diff, remaining_files_list = output
    else:
        patches_diff = output
        remaining_files_list = []

    if not large_pr_handling or patches_diff:
        self.patches_diff = patches_diff
        if patches_diff:
            # generate the prediction
            get_logger().debug("PR diff", artifact=self.patches_diff)
            self.prediction = await self._get_prediction(model, patches_diff, prompt="pr_description_prompt")

            # extend the prediction with additional files not shown
            if get_settings().pr_description.enable_semantic_files_types:
                self.prediction = await self.extend_uncovered_files(self.prediction)
        else:
            # No exception is being handled at this point, so traceback.format_exc() would only
            # yield "NoneType: None"; log the message without a bogus traceback artifact.
            get_logger().error(f"Error getting PR diff {self.pr_id}")
            self.prediction = None
        return None

    # ---- large PR handling ----
    # get the diff in multiple patches, with the token handler only for the files prompt
    get_logger().debug('large_pr_handling for describe')
    token_handler_only_files_prompt = TokenHandler(
        self.git_provider.pr,
        self.vars,
        get_settings().pr_description_only_files_prompts.system,
        get_settings().pr_description_only_files_prompts.user,
    )
    (patches_compressed_list, total_tokens_list, deleted_files_list, remaining_files_list, file_dict,
     files_in_patches_list) = get_pr_diff_multiple_patchs(
        self.git_provider, token_handler_only_files_prompt, model)

    # get the files prediction for each patch group; empty groups are skipped in both modes
    # so that the sequential path no longer queries the model with an empty diff
    use_async_calls = get_settings().pr_description.async_ai_calls
    results, tasks = [], []
    for i, patches in enumerate(patches_compressed_list):
        if not patches:
            continue
        patches_diff = "\n".join(patches)
        get_logger().debug(f"PR diff number {i + 1} for describe files")
        prediction_call = self._get_prediction(model, patches_diff, prompt="pr_description_only_files_prompts")
        if use_async_calls:
            tasks.append(asyncio.create_task(prediction_call))
        else:
            results.append(await prediction_call)
    if use_async_calls:
        # Wait for all tasks to complete
        results = await asyncio.gather(*tasks)

    file_description_str_list = []
    for i, result in enumerate(results):
        prediction_files = result.strip().removeprefix('```yaml').strip('`').strip()
        if load_yaml(prediction_files, keys_fix_yaml=self.keys_fix) and prediction_files.startswith('pr_files'):
            prediction_files = prediction_files.removeprefix('pr_files:').strip()
            file_description_str_list.append(prediction_files)
        else:
            get_logger().debug(f"failed to generate predictions in iteration {i + 1} for describe files")

    # generate files_walkthrough string, with proper token handling
    token_handler_only_description_prompt = TokenHandler(
        self.git_provider.pr,
        self.vars,
        get_settings().pr_description_only_description_prompts.system,
        get_settings().pr_description_only_description_prompts.user)
    files_walkthrough = "\n".join(file_description_str_list)
    files_walkthrough_prompt = files_walkthrough  # str is immutable, no copy needed
    MAX_EXTRA_FILES_TO_PROMPT = 50

    def append_file_list(text, header, files, kind):
        # List at most MAX_EXTRA_FILES_TO_PROMPT names and report exactly how many were left out.
        files = list(files)
        text += header
        for file in files[:MAX_EXTRA_FILES_TO_PROMPT]:
            text += f"\n- {file}"
        hidden_count = len(files) - MAX_EXTRA_FILES_TO_PROMPT
        if hidden_count > 0:
            get_logger().debug(f"Too many {kind} files, clipping to {MAX_EXTRA_FILES_TO_PROMPT}")
            text += f"\n... and {hidden_count} more"
        return text

    if remaining_files_list:
        files_walkthrough_prompt = append_file_list(
            files_walkthrough_prompt, "\n\nNo more token budget. Additional unprocessed files:",
            remaining_files_list, "remaining")
    if deleted_files_list:
        files_walkthrough_prompt = append_file_list(
            files_walkthrough_prompt, "\n\nAdditional deleted files:",
            deleted_files_list, "deleted")

    tokens_files_walkthrough = len(
        token_handler_only_description_prompt.encoder.encode(files_walkthrough_prompt))
    total_tokens = token_handler_only_description_prompt.prompt_tokens + tokens_files_walkthrough
    max_tokens_model = get_max_tokens(model)
    if total_tokens > max_tokens_model - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD:
        # clip files_walkthrough to get the tokens within the limit
        files_walkthrough_prompt = clip_tokens(
            files_walkthrough_prompt,
            max_tokens_model - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD - token_handler_only_description_prompt.prompt_tokens,
            num_input_tokens=tokens_files_walkthrough)

    # PR header inference
    get_logger().debug("PR diff only description", artifact=files_walkthrough_prompt)
    prediction_headers = await self._get_prediction(model, patches_diff=files_walkthrough_prompt,
                                                    prompt="pr_description_only_description_prompts")
    prediction_headers = prediction_headers.strip().removeprefix('```yaml').strip('`').strip()

    # extend the tables with the files not shown
    files_walkthrough_extended = await self.extend_uncovered_files(files_walkthrough)

    # final processing: fall back to the headers alone when the combined YAML does not parse
    self.prediction = prediction_headers + "\n" + "pr_files:\n" + files_walkthrough_extended
    if not load_yaml(self.prediction, keys_fix_yaml=self.keys_fix):
        get_logger().error(f"Error getting valid YAML in large PR handling for describe {self.pr_id}")
        if load_yaml(prediction_headers, keys_fix_yaml=self.keys_fix):
            get_logger().debug(f"Using only headers for describe {self.pr_id}")
            self.prediction = prediction_headers
async def extend_uncovered_files(self, original_prediction: str) -> str:
    """
    Append placeholder entries for PR files that the prediction does not mention.

    `original_prediction` is YAML text that parses either to a mapping with a `pr_files` list or
    directly to a list of file entries. Every file returned by `git_provider.get_diff_files()` whose
    filename is not already present gets a placeholder entry labelled "additional files"; after
    MAX_EXTRA_FILES_TO_OUTPUT placeholders a single "Additional files not shown" entry is added
    and the scan stops.

    Args:
        original_prediction (str): YAML text of the prediction (mapping or list form).

    Returns:
        str: The extended YAML, in the same form (mapping or list) as the input. The input is
        returned unchanged when nothing had to be added, when the merge could not be validated,
        or when any exception occurred (the exception is logged).
    """
    try:
        prediction = original_prediction

        # get the original prediction filenames
        original_prediction_loaded = load_yaml(original_prediction, keys_fix_yaml=self.keys_fix)
        # The check must be on the parsed YAML: the `original_prediction` argument is always a str
        loaded_is_list = isinstance(original_prediction_loaded, list)
        if loaded_is_list:
            original_prediction_dict = {"pr_files": original_prediction_loaded}
        else:
            original_prediction_dict = original_prediction_loaded
        if original_prediction_dict:
            filenames_predicted = {file.get('filename', '').strip()
                                   for file in (original_prediction_dict.get('pr_files') or [])}
        else:
            filenames_predicted = set()

        def placeholder_entry(name):
            # Same text the YAML template produced after .strip()
            return ("- filename: |\n"
                    f"    {name}\n"
                    "  changes_title: |\n"
                    "    ...\n"
                    "  label: |\n"
                    "    additional files")

        # extend the prediction with additional files not included in the original prediction
        MAX_EXTRA_FILES_TO_OUTPUT = 100
        extra_entries = []
        counter_extra_files = 0
        for file in self.git_provider.get_diff_files():
            if file.filename in filenames_predicted:
                continue

            # add up to MAX_EXTRA_FILES_TO_OUTPUT files
            counter_extra_files += 1
            if counter_extra_files > MAX_EXTRA_FILES_TO_OUTPUT:
                extra_entries.append(placeholder_entry("Additional files not shown"))
                get_logger().debug(f"Too many remaining files, clipping to {MAX_EXTRA_FILES_TO_OUTPUT}")
                break
            extra_entries.append(placeholder_entry(file.filename))

        # merge the two dictionaries
        if counter_extra_files > 0:
            get_logger().info(f"Adding {counter_extra_files} unprocessed extra files to table prediction")
            prediction_extra = "pr_files:\n" + "\n".join(extra_entries)
            prediction_extra_dict = load_yaml(prediction_extra, keys_fix_yaml=self.keys_fix)
            if original_prediction_dict and isinstance(original_prediction_dict, dict) and \
                    isinstance(prediction_extra_dict, dict) and "pr_files" in prediction_extra_dict:
                existing_files = original_prediction_dict.get("pr_files")
                if isinstance(existing_files, list):
                    existing_files.extend(prediction_extra_dict["pr_files"])
                else:
                    # missing or null `pr_files` in the original prediction
                    original_prediction_dict["pr_files"] = prediction_extra_dict["pr_files"]
                # Hand back the same shape that was passed in (list stays a list)
                merged = original_prediction_dict["pr_files"] if loaded_is_list else original_prediction_dict
                new_yaml = yaml.dump(merged)
                if load_yaml(new_yaml, keys_fix_yaml=self.keys_fix):
                    prediction = new_yaml

        return prediction
    except Exception as e:
        get_logger().exception(f"Error extending uncovered files {self.pr_id}", artifact={"error": e})
        return original_prediction
2024-08-09 21:15:29 +03:00
2023-07-24 11:31:35 +03:00
2024-06-30 18:38:06 +03:00
async def extend_additional_files(self, remaining_files_list) -> str:
    """
    Append placeholder entries for files that were left out of the prediction.

    Each name in `remaining_files_list` is added to the `pr_files` list of `self.prediction` with the
    label "additional files (token-limit)". `self.prediction` itself is not modified.

    Args:
        remaining_files_list: Iterable of file names to add. Empty or None means nothing to add.

    Returns:
        str: The extended YAML when the merge succeeded and the result parses; otherwise the
        current `self.prediction` (also on any exception, which is logged).
    """
    prediction = self.prediction
    try:
        if not remaining_files_list:
            # Nothing to add; avoids parsing an empty "pr_files:" (null) and failing on extend()
            return prediction

        original_prediction_dict = load_yaml(self.prediction, keys_fix_yaml=self.keys_fix)
        extra_entries = [
            ("- filename: |\n"
             f"    {file}\n"
             "  changes_summary: |\n"
             "    ...\n"
             "  changes_title: |\n"
             "    ...\n"
             "  label: |\n"
             "    additional files (token-limit)")
            for file in remaining_files_list
        ]
        prediction_extra = "pr_files:\n" + "\n".join(extra_entries)
        prediction_extra_dict = load_yaml(prediction_extra, keys_fix_yaml=self.keys_fix)

        # merge the two dictionaries
        if isinstance(original_prediction_dict, dict) and isinstance(prediction_extra_dict, dict):
            extra_files = prediction_extra_dict.get("pr_files") or []
            existing_files = original_prediction_dict.get("pr_files")
            if isinstance(existing_files, list):
                existing_files.extend(extra_files)
            else:
                # missing or null `pr_files` in the original prediction
                original_prediction_dict["pr_files"] = list(extra_files)
            new_yaml = yaml.dump(original_prediction_dict)
            if load_yaml(new_yaml, keys_fix_yaml=self.keys_fix):
                prediction = new_yaml
        return prediction
    except Exception as e:
        get_logger().error(f"Error extending additional files {self.pr_id}: {e}")
        return self.prediction
2024-06-26 20:11:20 +03:00
async def _get_prediction(self, model: str, patches_diff: str, prompt="pr_description_prompt") -> str:
    """
    Render the system and user templates of the `prompt` settings section and query the AI handler.

    Args:
        model (str): Model name passed to `ai_handler.chat_completion`.
        patches_diff (str): Text placed in the `diff` prompt variable.
        prompt (str): Name of the settings section holding the `system` and `user` templates.

    Returns:
        str: The response text returned by the AI handler.

    Side effects:
        Stores the rendered variable set on `self.variables`.
    """
    prompt_vars = copy.deepcopy(self.vars)
    prompt_vars["diff"] = patches_diff  # update diff

    jinja_env = Environment(undefined=StrictUndefined)
    set_custom_labels(prompt_vars, self.git_provider)
    self.variables = prompt_vars

    def render(role):
        # A missing section or template renders as an empty string
        template_text = get_settings().get(prompt, {}).get(role, "")
        return jinja_env.from_string(template_text).render(self.variables)

    system_prompt = render("system")
    user_prompt = render("user")

    response, _finish_reason = await self.ai_handler.chat_completion(
        model=model,
        temperature=get_settings().config.temperature,
        system=system_prompt,
        user=user_prompt,
    )
    return response
2023-07-24 09:15:45 +03:00
2023-09-04 12:11:39 -04:00
def _prepare_data(self):
    """
    Parse `self.prediction` into `self.data` and move the known keys to the end in display order.

    Resulting order of the known keys: User Description, title, type, labels, description,
    changes_diagram, pr_files. A `changes_diagram` value is kept only when it starts with a code
    fence; a missing closing fence is appended.
    """
    # Load the AI prediction data into a dictionary
    self.data = load_yaml(self.prediction.strip(), keys_fix_yaml=self.keys_fix)
    data = self.data

    if get_settings().pr_description.add_original_user_description and self.user_description:
        data["User Description"] = self.user_description

    # re-order keys: popping and re-inserting moves each key to the end
    for key in ('User Description', 'title', 'type', 'labels', 'description'):
        if key in data:
            data[key] = data.pop(key)

    if 'changes_diagram' in data:
        diagram = data.pop('changes_diagram').strip()
        if diagram.startswith('```'):
            if not diagram.endswith('```'):  # fallback for missing closing
                diagram += '\n```'
            data['changes_diagram'] = '\n' + diagram

    if 'pr_files' in data:
        data['pr_files'] = data.pop('pr_files')
2023-09-14 08:13:00 +03:00
def _prepare_labels ( self ) - > List [ str ] :
2024-12-29 11:37:05 +02:00
pr_labels = [ ]
2023-07-24 09:15:45 +03:00
# If the 'PR Type' key is present in the dictionary, split its value by comma and assign it to 'pr_types'
2024-12-29 11:37:05 +02:00
if ' labels ' in self . data and self . data [ ' labels ' ] :
2023-11-12 15:00:06 +02:00
if type ( self . data [ ' labels ' ] ) == list :
2024-12-29 11:37:05 +02:00
pr_labels = self . data [ ' labels ' ]
2023-11-12 15:00:06 +02:00
elif type ( self . data [ ' labels ' ] ) == str :
2024-12-29 11:37:05 +02:00
pr_labels = self . data [ ' labels ' ] . split ( ' , ' )
elif ' type ' in self . data and self . data [ ' type ' ] and get_settings ( ) . pr_description . publish_labels :
2023-11-12 15:00:06 +02:00
if type ( self . data [ ' type ' ] ) == list :
2024-12-29 11:37:05 +02:00
pr_labels = self . data [ ' type ' ]
2023-11-12 15:00:06 +02:00
elif type ( self . data [ ' type ' ] ) == str :
2024-12-29 11:37:05 +02:00
pr_labels = self . data [ ' type ' ] . split ( ' , ' )
pr_labels = [ label . strip ( ) for label in pr_labels ]
2023-12-18 12:29:06 +02:00
# convert lowercase labels to original case
try :
if " labels_minimal_to_labels_dict " in self . variables :
d : dict = self . variables [ " labels_minimal_to_labels_dict " ]
2024-12-29 11:37:05 +02:00
for i , label_i in enumerate ( pr_labels ) :
2023-12-18 12:29:06 +02:00
if label_i in d :
2024-12-29 11:37:05 +02:00
pr_labels [ i ] = d [ label_i ]
2023-12-18 12:29:06 +02:00
except Exception as e :
get_logger ( ) . error ( f " Error converting labels to original case { self . pr_id } : { e } " )
2024-12-29 11:37:05 +02:00
return pr_labels
2023-09-04 12:11:39 -04:00
2024-02-25 16:23:44 +02:00
def _prepare_pr_answer_with_markers(self) -> Tuple[str, str, str, List[dict]]:
    """
    Fill the `pr_agent:*` markers of the user's description with the AI-generated content.

    The markers `pr_agent:type`, `pr_agent:summary` and `pr_agent:walkthrough` are replaced in
    `self.user_description`. A marker kind is skipped when it appears inside an HTML comment
    (`<!-- pr_agent:... -->`).

    Returns:
        - title: the AI title when `generate_ai_title` is enabled, otherwise the original PR title.
        - body: the user description with the markers replaced.
        - walkthrough_gfm: the rendered walkthrough ("" when not produced).
        - pr_file_changes: the file changes returned by `process_pr_files_prediction` ([] when not produced).
    """
    get_logger().info(f"Using description marker replacements {self.pr_id}")

    # The 'title' key is always removed from the data; it is used only when AI titles are enabled
    original_title = self.vars["title"]
    ai_title = self.data.pop('title', original_title)
    title = ai_title if get_settings().pr_description.generate_ai_title else original_title

    body = self.user_description
    ai_header = ""
    if get_settings().pr_description.include_generated_by_header:
        ai_header = f"### 🤖 Generated by PR Agent at {self.git_provider.last_commit_id.sha}\n\n"

    ai_type = self.data.get('type')
    if ai_type and not re.search(r'<!--\s*pr_agent:type\s*-->', body):
        type_text = ', '.join(str(t) for t in ai_type) if isinstance(ai_type, list) else ai_type
        body = body.replace('pr_agent:type', f"{ai_header}{type_text}")

    ai_summary = self.data.get('description')
    if ai_summary and not re.search(r'<!--\s*pr_agent:summary\s*-->', body):
        body = body.replace('pr_agent:summary', f"{ai_header}{ai_summary}")

    walkthrough_gfm = ""
    pr_file_changes = []
    ai_walkthrough = self.data.get('pr_files')
    if ai_walkthrough and not re.search(r'<!--\s*pr_agent:walkthrough\s*-->', body):
        try:
            walkthrough_gfm, pr_file_changes = self.process_pr_files_prediction(walkthrough_gfm,
                                                                                self.file_label_dict)
            body = body.replace('pr_agent:walkthrough', walkthrough_gfm)
        except Exception as e:
            # On failure the marker is removed rather than left in the description
            get_logger().error(f"Failing to process walkthrough {self.pr_id}: {e}")
            body = body.replace('pr_agent:walkthrough', "")

    return title, body, walkthrough_gfm, pr_file_changes
2023-09-04 12:11:39 -04:00
2024-02-25 16:23:44 +02:00
def _prepare_pr_answer(self) -> Tuple[str, str, str, List[dict]]:
    """
    Prepare the PR description based on the AI prediction data.

    Note: this mutates ``self.data`` - the 'labels', 'type' and 'title' entries may be removed.

    Returns:
    - title: a string containing the PR title (the AI title only when 'generate_ai_title' is enabled).
    - pr_body: a string containing the PR description body in a markdown format.
    - changes_walkthrough: the rendered 'pr_files' walkthrough, or an empty string when none was produced.
    - pr_file_changes: the second value returned by process_pr_files_prediction, or an empty list when it
      was not called.
    """
    # Don't display 'PR Labels'
    if 'labels' in self.data and self.git_provider.is_supported("get_labels"):
        self.data.pop('labels')
    if not get_settings().pr_description.enable_pr_type:
        # The prediction is not guaranteed to contain a 'type' entry, so the pop must not raise.
        self.data.pop('type', None)

    # Remove the 'PR Title' key from the dictionary; fall back to the original PR title when it is missing
    ai_title = self.data.pop('title', self.vars["title"])
    if get_settings().pr_description.generate_ai_title:
        title = ai_title
    else:
        title = self.vars["title"]

    # Iterate over the remaining dictionary items and append the key and value to 'pr_body' in a markdown format.
    # 'pr_files' gets no heading here: it is rendered separately into 'changes_walkthrough'.
    pr_body, changes_walkthrough = "", ""
    pr_file_changes = []
    last_idx = len(self.data) - 1
    for idx, (key, value) in enumerate(self.data.items()):
        if key == 'pr_files':
            value = self.file_label_dict
        else:
            key_publish = key.rstrip(':').replace("_", " ").capitalize()
            if key_publish == "Type":
                key_publish = "PR Type"
            pr_body += f"### **{key_publish}**\n"

        if 'walkthrough' in key.lower():
            gfm_supported = self.git_provider.is_supported("gfm_markdown")
            if gfm_supported:
                pr_body += "<details> <summary>files:</summary>\n\n"
            for file in value:
                filename = file['filename'].replace("'", "`")
                description = file['changes_in_file']
                pr_body += f'- `{filename}`: {description}\n'
            if gfm_supported:
                pr_body += "</details>\n"
        elif 'pr_files' in key.lower() and get_settings().pr_description.enable_semantic_files_types:
            changes_walkthrough, pr_file_changes = self.process_pr_files_prediction(changes_walkthrough, value)
            changes_walkthrough = f"{PRDescriptionHeader.CHANGES_WALKTHROUGH.value}\n{changes_walkthrough}"
        elif key.lower().strip() == 'description':
            if isinstance(value, list):
                # str() keeps non-string list items (numbers, nested values) from crashing the join
                value = ', '.join(str(v).rstrip() for v in value)
            value = value.replace('\n-', '\n\n-').strip()  # makes the bullet points more readable by adding double space
            pr_body += f"{value}\n"
        else:
            # if the value is a list, join its items by comma
            if isinstance(value, list):
                value = ', '.join(str(v).rstrip() for v in value)
            pr_body += f"{value}\n"

        if idx < last_idx:
            pr_body += "\n\n___\n\n"

    return title, pr_body, changes_walkthrough, pr_file_changes
2023-12-06 15:29:45 +02:00
def _prepare_file_labels(self):
    """
    Group the per-file entries of ``self.data['pr_files']`` by their label.

    Entries that are not dicts, lack one of 'changes_title' / 'filename' / 'label', or have an empty
    'changes_title' are skipped with a warning. Any other failure on a single entry is logged and that
    entry is skipped, so one malformed file never aborts the whole grouping.

    Returns:
        dict mapping the stripped, lower-cased label to a list of
        ``(filename, changes_title, changes_summary)`` tuples in prediction order. Empty when
        ``self.data`` is not a non-empty dict or holds no 'pr_files' entries.
    """
    file_label_dict = {}
    if (not self.data or not isinstance(self.data, dict) or
            'pr_files' not in self.data or not self.data['pr_files']):
        return file_label_dict

    required_fields = ('changes_title', 'filename', 'label')
    for file in self.data['pr_files']:
        try:
            if not isinstance(file, dict) or not all(field in file for field in required_fields):
                # can happen for example if a YAML generation was interrupted in the middle (no more tokens)
                get_logger().warning(f"Missing required fields in file label dict {self.pr_id}, skipping file",
                                     artifact={"file": file})
                continue
            if not file.get('changes_title'):
                get_logger().warning(f"Empty changes title or summary in file label dict {self.pr_id}, skipping file",
                                     artifact={"file": file})
                continue
            filename = file['filename'].replace("'", "`").replace('"', '`')
            # A key that is present but null comes back as None; treat it like a missing summary
            # instead of letting .strip() raise and drop an otherwise valid file.
            changes_summary = (file.get('changes_summary') or "").strip()
            changes_title = file['changes_title'].strip()
            label = file['label'].strip().lower()
            file_label_dict.setdefault(label, []).append((filename, changes_title, changes_summary))
        except Exception as e:
            # best effort: a single bad entry is reported and skipped
            get_logger().error(f"Error preparing file label dict {self.pr_id}: {e}")
    return file_label_dict
2023-12-06 16:32:53 +02:00
2023-12-06 17:01:21 +02:00
def process_pr_files_prediction(self, pr_body, value):
    """
    Append an HTML "Relevant files" table for the labelled files to ``pr_body``.

    Args:
        pr_body: text the table is appended to.
        value: mapping of semantic label -> list of ``(filename, changes_title, changes_summary)``
            tuples (the shape produced by ``_prepare_file_labels``).

    Returns:
        ``(pr_body, pr_comments)``. ``pr_comments`` is always an empty list in this method.
        When the git provider does not support "gfm_markdown", ``pr_body`` is returned unchanged.
        If rendering fails part-way, the error is logged and the partially rendered body is returned.
    """
    pr_comments = []
    # logic for using collapsible file list
    use_collapsible_file_list = get_settings().pr_description.collapsible_file_list
    num_files = sum(len(files) for files in value.values()) if value else 0
    if use_collapsible_file_list == "adaptive":
        use_collapsible_file_list = num_files > self.COLLAPSIBLE_FILE_LIST_THRESHOLD

    if not self.git_provider.is_supported("gfm_markdown"):
        return pr_body, pr_comments
    try:
        pr_body += "<table>"
        header = "Relevant files"
        delta = 75  # titles and summaries are wrapped at (delta - 5) visible characters
        pr_body += f"""<thead><tr><th></th><th align="left">{header}</th></tr></thead>"""
        pr_body += """<tbody>"""

        # Diff files keyed by normalised path. Built on first use and only once, instead of calling
        # the provider and scanning its list again for every listed file.
        diff_files_by_name = None

        for semantic_label in value.keys():
            s_label = semantic_label.strip("'").strip('"')
            pr_body += f"""<tr><td><strong>{s_label.capitalize()}</strong></td>"""
            list_tuples = value[semantic_label]
            if use_collapsible_file_list:
                pr_body += f"""<td><details><summary>{len(list_tuples)} files</summary><table>"""
            else:
                pr_body += """<td><table>"""

            for filename, file_changes_title, file_change_description in list_tuples:
                filename = filename.replace("'", "`").rstrip()
                filename_publish = filename.split("/")[-1]
                if file_changes_title and file_changes_title.strip() != "...":
                    file_changes_title_code = f"<code>{file_changes_title}</code>"
                    file_changes_title_code_br = insert_br_after_x_chars(file_changes_title_code, x=(delta - 5)).strip()
                    if len(file_changes_title_code_br) < (delta - 5):
                        # NOTE(review): padding is done with plain spaces, which HTML collapses; if a fixed
                        # column width is intended an entity such as &nbsp; may have been meant - confirm.
                        file_changes_title_code_br += " " * ((delta - 5) - len(file_changes_title_code_br))
                    filename_publish = f"<strong>{filename_publish}</strong><dd>{file_changes_title_code_br}</dd>"
                else:
                    filename_publish = f"<strong>{filename_publish}</strong>"

                diff_plus_minus = ""
                delta_nbsp = ""
                if diff_files_by_name is None:
                    diff_files_by_name = {}
                    for diff_file in self.git_provider.get_diff_files():
                        # setdefault keeps the first entry for a path, like the original first-match scan
                        diff_files_by_name.setdefault(diff_file.filename.lower().strip('/'), diff_file)
                matched = diff_files_by_name.get(filename.lower().strip('/'))
                if matched is not None:
                    diff_plus_minus += f"+{matched.num_plus_lines}/-{matched.num_minus_lines}"
                    if len(diff_plus_minus) > 12 or diff_plus_minus == "+0/-0":
                        diff_plus_minus = "[link]"
                    delta_nbsp = " " * max(0, (8 - len(diff_plus_minus)))

                # try to add line numbers link to code suggestions
                link = ""
                if hasattr(self.git_provider, 'get_line_link'):
                    filename = filename.strip()
                    link = self.git_provider.get_line_link(filename, relevant_line_start=-1)
                if (not link or not diff_plus_minus) and ('additional files' not in filename.lower()):
                    # no usable link or no diff stats for this file: fall back to an empty link
                    link = ""

                # Add file data to the PR body
                file_change_description_br = insert_br_after_x_chars(file_change_description, x=(delta - 5))
                pr_body = self.add_file_data(delta_nbsp, diff_plus_minus, file_change_description_br, filename,
                                             filename_publish, link, pr_body)

            # Close the collapsible file list
            if use_collapsible_file_list:
                pr_body += """</table></details></td></tr>"""
            else:
                pr_body += """</table></td></tr>"""
        # NOTE(review): every label row is already closed above, so this leading </tr> has no matching
        # opening tag. Kept as-is because code outside this block may parse the emitted markup - confirm.
        pr_body += """</tr></tbody></table>"""
    except Exception as e:
        get_logger().error(f"Error processing PR files to markdown {self.pr_id}: {str(e)}")
    return pr_body, pr_comments
def add_file_data(self, delta_nbsp, diff_plus_minus, file_change_description_br, filename, filename_publish, link,
                  pr_body) -> str:
    """
    Append one file row (``<tr>``) of the walkthrough table to ``pr_body`` and return the result.

    Args:
        delta_nbsp: padding text placed after the diff link.
        diff_plus_minus: text shown inside the link in the second cell.
        file_change_description_br: description of the change; when empty, a compact row without a
            ``<details>`` section is produced.
        filename: full file path, shown inside the ``<details>`` section.
        filename_publish: markup used for the first cell / the ``<summary>``.
        link: URL used as the ``href`` of the diff link.
        pr_body: text the row is appended to.

    NOTE(review): the blank lines inside the detailed template presumably matter for how the host
    renders markdown nested in HTML - keep them when editing, and confirm against the rendered output.
    """
    if file_change_description_br:
        # Detailed row: summary line plus the path and description inside a collapsible section
        return pr_body + f"""
<tr>
<td>
<details>
<summary>{filename_publish}</summary>
<hr>

{filename}

{file_change_description_br}
</details>


</td>
<td><a href="{link}">{diff_plus_minus}</a>{delta_nbsp}</td>

</tr>
"""

    # Compact row: nothing to expand, so only the name and the diff link are emitted
    return pr_body + f"""
<tr>
<td>{filename_publish}</td>
<td><a href="{link}">{diff_plus_minus}</a>{delta_nbsp}</td>
</tr>
"""
2024-02-11 11:37:11 +02:00
def count_chars_without_html(string):
    """Return the number of characters in ``string`` once anything shaped like ``<...>`` is removed."""
    if '<' not in string:
        # no tag can be present, skip the regex
        return len(string)
    no_html_string = re.sub('<[^>]+>', '', string)
    return len(no_html_string)


def insert_br_after_x_chars(text: str, x=70):
    """
    Insert <br> into a string after a word that increases its length above x characters.
    Use proper HTML tags for code and new lines.

    Args:
        text: the text to wrap. Falsy input yields an empty string.
        x: maximum number of visible (non-tag) characters per line.

    Returns:
        ``text`` unchanged when its visible length is below ``x``; otherwise the text with backtick
        pairs turned into <code> tags, markdown bullets turned into <li>, newlines turned into <br>,
        and extra <br> inserted wherever a line would grow past ``x`` visible characters.
    """
    if not text:
        return ""
    if count_chars_without_html(text) < x:
        return text

    # replace odd instances of ` with <code> and even instances of ` with </code>
    text = replace_code_tags(text)

    # convert list items to <li>
    if text.startswith(("- ", "* ")):
        text = "<li>" + text[2:]
    for bullet in ("\n- ", "\n - ", "\n* ", "\n * "):
        # the trailing space keeps <li> a word of its own, so it is recognised as a marker below
        text = text.replace(bullet, '<br><li> ')

    # convert new lines to <br>
    text = text.replace("\n", '<br>')

    # split text into words, keeping each explicit line break attached to the last word of its line
    lines = text.split('<br>')
    words = []
    for i, line in enumerate(lines):
        words += line.split(' ')
        if i < len(lines) - 1:
            words[-1] += "<br>"

    markers = ("<code>", "</code>", "<li>", "<br>")
    new_text = []
    is_inside_code = False
    current_length = 0
    for word in words:
        is_saved_word = word in markers

        len_word = count_chars_without_html(word)
        if not is_saved_word and (current_length + len_word > x):
            if is_inside_code:
                # a <code> span cannot cross the break: close it and reopen on the next line
                new_text.append("</code><br><code>")
            else:
                new_text.append("<br>")
            current_length = 0  # Reset counter
        new_text.append(word + " ")

        if not is_saved_word:
            current_length += len_word + 1  # Add 1 for the space

        # A new list item or an explicit line break starts a fresh line. The break is glued to the
        # last word of its line ("word<br>"), so test the suffix: comparing with "<br>" alone would
        # carry the old length over the break and force a second, spurious <br> on the next line.
        if word == "<li>" or word.endswith("<br>"):
            current_length = 0

        if "<code>" in word:
            is_inside_code = True
        if "</code>" in word:
            is_inside_code = False
    return ''.join(new_text).strip()


def replace_code_tags(text):
    """
    Replace odd instances of ` with <code> and even instances of ` with </code>

    An unpaired trailing backtick still gets a closing tag, so the result is always balanced.
    """
    parts = text.split('`')
    # after splitting on backticks, the odd-indexed parts are the ones that were inside a pair
    for i in range(1, len(parts), 2):
        parts[i] = '<code>' + parts[i] + '</code>'
    return ''.join(parts)