enable ai_metadata

This commit is contained in:
mrT23
2024-09-07 17:25:05 +03:00
parent 24f7e8622f
commit 8706f643ef
32 changed files with 338 additions and 117 deletions

View File

@ -243,7 +243,7 @@ __old hunk__
if hasattr(file, 'edit_type') and file.edit_type == EDIT_TYPE.DELETED:
return f"\n\n## file '{file.filename.strip()}' was deleted\n"
patch_with_lines_str = f"\n\n## file: '{file.filename.strip()}'\n"
patch_with_lines_str = f"\n\n## File: '{file.filename.strip()}'\n"
patch_lines = patch.splitlines()
RE_HUNK_HEADER = re.compile(
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
@ -319,7 +319,7 @@ __old hunk__
def extract_hunk_lines_from_patch(patch: str, file_name, line_start, line_end, side) -> tuple[str, str]:
patch_with_lines_str = f"\n\n## file: '{file_name.strip()}'\n\n"
patch_with_lines_str = f"\n\n## File: '{file_name.strip()}'\n\n"
selected_lines = ""
patch_lines = patch.splitlines()
RE_HUNK_HEADER = re.compile(

View File

@ -200,6 +200,10 @@ def pr_generate_extended_diff(pr_languages: list,
if add_line_numbers_to_hunks:
full_extended_patch = convert_to_hunks_with_lines_numbers(extended_patch, file)
# add AI-summary metadata to the patch
if file.ai_file_summary and get_settings().get("config.enable_ai_metadata", False):
full_extended_patch = add_ai_summary_top_patch(file, full_extended_patch)
patch_tokens = token_handler.count_tokens(full_extended_patch)
file.tokens = patch_tokens
total_tokens += patch_tokens
@ -239,6 +243,10 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
if convert_hunks_to_line_numbers:
patch = convert_to_hunks_with_lines_numbers(patch, file)
## add AI-summary metadata to the patch (disabled, since we are in the compressed diff)
# if file.ai_file_summary and get_settings().config.get('config.is_auto_command', False):
# patch = add_ai_summary_top_patch(file, patch)
new_patch_tokens = token_handler.count_tokens(patch)
file_dict[file.filename] = {'patch': patch, 'tokens': new_patch_tokens, 'edit_type': file.edit_type}
@ -304,7 +312,7 @@ def generate_full_patch(convert_hunks_to_line_numbers, file_dict, max_tokens_mod
if patch:
if not convert_hunks_to_line_numbers:
patch_final = f"\n\n## file: '{filename.strip()}\n\n{patch.strip()}\n'"
patch_final = f"\n\n## File: '{filename.strip()}\n\n{patch.strip()}\n'"
else:
patch_final = "\n\n" + patch.strip()
patches.append(patch_final)
@ -432,6 +440,9 @@ def get_pr_multi_diffs(git_provider: GitProvider,
continue
patch = convert_to_hunks_with_lines_numbers(patch, file)
# add AI-summary metadata to the patch
if file.ai_file_summary and get_settings().get("config.enable_ai_metadata", False):
patch = add_ai_summary_top_patch(file, patch)
new_patch_tokens = token_handler.count_tokens(patch)
if patch and (token_handler.prompt_tokens + new_patch_tokens) > get_max_tokens(
@ -479,3 +490,33 @@ def get_pr_multi_diffs(git_provider: GitProvider,
final_diff_list.append(final_diff)
return final_diff_list
def add_ai_metadata_to_diff_files(git_provider, pr_description_files):
    """
    Adds AI metadata to the diff files based on the PR description files (FilePatchInfo.ai_file_summary).

    Each file returned by ``git_provider.get_diff_files()`` whose stripped filename equals the
    stripped ``'full_file_name'`` of an entry in ``pr_description_files`` gets that entry assigned
    to its ``ai_file_summary`` attribute. Files without a matching entry are left untouched and
    reported through the logger.

    Args:
        git_provider: object exposing ``get_diff_files()``.
        pr_description_files: iterable of dicts carrying a ``'full_file_name'`` key.
            ``None`` or an empty iterable is treated as "no summaries available".
    """
    # Index the description entries once instead of rescanning the list for every diff file.
    # setdefault keeps the first entry for a duplicated name, matching a first-match linear scan.
    summaries_by_name = {}
    for pr_file in pr_description_files or []:
        full_name = pr_file.get('full_file_name')
        if not full_name:
            # An entry without a file name can never match a diff file; skip it.
            continue
        summaries_by_name.setdefault(full_name.strip(), pr_file)

    for file in git_provider.get_diff_files():
        filename = file.filename.strip()
        pr_file = summaries_by_name.get(filename)
        if pr_file is not None:
            file.ai_file_summary = pr_file
        else:
            get_logger().info(f"File {filename} not found in the PR description files",
                              artifact=pr_description_files)
def add_ai_summary_top_patch(file, full_extended_patch):
    """
    Insert the AI-generated summary directly below the first file header of a patch.

    The patch is scanned line by line for the first line starting with '## File:' or
    '## file:'. A '### AI-generated file summary:' block built from
    ``file.ai_file_summary['long_summary']`` is placed right after it. Only the first
    header is annotated; a patch without such a header is returned unchanged.
    """
    header_prefixes = ("## File:", "## file:")
    rows = full_extended_patch.split("\n")

    # Locate the first header line, if any.
    header_idx = next(
        (idx for idx, row in enumerate(rows) if row.startswith(header_prefixes)),
        None,
    )
    if header_idx is not None:
        # The summary is looked up only when a header exists.
        summary_block = f"### AI-generated file summary:\n{file.ai_file_summary['long_summary']}"
        rows[header_idx + 1:header_idx + 1] = [summary_block]

    return "\n".join(rows)

View File

@ -21,3 +21,4 @@ class FilePatchInfo:
old_filename: str = None
num_plus_lines: int = -1
num_minus_lines: int = -1
ai_file_summary: str = None

View File

@ -1,4 +1,5 @@
from __future__ import annotations
import html2text
import html
import copy
@ -214,19 +215,6 @@ def convert_to_markdown_v2(output_data: dict,
reference_link = git_provider.get_line_link(relevant_file, start_line, end_line)
if gfm_supported:
if get_settings().pr_reviewer.extra_issue_links:
issue_content_linked =copy.deepcopy(issue_content)
referenced_variables_list = issue.get('referenced_variables', [])
for component in referenced_variables_list:
name = component['variable_name'].strip().strip('`')
ind = issue_content.find(name)
if ind != -1:
reference_link_component = git_provider.get_line_link(relevant_file, component['relevant_line'], component['relevant_line'])
issue_content_linked = issue_content_linked[:ind-1] + f"[`{name}`]({reference_link_component})" + issue_content_linked[ind+len(name)+1:]
else:
get_logger().info(f"Failed to find variable in issue content: {component['variable_name'].strip()}")
issue_content = issue_content_linked
issue_str = f"<a href='{reference_link}'><strong>{issue_header}</strong></a><br>{issue_content}"
else:
issue_str = f"[**{issue_header}**]({reference_link})\n\n{issue_content}\n\n"
@ -945,3 +933,66 @@ def is_value_no(value):
if value_str == 'no' or value_str == 'none' or value_str == 'false':
return True
return False
def process_description(description_full: str) -> tuple[str, list]:
    """
    Split a PR description into its base text and the per-file entries of the changes walkthrough.

    The description is split on the '### **Changes walkthrough** 📝' header. Everything before
    the header is the base description. The part after it is cut at the end of the table
    ('</table>\\n\\n___', else '\\n___', else end of string) and scanned for
    '<tr><td><details>...</details></td>' rows. Each row that parses yields a dict with the keys
    'short_file_name', 'full_file_name', 'short_summary' and 'long_summary'.

    Args:
        description_full: the full PR description. ``None`` or an empty string yields ``("", [])``.

    Returns:
        A tuple ``(base_description_str, files)``. Parsing failures are logged and leave
        ``files`` with whatever entries were collected before the failure.
    """
    # Guard: without this, a missing description raises on .split() outside the try below.
    if not description_full:
        return "", []

    split_str = "### **Changes walkthrough** 📝"
    description_split = description_full.split(split_str)
    base_description_str = description_split[0]
    changes_walkthrough_str = ""
    files = []
    if len(description_split) > 1:
        changes_walkthrough_str = description_split[1]
    else:
        get_logger().debug("No changes walkthrough found")

    try:
        if changes_walkthrough_str:
            # get the end of the table
            if '</table>\n\n___' in changes_walkthrough_str:
                end = changes_walkthrough_str.index("</table>\n\n___")
            elif '\n___' in changes_walkthrough_str:
                end = changes_walkthrough_str.index("\n___")
            else:
                end = len(changes_walkthrough_str)
            changes_walkthrough_str = changes_walkthrough_str[:end]

            h = html2text.HTML2Text()
            h.body_width = 0  # Disable line wrapping

            # find all the files: one <details> element per table row
            row_pattern = r'<tr>\s*<td>\s*(<details>\s*<summary>(.*?)</summary>(.*?)</details>)\s*</td>'
            # groups: <strong> text, <code> text, text after <hr> up to a blank line, remaining text
            details_pattern = r'<details>\s*<summary><strong>(.*?)</strong><dd><code>(.*?)</code>.*?</summary>\s*<hr>\s*(.*?)\n\n\s*(.*?)</details>'
            files_found = re.findall(row_pattern, changes_walkthrough_str, re.DOTALL)
            for file_data in files_found:
                try:
                    if isinstance(file_data, tuple):
                        # findall returns one tuple per row; the first group is the whole <details> element
                        file_data = file_data[0]
                    res = re.search(details_pattern, file_data, re.DOTALL)
                    if res and res.lastindex == 4:
                        short_filename = res.group(1).strip()
                        short_summary = res.group(2).strip()
                        long_filename = res.group(3).strip()
                        long_summary = res.group(4).strip()
                        # Normalise '<br>' handling before html2text converts the summary
                        long_summary = long_summary.replace('<br> *', '\n*').replace('<br>', '').replace('\n', '<br>')
                        long_summary = h.handle(long_summary).strip()
                        if not long_summary.startswith('*'):
                            long_summary = f"* {long_summary}"

                        files.append({
                            'short_file_name': short_filename,
                            'full_file_name': long_filename,
                            'short_summary': short_summary,
                            'long_summary': long_summary
                        })
                    else:
                        get_logger().error("Failed to parse description", artifact={'description': file_data})
                except Exception as e:
                    # Best effort: one malformed row must not discard the other rows.
                    get_logger().exception(f"Failed to process description: {e}", artifact={'description': file_data})
    except Exception as e:
        get_logger().exception(f"Failed to process description: {e}")

    return base_description_str, files