mirror of https://github.com/qodo-ai/pr-agent.git
enable ai_metadata
@@ -243,7 +243,7 @@ __old hunk__
     if hasattr(file, 'edit_type') and file.edit_type == EDIT_TYPE.DELETED:
         return f"\n\n## file '{file.filename.strip()}' was deleted\n"
 
-    patch_with_lines_str = f"\n\n## file: '{file.filename.strip()}'\n"
+    patch_with_lines_str = f"\n\n## File: '{file.filename.strip()}'\n"
     patch_lines = patch.splitlines()
     RE_HUNK_HEADER = re.compile(
         r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
@@ -319,7 +319,7 @@ __old hunk__
 
 def extract_hunk_lines_from_patch(patch: str, file_name, line_start, line_end, side) -> tuple[str, str]:
 
-    patch_with_lines_str = f"\n\n## file: '{file_name.strip()}'\n\n"
+    patch_with_lines_str = f"\n\n## File: '{file_name.strip()}'\n\n"
     selected_lines = ""
     patch_lines = patch.splitlines()
     RE_HUNK_HEADER = re.compile(
@@ -200,6 +200,10 @@ def pr_generate_extended_diff(pr_languages: list,
         if add_line_numbers_to_hunks:
             full_extended_patch = convert_to_hunks_with_lines_numbers(extended_patch, file)
 
+        # add AI-summary metadata to the patch
+        if file.ai_file_summary and get_settings().get("config.enable_ai_metadata", False):
+            full_extended_patch = add_ai_summary_top_patch(file, full_extended_patch)
+
         patch_tokens = token_handler.count_tokens(full_extended_patch)
         file.tokens = patch_tokens
         total_tokens += patch_tokens
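The guards added in this commit all read the same flag, which defaults to off. A minimal sketch of enabling it programmatically, assuming pr-agent's Dynaconf-backed settings object from `pr_agent.config_loader` and the `get_settings().set(...)` pattern used elsewhere in the repo:

```python
from pr_agent.config_loader import get_settings

# The new guards check get_settings().get("config.enable_ai_metadata", False),
# so setting this key to True turns on AI-summary injection into extended diffs.
get_settings().set("config.enable_ai_metadata", True)
```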
@@ -239,6 +243,10 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
         if convert_hunks_to_line_numbers:
             patch = convert_to_hunks_with_lines_numbers(patch, file)
 
+        ## add AI-summary metadata to the patch (disabled, since we are in the compressed diff)
+        # if file.ai_file_summary and get_settings().config.get('config.is_auto_command', False):
+        #     patch = add_ai_summary_top_patch(file, patch)
+
         new_patch_tokens = token_handler.count_tokens(patch)
         file_dict[file.filename] = {'patch': patch, 'tokens': new_patch_tokens, 'edit_type': file.edit_type}
 
@@ -304,7 +312,7 @@ def generate_full_patch(convert_hunks_to_line_numbers, file_dict, max_tokens_mod
 
         if patch:
             if not convert_hunks_to_line_numbers:
-                patch_final = f"\n\n## file: '{filename.strip()}\n\n{patch.strip()}\n'"
+                patch_final = f"\n\n## File: '{filename.strip()}\n\n{patch.strip()}\n'"
             else:
                 patch_final = "\n\n" + patch.strip()
             patches.append(patch_final)
@@ -432,6 +440,9 @@ def get_pr_multi_diffs(git_provider: GitProvider,
             continue
 
         patch = convert_to_hunks_with_lines_numbers(patch, file)
+        # add AI-summary metadata to the patch
+        if file.ai_file_summary and get_settings().get("config.enable_ai_metadata", False):
+            patch = add_ai_summary_top_patch(file, patch)
         new_patch_tokens = token_handler.count_tokens(patch)
 
         if patch and (token_handler.prompt_tokens + new_patch_tokens) > get_max_tokens(
@@ -479,3 +490,33 @@ def get_pr_multi_diffs(git_provider: GitProvider,
         final_diff_list.append(final_diff)
 
     return final_diff_list
+
+
+def add_ai_metadata_to_diff_files(git_provider, pr_description_files):
+    """
+    Adds AI metadata to the diff files based on the PR description files (FilePatchInfo.ai_file_summary).
+    """
+    diff_files = git_provider.get_diff_files()
+    for file in diff_files:
+        filename = file.filename.strip()
+        found = False
+        for pr_file in pr_description_files:
+            if filename == pr_file['full_file_name'].strip():
+                file.ai_file_summary = pr_file
+                found = True
+                break
+        if not found:
+            get_logger().info(f"File {filename} not found in the PR description files",
+                              artifacts=pr_description_files)
+
+
+def add_ai_summary_top_patch(file, full_extended_patch):
+    # below every instance of '## File: ...' in the patch, add the ai-summary metadata
+    full_extended_patch_lines = full_extended_patch.split("\n")
+    for i, line in enumerate(full_extended_patch_lines):
+        if line.startswith("## File:") or line.startswith("## file:"):
+            full_extended_patch_lines.insert(i + 1,
+                                             f"### AI-generated file summary:\n{file.ai_file_summary['long_summary']}")
+            break
+    full_extended_patch = "\n".join(full_extended_patch_lines)
+    return full_extended_patch
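To make the injection concrete, a hedged sketch of `add_ai_summary_top_patch` on sample data; `_File` is a hypothetical stand-in for `FilePatchInfo`, and the patch text is invented:

```python
class _File:
    # hypothetical stand-in for FilePatchInfo; ai_file_summary holds a parsed
    # walkthrough entry (see process_description below)
    ai_file_summary = {'long_summary': '* handle empty input\n* raise on bad tokens'}

patch = "\n\n## File: 'src/parser.py'\n@@ -1,3 +1,4 @@\n..."
print(add_ai_summary_top_patch(_File(), patch))
# The summary lands directly below the first "## File:" header:
#
# ## File: 'src/parser.py'
# ### AI-generated file summary:
# * handle empty input
# * raise on bad tokens
# @@ -1,3 +1,4 @@
# ...
```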
@@ -21,3 +21,4 @@ class FilePatchInfo:
     old_filename: str = None
     num_plus_lines: int = -1
     num_minus_lines: int = -1
+    ai_file_summary: str = None
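Although the new field is annotated `str`, `add_ai_metadata_to_diff_files` above assigns it a whole walkthrough entry, i.e. a dict in the shape produced by `process_description` below. A sketch with illustrative values only:

```python
# Sample of what FilePatchInfo.ai_file_summary ends up holding (invented data):
file.ai_file_summary = {
    'short_file_name': 'parser.py',
    'full_file_name': 'src/parser.py',
    'short_summary': 'add error handling',
    'long_summary': '* handle empty input\n* raise on bad tokens',
}
```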
@@ -1,4 +1,5 @@
 from __future__ import annotations
+import html2text
 
 import html
 import copy
@@ -214,19 +215,6 @@ def convert_to_markdown_v2(output_data: dict,
             reference_link = git_provider.get_line_link(relevant_file, start_line, end_line)
 
             if gfm_supported:
-                if get_settings().pr_reviewer.extra_issue_links:
-                    issue_content_linked = copy.deepcopy(issue_content)
-                    referenced_variables_list = issue.get('referenced_variables', [])
-                    for component in referenced_variables_list:
-                        name = component['variable_name'].strip().strip('`')
-
-                        ind = issue_content.find(name)
-                        if ind != -1:
-                            reference_link_component = git_provider.get_line_link(relevant_file, component['relevant_line'], component['relevant_line'])
-                            issue_content_linked = issue_content_linked[:ind-1] + f"[`{name}`]({reference_link_component})" + issue_content_linked[ind+len(name)+1:]
-                        else:
-                            get_logger().info(f"Failed to find variable in issue content: {component['variable_name'].strip()}")
-                    issue_content = issue_content_linked
                 issue_str = f"<a href='{reference_link}'><strong>{issue_header}</strong></a><br>{issue_content}"
             else:
                 issue_str = f"[**{issue_header}**]({reference_link})\n\n{issue_content}\n\n"
@@ -945,3 +933,66 @@ def is_value_no(value):
     if value_str == 'no' or value_str == 'none' or value_str == 'false':
         return True
     return False
+
+
+def process_description(description_full: str):
+    split_str = "### **Changes walkthrough** 📝"
+    description_split = description_full.split(split_str)
+    base_description_str = description_split[0]
+    changes_walkthrough_str = ""
+    files = []
+    if len(description_split) > 1:
+        changes_walkthrough_str = description_split[1]
+    else:
+        get_logger().debug("No changes walkthrough found")
+
+    try:
+        if changes_walkthrough_str:
+            # get the end of the table
+            if '</table>\n\n___' in changes_walkthrough_str:
+                end = changes_walkthrough_str.index("</table>\n\n___")
+            elif '\n___' in changes_walkthrough_str:
+                end = changes_walkthrough_str.index("\n___")
+            else:
+                end = len(changes_walkthrough_str)
+            changes_walkthrough_str = changes_walkthrough_str[:end]
+
+            h = html2text.HTML2Text()
+            h.body_width = 0  # Disable line wrapping
+
+            # find all the files
+            pattern = r'<tr>\s*<td>\s*(<details>\s*<summary>(.*?)</summary>(.*?)</details>)\s*</td>'
+            files_found = re.findall(pattern, changes_walkthrough_str, re.DOTALL)
+            for file_data in files_found:
+                try:
+                    if isinstance(file_data, tuple):
+                        file_data = file_data[0]
+                    # pattern = r'<details>\s*<summary><strong>(.*?)</strong><dd><code>(.*?)</code>.*?</summary>\s*<hr>\s*(.*?)\s*((?:\*.*\s*)*)</details>'
+                    pattern = r'<details>\s*<summary><strong>(.*?)</strong><dd><code>(.*?)</code>.*?</summary>\s*<hr>\s*(.*?)\n\n\s*(.*?)</details>'
+                    res = re.search(pattern, file_data, re.DOTALL)
+                    if res and res.lastindex == 4:
+                        short_filename = res.group(1).strip()
+                        short_summary = res.group(2).strip()
+                        long_filename = res.group(3).strip()
+                        long_summary = res.group(4).strip()
+                        long_summary = long_summary.replace('<br> *', '\n*').replace('<br>', '').replace('\n', '<br>')
+                        long_summary = h.handle(long_summary).strip()
+                        if not long_summary.startswith('*'):
+                            long_summary = f"* {long_summary}"
+
+                        files.append({
+                            'short_file_name': short_filename,
+                            'full_file_name': long_filename,
+                            'short_summary': short_summary,
+                            'long_summary': long_summary
+                        })
+                    else:
+                        get_logger().error(f"Failed to parse description", artifact={'description': file_data})
+                except Exception as e:
+                    get_logger().exception(f"Failed to process description: {e}", artifact={'description': file_data})
+
+
+    except Exception as e:
+        get_logger().exception(f"Failed to process description: {e}")
+
+    return base_description_str, files
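For reference, a hedged sketch of the walkthrough fragment `process_description` is built to parse. The table below is hand-written sample data shaped to satisfy both regexes, not real pr-agent output, and assumes `pr_agent.algo.utils` is importable:

```python
from pr_agent.algo.utils import process_description

sample = (
    "Some PR description...\n"
    "### **Changes walkthrough** 📝\n"
    "<table><tr><td>\n"
    "<details><summary><strong>parser.py</strong><dd><code>add error handling</code></dd></summary>\n"
    "<hr>\n"
    "src/parser.py\n"
    "\n"
    "* handle empty input<br> * raise on bad tokens\n"
    "</details>\n"
    "</td></tr></table>\n\n___\n"
)
base, files = process_description(sample)
# files[0] should come back roughly as:
# {'short_file_name': 'parser.py', 'full_file_name': 'src/parser.py',
#  'short_summary': 'add error handling',
#  'long_summary': '* handle empty input\n* raise on bad tokens'}
```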