Files
pr-agent/pr_agent/algo/utils.py

129 lines
4.9 KiB
Python
Raw Normal View History

2023-07-06 00:21:08 +03:00
from __future__ import annotations
2023-07-18 23:14:47 +03:00
import difflib
from datetime import datetime
import json
import logging
import re
2023-07-06 00:21:08 +03:00
import textwrap
2023-07-18 23:14:47 +03:00
from pr_agent.config_loader import settings
2023-07-06 00:21:08 +03:00
def convert_to_markdown(output_data: dict) -> str:
    """
    Render a (possibly nested) dictionary as a Markdown bullet list.

    Rules applied to each ``key: value`` pair, in order:
      * falsy values are skipped entirely;
      * a dict value becomes a ``## key`` heading followed by the recursive rendering of that dict;
      * a list value becomes a bullet with the key, followed by one sub-bullet per truthy item
        (for the 'code suggestions' key, dict items are rendered by ``parse_code_suggestion``);
      * any other value is rendered inline on the key's bullet, unless it equals 'n/a'.

    Args:
        output_data: mapping of section titles to values (scalars, lists or nested dicts).

    Returns:
        The Markdown text; an empty string when nothing was rendered.
    """
    # Decorative prefix per known key; unknown keys get no emoji.
    # NOTE(review): "Focused PR" maps to an empty string here - confirm that is intentional.
    emojis = {
        "Main theme": "🎯",
        "Type of PR": "📌",
        "Score": "🏅",
        "Relevant tests added": "🧪",
        "Unrelated changes": "⚠️",
        "Focused PR": "",
        "Security concerns": "🔒",
        "General PR suggestions": "💡",
        "Insights from user's answers": "📝",
        "Code suggestions": "🤖",
    }

    chunks = []
    for key, value in output_data.items():
        if not value:
            continue

        if isinstance(value, dict):
            chunks.append(f"## {key}\n\n")
            chunks.append(convert_to_markdown(value))
            continue

        if isinstance(value, list):
            is_code_suggestions = key.lower() == 'code suggestions'
            if is_code_suggestions:
                chunks.append("\n")  # just looks nicer with additional line breaks
            emoji = emojis.get(key, "")
            chunks.append(f"- {emoji} **{key}:**\n\n")
            for item in value:
                if is_code_suggestions and isinstance(item, dict):
                    chunks.append(parse_code_suggestion(item))
                elif item:
                    chunks.append(f" - {item}\n")
            continue

        if value != 'n/a':
            emoji = emojis.get(key, "")
            chunks.append(f"- {emoji} **{key}:** {value}\n")

    return "".join(chunks)
def parse_code_suggestion(code_suggestions: dict) -> str:
    """
    Render a single code-suggestion dictionary as Markdown.

    Scalar fields are written as ``**name:** value`` lines; a field whose name contains
    "relevant file" (case-insensitive) additionally starts a new bullet. A field whose value is a
    dict (the "code example") is written as a bullet followed by one fenced code block per entry.
    A trailing blank line terminates the suggestion.

    NOTE(review): the leading whitespace inside the literals below is reproduced exactly as found;
    confirm the nesting depth renders as intended.

    Args:
        code_suggestions: the fields of one suggestion.

    Returns:
        The Markdown text for the suggestion (just "\\n" for an empty dict).
    """
    parts = []
    for field, content in code_suggestions.items():
        if not isinstance(content, dict):
            if "relevant file" in field.lower():
                parts.append(f"\n - **{field}:** {content}\n")
            else:
                parts.append(f" **{field}:** {content}\n")
            continue

        # "code example": a mapping of labels ('before' / 'after') to code snippets
        parts.append(f" - **{field}:**\n")
        for label, snippet in content.items():
            fenced = textwrap.indent(f"```\n{snippet}\n```", ' ')
            parts.append(f" - **{label}:**\n{fenced}\n")

    parts.append("\n")
    return "".join(parts)
2023-07-17 01:44:40 +03:00
def _last_suggestion_boundary(text):
    """Return the index of the comma in the last ``},`` (whitespace allowed between) of *text*, or -1."""
    boundary = -1
    for match in re.finditer(r"\}\s*,", text):
        boundary = match.end() - 1
    return boundary


def try_fix_json(review, max_iter=10, code_suggestions=False):
    """
    Try to recover a dictionary from a broken or truncated JSON string.

    If the text ends with "}" it is handed to ``fix_json_escape_char``. Otherwise, when the text
    contains a "Code suggestions" list, it is cut back to the end of the last complete suggestion
    (the last ``},``), the closing brackets are appended, and parsing is retried - moving one
    suggestion further back after every failure.

    Args:
        review: the raw JSON text.
        max_iter: maximum number of truncate-and-parse attempts.
        code_suggestions: selects the closing brackets appended after truncation:
            "]}" when True, "]}}" when False.

    Returns:
        The parsed data, or an empty dict when the text could not be repaired.
    """
    if review.endswith("}"):
        return fix_json_escape_char(review)

    # Only a truncated "Code suggestions" list can be repaired here.
    has_suggestions_list = (review.rfind("'Code suggestions': [") > 0
                            or review.rfind('"Code suggestions": [') > 0)
    if not has_suggestions_list:
        return {}

    closing_bracket = "]}" if code_suggestions else "]}}"
    # A missing boundary yields -1 (instead of an IndexError), which simply ends the search.
    cut = _last_suggestion_boundary(review)
    attempts = 0
    while cut > 0 and attempts < max_iter:
        review = review[:cut]  # drop the separating comma and everything after it
        try:
            return json.loads(review + closing_bracket)
        except json.decoder.JSONDecodeError:
            # Still invalid: fall back to the previous "}," boundary
            cut = _last_suggestion_boundary(review)
            attempts += 1

    logging.error("Unable to decode JSON response from AI")
    return {}
2023-07-17 01:44:40 +03:00
2023-07-18 11:34:57 +03:00
2023-07-17 01:44:40 +03:00
def fix_json_escape_char(json_message=None):
    """
    Parse a JSON string, blanking out characters the decoder rejects.

    Each time ``json.loads`` fails, the character at the reported error position is replaced by a
    space and parsing is retried. The loop is iterative, so inputs with many offending characters
    do not exhaust the recursion limit.

    Args:
        json_message: the JSON text to parse.

    Returns:
        The parsed JSON value.

    Raises:
        json.JSONDecodeError: when blanking cannot make progress - the error position is past the
            end of the text or already holds a space.
        Any other exception raised by ``json.loads`` (e.g. for a non-string argument) propagates.
    """
    chars = None
    while True:
        try:
            return json.loads(json_message)
        except json.JSONDecodeError as exc:
            if chars is None:
                chars = list(json_message)
            # Use the decoder's own offset rather than parsing it out of the error message text
            idx_to_replace = exc.pos
            if idx_to_replace >= len(chars) or chars[idx_to_replace] == ' ':
                # Nothing left to blank at this position: retrying would never terminate
                raise
            # Remove the offending character:
            chars[idx_to_replace] = ' '
            json_message = ''.join(chars)
2023-07-18 23:14:47 +03:00
def convert_str_to_datetime(date_str):
    """
    Parse a date string such as 'Thu, 06 Jul 2023 00:21:08 GMT' into a ``datetime``.

    Args:
        date_str: text in the form '<weekday>, <day> <month> <year> <H>:<M>:<S> <timezone name>'.

    Returns:
        The parsed ``datetime``. The timezone name is only matched, not applied: the result
        carries no tzinfo.

    Raises:
        ValueError: if ``date_str`` does not match the expected format.
    """
    return datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S %Z')
def load_large_diff(file, new_file_content_str: str, original_file_content_str: str, patch: str) -> str:
    """
    Return the file's patch, generating a unified diff when no patch was supplied.

    Args:
        file: object describing the changed file; only its ``filename`` attribute is read, and
            only for the verbose log message.
        new_file_content_str: content of the file after the change.
        original_file_content_str: content of the file before the change.
        patch: the existing patch; returned untouched when non-empty.

    Returns:
        ``patch`` if it is non-empty, otherwise the unified diff between the original and the new
        content. Diff generation is best-effort: on failure the problem is logged and the
        (empty) ``patch`` that was passed in is returned.
    """
    if patch:  # TODO: also add a condition for the file extension
        return patch

    try:
        # Build the patch before the optional verbose logging, so that a problem while logging
        # cannot discard a diff that was generated successfully.
        patch = ''.join(difflib.unified_diff(original_file_content_str.splitlines(keepends=True),
                                             new_file_content_str.splitlines(keepends=True)))
        if settings.config.verbosity_level >= 2:
            logging.warning(f"File was modified, but no patch was found. Manually creating patch: {file.filename}.")
    except Exception as e:
        # Stay best-effort, but leave a trace instead of failing silently
        logging.warning("Error while manually creating a patch: %s", e)
    return patch