pr-agent/pr_agent/algo/utils.py

from __future__ import annotations

import difflib
from datetime import datetime
import json
import logging
import re
import textwrap

from pr_agent.config_loader import settings


def convert_to_markdown(output_data: dict) -> str:
    """
    Render a (possibly nested) dictionary as a markdown string.

    Rendering rules, applied to each key/value pair in iteration order:
      * falsy values (None, "", 0, empty containers) are skipped;
      * a dict becomes a "## key" heading followed by its own recursive rendering;
      * a list becomes a bold bullet followed by one sub-bullet per truthy item
        (dict items under a "code suggestions" key go through parse_code_suggestion);
      * any other value becomes a single bold bullet, unless it is the string 'n/a'.

    Keys listed in the emoji table get their emoji placed in front of the bold key.

    Args:
        output_data: the dictionary to render.

    Returns:
        The markdown text (an empty string when nothing was rendered).
    """
    emojis = {
        "Main theme": "🎯",
        "Type of PR": "📌",
        "Score": "🏅",
        "Relevant tests added": "🧪",
        "Unrelated changes": "⚠️",
        "Focused PR": "✨",
        "Security concerns": "🔒",
        "General PR suggestions": "💡",
        "Insights from user's answers": "📝",
        "Code suggestions": "🤖",
    }

    parts = []
    for key, value in output_data.items():
        if not value:
            continue

        # Nested section: heading plus recursive rendering of the inner dict.
        if isinstance(value, dict):
            parts.append(f"## {key}\n\n")
            parts.append(convert_to_markdown(value))
            continue

        # List section: header bullet followed by one entry per item.
        if isinstance(value, list):
            is_code_suggestions = key.lower() == 'code suggestions'
            if is_code_suggestions:
                parts.append("\n")  # extra line break purely for nicer rendering
            icon = emojis.get(key, "")
            parts.append(f"- {icon} **{key}:**\n\n")
            for item in value:
                if is_code_suggestions and isinstance(item, dict):
                    parts.append(parse_code_suggestion(item))
                elif item:
                    parts.append(f"  - {item}\n")
            continue

        # Scalar value: a single bullet, with 'n/a' treated as "nothing to report".
        if value != 'n/a':
            icon = emojis.get(key, "")
            parts.append(f"- {icon} **{key}:** {value}\n")

    return "".join(parts)


def parse_code_suggestion(code_suggestions: dict) -> str:
    """
    Render a single code-suggestion dictionary as a markdown fragment.

    Scalar entries are written as "**key:** value" lines; an entry whose key
    contains "relevant file" (case-insensitive) is written as a bullet preceded
    by a blank line. Dict entries (e.g. a code example holding 'before' and
    'after' snippets) are written as a bullet with one fenced, indented code
    block per inner entry.

    Args:
        code_suggestions: mapping of field name to a scalar value or a dict of snippets.

    Returns:
        The markdown fragment, always terminated by an extra newline.
    """
    pieces = []
    for field, content in code_suggestions.items():
        if not isinstance(content, dict):
            if "relevant file" in field.lower():
                pieces.append(f"\n  - **{field}:** {content}\n")
            else:
                pieces.append(f"   **{field}:** {content}\n")
            continue

        # Nested dict, e.g. "code example" with 'before' and 'after' snippets.
        pieces.append(f"  - **{field}:**\n")
        for label, snippet in content.items():
            fenced = textwrap.indent(f"```\n{snippet}\n```", '        ')
            pieces.append(f"    - **{label}:**\n{fenced}\n")

    pieces.append("\n")
    return "".join(pieces)


def try_fix_json(review, max_iter=10, code_suggestions=False):
    """
    Try to recover a parsed object from a broken or truncated JSON review string.

    If `review` ends with "}", it is handed to `fix_json_escape_char`. Otherwise,
    when the text contains a "Code suggestions" list, it is cut back to the end of
    the last complete list item (a "}" followed by optional whitespace and a ","),
    the closing brackets are appended and parsing is attempted. On failure the
    search moves to the previous item boundary, for at most `max_iter` attempts.

    Args:
        review: the raw (possibly incomplete) JSON text.
        max_iter: maximum number of truncate-and-parse attempts.
        code_suggestions: when True the text is closed with "]}", otherwise
            with "]}}" (one more enclosing object).

    Returns:
        The parsed JSON data, or an empty dict when nothing could be recovered.
    """
    if review.endswith("}"):
        return fix_json_escape_char(review)

    closing_bracket = "]}" if code_suggestions else "]}}"

    has_suggestions_list = (review.rfind("'Code suggestions': [") > 0
                            or review.rfind('"Code suggestions": [') > 0)
    if not has_suggestions_list:
        return {}

    for _ in range(max_iter):
        # Ends of all "}," occurrences (any whitespace/newlines allowed in between).
        item_ends = [m.end() for m in re.finditer(r"\}\s*,", review)]
        if not item_ends:
            # No item boundary left to cut at: give up instead of raising IndexError.
            break
        cut = item_ends[-1] - 1  # index of the comma, so the slice drops it
        try:
            return json.loads(review[:cut] + closing_bracket)
        except json.decoder.JSONDecodeError:
            # Drop the last item and retry from the previous boundary.
            review = review[:cut]

    logging.error("Unable to decode JSON response from AI")
    return {}


def fix_json_escape_char(json_message=None):
    """
    Parse a JSON string, blanking out characters the decoder rejects.

    Each time `json.loads` fails, the character at the offset reported by the
    decoder is replaced with a space and parsing is retried. The repair runs
    iteratively, so the number of bad characters is not limited by the
    interpreter's recursion depth.

    Args:
        json_message: the JSON text to parse.

    Returns:
        The parsed JSON data.

    Raises:
        json.JSONDecodeError: if the reported offset lies past the end of the
            text (e.g. truncated input) or already holds a space, i.e. replacing
            it cannot make progress.
    """
    while True:
        try:
            return json.loads(json_message)
        except json.JSONDecodeError as e:
            # Offset of the offending character, as reported by the decoder.
            idx_to_replace = e.pos
            # Replacing cannot help here; re-raise rather than loop forever
            # or index past the end of the text.
            if idx_to_replace >= len(json_message) or json_message[idx_to_replace] == ' ':
                raise
            # Remove the offending character:
            json_message = json_message[:idx_to_replace] + ' ' + json_message[idx_to_replace + 1:]


def convert_str_to_datetime(date_str):
    """
    Parse a date string such as 'Tue, 18 Jul 2023 20:14:47 GMT' into a datetime.

    The expected layout is: abbreviated weekday, comma, day of month, abbreviated
    month name, four-digit year, HH:MM:SS time and a timezone name.

    Args:
        date_str: the text to parse.

    Returns:
        The datetime produced by `datetime.strptime`.

    Raises:
        ValueError: if `date_str` does not match the expected layout.
    """
    return datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S %Z')


def load_large_diff(file, new_file_content_str: str, original_file_content_str: str, patch: str) -> str:
    """
    Return `patch`, building a unified diff from the file contents when it is empty.

    This is best-effort: if the diff cannot be built, the failure is logged and
    the (empty) `patch` that was passed in is returned instead of raising.

    Args:
        file: object exposing a `filename` attribute (used only in log messages).
        new_file_content_str: the file content after the change.
        original_file_content_str: the file content before the change.
        patch: an existing patch; returned unchanged when non-empty.

    Returns:
        The existing patch, the generated unified diff, or the empty input patch
        when generation failed.
    """
    if patch:  # to Do - also add condition for file extension
        return patch

    try:
        # unified_diff is lazy; the diff lines are produced by the join below.
        diff = difflib.unified_diff(original_file_content_str.splitlines(keepends=True),
                                    new_file_content_str.splitlines(keepends=True))
        if settings.config.verbosity_level >= 2:
            logging.warning(f"File was modified, but no patch was found. Manually creating patch: {file.filename}.")
        patch = ''.join(diff)
    except Exception as e:
        # Keep returning the input patch on failure, but leave a trace in the log
        # instead of swallowing the error silently.
        logging.warning("Failed to manually create patch for %s: %s", getattr(file, "filename", file), e)
    return patch
Initial commit - PR-Agent OSS release 2023-07-06 00:21:08 +03:00			`from __future__ import annotations`

Add Incremental Review 2023-07-18 23:14:47 +03:00			`import difflib`
			`from datetime import datetime`
refactor try_fix_json, generalize finding the ending of a json item (support new lines, spaces tab) 2023-07-11 22:11:42 +03:00			`import json`
			`import logging`
			`import re`
Initial commit - PR-Agent OSS release 2023-07-06 00:21:08 +03:00			`import textwrap`

Add Incremental Review 2023-07-18 23:14:47 +03:00			`from pr_agent.config_loader import settings`

Initial commit - PR-Agent OSS release 2023-07-06 00:21:08 +03:00
			`def convert_to_markdown(output_data: dict) -> str:`
			`markdown_text = ""`

			`emojis = {`
			`"Main theme": "🎯",`
			`"Type of PR": "📌",`
Add configuration to request a score for the PR This can help teams compare the review of the PR agent with that of a human reviewer, and fine-tune a score threshold for automatic approval where they decide the agent's review is satisfactory. 2023-07-18 16:27:42 +03:00			`"Score": "🏅",`
Initial commit - PR-Agent OSS release 2023-07-06 00:21:08 +03:00			`"Relevant tests added": "🧪",`
			`"Unrelated changes": "⚠️",`
Focused PR update 2023-07-11 08:50:28 +03:00			`"Focused PR": "✨",`
Initial commit - PR-Agent OSS release 2023-07-06 00:21:08 +03:00			`"Security concerns": "🔒",`
			`"General PR suggestions": "💡",`
bugfix 2023-07-18 16:32:51 +03:00			`"Insights from user's answers": "📝",`
Change Review title when 2023-07-19 01:03:47 +03:00			`"Code suggestions": "🤖",`
Initial commit - PR-Agent OSS release 2023-07-06 00:21:08 +03:00			`}`

			`for key, value in output_data.items():`
			`if not value:`
			`continue`
			`if isinstance(value, dict):`
			`markdown_text += f"## {key}\n\n"`
			`markdown_text += convert_to_markdown(value)`
			`elif isinstance(value, list):`
			`if key.lower() == 'code suggestions':`
			`markdown_text += "\n" # just looks nicer with additional line breaks`
Update utils.py 2023-07-19 15:12:50 +03:00			`emoji = emojis.get(key, "")`
Initial commit - PR-Agent OSS release 2023-07-06 00:21:08 +03:00			`markdown_text += f"- {emoji} {key}:\n\n"`
			`for item in value:`
			`if isinstance(item, dict) and key.lower() == 'code suggestions':`
			`markdown_text += parse_code_suggestion(item)`
			`elif item:`
			`markdown_text += f" - {item}\n"`
			`elif value != 'n/a':`
Update utils.py 2023-07-19 15:12:50 +03:00			`emoji = emojis.get(key, "")`
Initial commit - PR-Agent OSS release 2023-07-06 00:21:08 +03:00			`markdown_text += f"- {emoji} {key}: {value}\n"`
			`return markdown_text`


			`def parse_code_suggestion(code_suggestions: dict) -> str:`
			`markdown_text = ""`
			`for sub_key, sub_value in code_suggestions.items():`
			`if isinstance(sub_value, dict): # "code example"`
			`markdown_text += f" - {sub_key}:\n"`
			`for code_key, code_value in sub_value.items(): # 'before' and 'after' code`
			code_str = f"```\n{code_value}\n```"
			`code_str_indented = textwrap.indent(code_str, ' ')`
			`markdown_text += f" - {code_key}:\n{code_str_indented}\n"`
			`else:`
remove suggestion number 2023-07-13 08:10:36 +03:00			`if "relevant file" in sub_key.lower():`
formatting 2023-07-06 12:49:10 +03:00			`markdown_text += f"\n - {sub_key}: {sub_value}\n"`
Initial commit - PR-Agent OSS release 2023-07-06 00:21:08 +03:00			`else:`
formatting 2023-07-06 12:49:10 +03:00			`markdown_text += f" {sub_key}: {sub_value}\n"`

Initial commit - PR-Agent OSS release 2023-07-06 00:21:08 +03:00			`markdown_text += "\n"`
			`return markdown_text`

refactor try_fix_json, generalize finding the ending of a json item (support new lines, spaces tab) 2023-07-11 22:11:42 +03:00
Support Code Suggestion in Gitlab 2023-07-17 01:44:40 +03:00			`def try_fix_json(review, max_iter=10, code_suggestions=False):`
			`if review.endswith("}"):`
			`return fix_json_escape_char(review)`
refactor try_fix_json, generalize finding the ending of a json item (support new lines, spaces tab) 2023-07-11 22:11:42 +03:00			`# Try to fix JSON if it is broken/incomplete: parse until the last valid code suggestion`
			`data = {}`
Support Code Suggestion in Gitlab 2023-07-17 01:44:40 +03:00			`if code_suggestions:`
			`closing_bracket = "]}"`
			`else:`
			`closing_bracket = "]}}"`
refactor try_fix_json, generalize finding the ending of a json item (support new lines, spaces tab) 2023-07-11 22:11:42 +03:00			`if review.rfind("'Code suggestions': [") > 0 or review.rfind('"Code suggestions": [') > 0:`
			`last_code_suggestion_ind = [m.end() for m in re.finditer(r"\}\s*,", review)][-1] - 1`
			`valid_json = False`
add max_iter 2023-07-11 22:22:08 +03:00			`iter_count = 0`
			`while last_code_suggestion_ind > 0 and not valid_json and iter_count < max_iter:`
refactor try_fix_json, generalize finding the ending of a json item (support new lines, spaces tab) 2023-07-11 22:11:42 +03:00			`try:`
Support Code Suggestion in Gitlab 2023-07-17 01:44:40 +03:00			`data = json.loads(review[:last_code_suggestion_ind] + closing_bracket)`
refactor try_fix_json, generalize finding the ending of a json item (support new lines, spaces tab) 2023-07-11 22:11:42 +03:00			`valid_json = True`
Support Code Suggestion in Gitlab 2023-07-17 01:44:40 +03:00			`review = review[:last_code_suggestion_ind].strip() + closing_bracket`
refactor try_fix_json, generalize finding the ending of a json item (support new lines, spaces tab) 2023-07-11 22:11:42 +03:00			`except json.decoder.JSONDecodeError:`
			`review = review[:last_code_suggestion_ind]`
			`# Use regular expression to find the last occurrence of "}," with any number of whitespaces or newlines`
			`last_code_suggestion_ind = [m.end() for m in re.finditer(r"\}\s*,", review)][-1] - 1`
add max_iter 2023-07-11 22:22:08 +03:00			`iter_count += 1`
refactor try_fix_json, generalize finding the ending of a json item (support new lines, spaces tab) 2023-07-11 22:11:42 +03:00			`if not valid_json:`
			`logging.error("Unable to decode JSON response from AI")`
			`data = {}`
			`return data`
Support Code Suggestion in Gitlab 2023-07-17 01:44:40 +03:00
Lint fixes 2023-07-18 11:34:57 +03:00
Support Code Suggestion in Gitlab 2023-07-17 01:44:40 +03:00			`def fix_json_escape_char(json_message=None):`
			`try:`
			`result = json.loads(json_message)`
			`except Exception as e:`
			`# Find the offending character index:`
			`idx_to_replace = int(str(e).split(' ')[-1].replace(')', ''))`
			`# Remove the offending character:`
			`json_message = list(json_message)`
			`json_message[idx_to_replace] = ' '`
			`new_message = ''.join(json_message)`
Lint fixes 2023-07-18 11:34:57 +03:00			`return fix_json_escape_char(json_message=new_message)`
			`return result`
Add Incremental Review 2023-07-18 23:14:47 +03:00

			`def convert_str_to_datetime(date_str):`
			`datetime_format = '%a, %d %b %Y %H:%M:%S %Z'`
			`return datetime.strptime(date_str, datetime_format)`


			`def load_large_diff(file, new_file_content_str: str, original_file_content_str: str, patch: str) -> str:`
			`if not patch: # to Do - also add condition for file extension`
			`try:`
			`diff = difflib.unified_diff(original_file_content_str.splitlines(keepends=True),`
			`new_file_content_str.splitlines(keepends=True))`
			`if settings.config.verbosity_level >= 2:`
			`logging.warning(f"File was modified, but no patch was found. Manually creating patch: {file.filename}.")`
			`patch = ''.join(diff)`
			`except Exception:`
			`pass`
			`return patch`