def try_fix_json(review):
    """Try to recover a dict from a broken or truncated JSON review string.

    Recovery is only attempted when the text contains a ``Code suggestions``
    list.  The text is cut right after a closing brace that is followed by a
    comma (any whitespace or newlines may sit between the two) and ``]}}`` is
    appended to close the list and the two enclosing objects.  Cut points are
    tried from the last one to the first, so the result keeps as many complete
    code suggestions as possible.

    Args:
        review: Raw JSON text, typically one that ``json.loads`` rejected.

    Returns:
        The parsed data, or ``{}`` when the text has no ``Code suggestions``
        list or when no cut point produces valid JSON (an error is logged in
        the latter case).
    """
    has_code_suggestions = (
        "'Code suggestions': [" in review or '"Code suggestions": [' in review
    )
    if not has_code_suggestions:
        return {}

    # Index of the comma in every "}<whitespace>," occurrence.  Slicing up to
    # that index keeps the closing brace and drops the comma and what follows.
    # Computed once: the matches inside a truncated prefix are exactly the
    # earlier matches of the full text, so re-scanning per attempt is wasted.
    cut_points = [m.end() - 1 for m in re.finditer(r"\}\s*,", review)]

    for cut in reversed(cut_points):
        try:
            return json.loads(review[:cut] + "]}}")
        except json.decoder.JSONDecodeError:
            # This cut did not leave a well-formed document; try an earlier one.
            continue

    # Reached when there is no candidate cut at all or every candidate failed.
    # Previously this situation raised IndexError instead of being reported.
    logging.error("Unable to decode JSON response from AI")
    return {}
AiHandler from pr_agent.algo.pr_processing import get_pr_diff from pr_agent.algo.token_handler import TokenHandler -from pr_agent.algo.utils import convert_to_markdown +from pr_agent.algo.utils import convert_to_markdown, try_fix_json from pr_agent.config_loader import settings from pr_agent.git_providers import get_git_provider from pr_agent.git_providers.git_provider import get_main_pr_language @@ -77,22 +77,7 @@ class PRReviewer: try: data = json.loads(review) except json.decoder.JSONDecodeError: - # Try to fix JSON if it is broken/incomplete: parse until the last valid code suggestion - if review.rfind("'Code suggestions': [") > 0 or review.rfind('"Code suggestions": [') > 0: - last_code_suggestion_ind = review.rfind(",") - valid_json = False - data = {} - while last_code_suggestion_ind > 0 and not valid_json: - try: - data = json.loads(review[:last_code_suggestion_ind] + "]}}") - valid_json = True - review = review[:last_code_suggestion_ind] + "]}}" - except json.decoder.JSONDecodeError: - review = review[:last_code_suggestion_ind] - last_code_suggestion_ind = review.rfind("},") + 1 - if not valid_json: - logging.error("Unable to decode JSON response from AI") - data = {} + data = try_fix_json(review) # reordering for nicer display if 'PR Feedback' in data: @@ -118,4 +103,4 @@ class PRReviewer: if settings.config.verbosity_level >= 2: logging.info(f"Markdown response:\n{markdown_text}") - return markdown_text + return markdown_text \ No newline at end of file