Refactor try_fix_json; generalize finding the end of a JSON item (support newlines, spaces, and tabs)

This commit is contained in:
Hussam.lawen
2023-07-11 22:11:42 +03:00
parent a3211d4958
commit fd4a2bf7ff
2 changed files with 26 additions and 18 deletions

View File

@ -1,5 +1,8 @@
from __future__ import annotations
import json
import logging
import re
import textwrap
@ -61,3 +64,23 @@ def parse_code_suggestion(code_suggestions: dict) -> str:
markdown_text += "\n"
return markdown_text
def try_fix_json(review):
    """Salvage a truncated/broken JSON review by dropping trailing suggestions.

    The text is cut just before the comma of a ``},`` item delimiter (any
    whitespace, tabs or newlines may sit between the brace and the comma),
    the closing ``]}}`` is appended, and the result is parsed. Cut points are
    tried from the last delimiter backwards, so the longest parseable prefix
    wins.

    Args:
        review: Raw (possibly incomplete) JSON text.

    Returns:
        The parsed object for the longest recoverable prefix, or an empty
        dict when the text has no "Code suggestions" list or no prefix
        parses.
    """
    has_suggestions_list = (
        review.rfind("'Code suggestions': [") > 0
        or review.rfind('"Code suggestions": [') > 0
    )
    if not has_suggestions_list:
        return {}

    # Index of the comma in every "}<whitespace>," delimiter. Matches never
    # overlap and do not depend on the text after them, so a single scan
    # yields the same cut points as re-scanning each truncated prefix.
    cut_points = [m.end() - 1 for m in re.finditer(r"\}\s*,", review)]

    for cut in reversed(cut_points):
        try:
            return json.loads(review[:cut] + "]}}")
        except json.decoder.JSONDecodeError:
            # This prefix is still broken; fall back to the previous item.
            continue

    # No delimiter at all, or every candidate prefix failed to parse.
    logging.error("Unable to decode JSON response from AI")
    return {}

View File

@ -7,7 +7,7 @@ from jinja2 import Environment, StrictUndefined
from pr_agent.algo.ai_handler import AiHandler
from pr_agent.algo.pr_processing import get_pr_diff
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import convert_to_markdown
from pr_agent.algo.utils import convert_to_markdown, try_fix_json
from pr_agent.config_loader import settings
from pr_agent.git_providers import get_git_provider
from pr_agent.git_providers.git_provider import get_main_pr_language
@ -77,22 +77,7 @@ class PRReviewer:
try:
data = json.loads(review)
except json.decoder.JSONDecodeError:
# Try to fix JSON if it is broken/incomplete: parse until the last valid code suggestion
if review.rfind("'Code suggestions': [") > 0 or review.rfind('"Code suggestions": [') > 0:
last_code_suggestion_ind = review.rfind(",")
valid_json = False
data = {}
while last_code_suggestion_ind > 0 and not valid_json:
try:
data = json.loads(review[:last_code_suggestion_ind] + "]}}")
valid_json = True
review = review[:last_code_suggestion_ind] + "]}}"
except json.decoder.JSONDecodeError:
review = review[:last_code_suggestion_ind]
last_code_suggestion_ind = review.rfind("},") + 1
if not valid_json:
logging.error("Unable to decode JSON response from AI")
data = {}
data = try_fix_json(review)
# reordering for nicer display
if 'PR Feedback' in data:
@ -118,4 +103,4 @@ class PRReviewer:
if settings.config.verbosity_level >= 2:
logging.info(f"Markdown response:\n{markdown_text}")
return markdown_text
return markdown_text