Refactor try_fix_json; generalize finding the end of a JSON item (support newlines, spaces, and tabs)

2025-07-21 04:50:39 +08:00 · 2023-07-11 22:11:42 +03:00
parent a3211d4958
commit fd4a2bf7ff
2 changed files with 26 additions and 18 deletions
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@ -1,5 +1,8 @@
 from __future__ import annotations

+import json
+import logging
+import re
 import textwrap


@ -61,3 +64,23 @@ def parse_code_suggestion(code_suggestions: dict) -> str:
    markdown_text += "\n"
    return markdown_text

+
+def try_fix_json(review):
+    """Parse a truncated AI review by cutting it back to the last complete code suggestion.
+
+    Only attempted when a "Code suggestions" list is present. Each `}` + whitespace + `,`
+    is a candidate cut; the text before the comma is closed with "]}}" and parsed, trying
+    the latest cut first. Returns the parsed dict, or {} if the key is absent or no cut parses.
+    """
+    data = {}
+    if review.rfind("'Code suggestions': [") > 0 or review.rfind('"Code suggestions": [') > 0:
+        # Index of the comma closing each item; computed once (matches cannot overlap).
+        cut_points = [m.end() - 1 for m in re.finditer(r"\}\s*,", review)]
+        for cut in reversed(cut_points):
+            try:
+                return json.loads(review[:cut] + "]}}")
+            except json.decoder.JSONDecodeError:
+                continue  # this cut leaves invalid JSON; fall back to an earlier item
+        # Reached when there are no cut points at all or none of them yields valid JSON.
+        logging.error("Unable to decode JSON response from AI")
+    return data
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@ -7,7 +7,7 @@ from jinja2 import Environment, StrictUndefined
 from pr_agent.algo.ai_handler import AiHandler
 from pr_agent.algo.pr_processing import get_pr_diff
 from pr_agent.algo.token_handler import TokenHandler
-from pr_agent.algo.utils import convert_to_markdown
+from pr_agent.algo.utils import convert_to_markdown, try_fix_json
 from pr_agent.config_loader import settings
 from pr_agent.git_providers import get_git_provider
 from pr_agent.git_providers.git_provider import get_main_pr_language
@ -77,22 +77,7 @@ class PRReviewer:
        try:
            data = json.loads(review)
        except json.decoder.JSONDecodeError:
-            # Try to fix JSON if it is broken/incomplete: parse until the last valid code suggestion
-            if review.rfind("'Code suggestions': [") > 0 or review.rfind('"Code suggestions": [') > 0:
-                last_code_suggestion_ind = review.rfind(",")
-                valid_json = False
-                data = {}
-                while last_code_suggestion_ind > 0 and not valid_json:
-                    try:
-                        data = json.loads(review[:last_code_suggestion_ind] + "]}}")
-                        valid_json = True
-                        review = review[:last_code_suggestion_ind] + "]}}"
-                    except json.decoder.JSONDecodeError:
-                        review = review[:last_code_suggestion_ind]
-                        last_code_suggestion_ind = review.rfind("},") + 1
-                if not valid_json:
-                    logging.error("Unable to decode JSON response from AI")
-                    data = {}
+            data = try_fix_json(review)

        # reordering for nicer display
        if 'PR Feedback' in data:
@ -118,4 +103,4 @@ class PRReviewer:

        if settings.config.verbosity_level >= 2:
            logging.info(f"Markdown response:\n{markdown_text}")
-        return markdown_text
+        return markdown_text