Merge pull request #38 from Codium-ai/hl/try_fix_when_broken_output

Try to fix JSON output when it's broken or incomplete
2025-07-21 04:50:39 +08:00 · 2023-07-11 22:23:07 +03:00
parent 210d94f2aa b8a71b369d
commit 52a438b3c8
3 changed files with 120 additions and 9 deletions
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@ -1,5 +1,8 @@
 from __future__ import annotations

+import json
+import logging
+import re
 import textwrap


@ -61,3 +64,25 @@ def parse_code_suggestion(code_suggestions: dict) -> str:
    markdown_text += "\n"
    return markdown_text

+
+def try_fix_json(review, max_iter=10):
+    """Best-effort recovery of a broken/incomplete review: parse up to the last valid code suggestion.
+
+    Every "}" followed (after optional whitespace) by "," is a candidate end of a complete
+    suggestion. Starting from the last candidate, the text is cut just before that comma,
+    closed with "]}}" and parsed; at most max_iter candidates are tried.
+
+    Returns the parsed dict, or {} (after logging an error) when the review has no
+    'Code suggestions' list, no candidate, or no candidate that parses.
+    """
+    has_suggestions = "'Code suggestions': [" in review or '"Code suggestions": [' in review
+    # m.end() - 1 is the index of the comma, so review[:cut] keeps the "}" and drops the ","
+    cut_points = [m.end() - 1 for m in re.finditer(r"\}\s*,", review)] if has_suggestions else []
+    # No candidates means the loop is skipped, so input without any "}," yields {} instead of raising
+    for cut in cut_points[::-1][:max(max_iter, 0)]:
+        try:
+            return json.loads(review[:cut] + "]}}")
+        except json.decoder.JSONDecodeError:
+            continue
+    logging.error("Unable to decode JSON response from AI")
+    return {}
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@ -7,7 +7,7 @@ from jinja2 import Environment, StrictUndefined
 from pr_agent.algo.ai_handler import AiHandler
 from pr_agent.algo.pr_processing import get_pr_diff
 from pr_agent.algo.token_handler import TokenHandler
-from pr_agent.algo.utils import convert_to_markdown
+from pr_agent.algo.utils import convert_to_markdown, try_fix_json
 from pr_agent.config_loader import settings
 from pr_agent.git_providers import get_git_provider
 from pr_agent.git_providers.git_provider import get_main_pr_language
@ -69,11 +69,7 @@ class PRReviewer:
        model = settings.config.model
        response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
                                                                        system=system_prompt, user=user_prompt)
-        try:
-            json.loads(response)
-        except json.decoder.JSONDecodeError:
-            logging.warning("Could not decode JSON")
-            response = {}
+
        return response

    def _prepare_pr_review(self) -> str:
@ -81,8 +77,7 @@ class PRReviewer:
        try:
            data = json.loads(review)
        except json.decoder.JSONDecodeError:
-            logging.error("Unable to decode JSON response from AI")
-            data = {}
+            data = try_fix_json(review)

        # reordering for nicer display
        if 'PR Feedback' in data:
@ -108,4 +103,4 @@ class PRReviewer:

        if settings.config.verbosity_level >= 2:
            logging.info(f"Markdown response:\n{markdown_text}")
-        return markdown_text
+        return markdown_text
--- a/tests/unit/test_fix_output.py
+++ b/tests/unit/test_fix_output.py
@ -0,0 +1,91 @@
+# Generated by CodiumAI
+from pr_agent.algo.utils import try_fix_json
+
+
+import pytest
+
+class TestTryFixJson:
+    """Each case feeds try_fix_json a review that breaks off inside the second code suggestion.
+
+    In every case the expected result is the review with only the first, complete suggestion.
+    """
+
+    # The review up to and including the closing brace of the first (complete) suggestion
+    COMPLETE_PART = (
+        '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, '
+        '"PR Feedback": {"General PR suggestions": "..., `xxx`...", '
+        '"Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", '
+        '"suggestion content": "xxx [important]"}'
+    )
+    # Start of the second suggestion, for the cases that break off inside "suggestion content"
+    SECOND_START = '{"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy '
+
+    EXPECTED = {
+        'PR Analysis': {
+            'Main theme': 'xxx',
+            'Description and title': 'Yes',
+            'Type of PR': 'Bug fix',
+        },
+        'PR Feedback': {
+            'General PR suggestions': '..., `xxx`...',
+            'Code suggestions': [
+                {
+                    'suggestion number': 1,
+                    'relevant file': 'xxx.py',
+                    'suggestion content': 'xxx [important]',
+                },
+            ],
+        },
+    }
+
+    def _check(self, broken_tail):
+        # Append the broken remainder to the complete part and compare the repaired result
+        assert try_fix_json(self.COMPLETE_PART + broken_tail) == self.EXPECTED
+
+    # Second suggestion is cut off in the middle of its content
+    def test_incomplete_code_suggestions(self):
+        self._check(', ' + self.SECOND_START + '[incomp...')
+
+    # Same, with whitespace, a newline and a tab between the "}" and the ","
+    def test_incomplete_code_suggestions_new_line(self):
+        self._check(' \n\t, ' + self.SECOND_START + '[incomp...')
+
+    # The broken content itself contains several misleading "}" + "," sequences
+    def test_incomplete_code_suggestions_many_close_brackets(self):
+        self._check(' \n, ' + self.SECOND_START + '}, [}\n ,incomp.}  ,..')
+
+    # Second suggestion is cut off earlier, inside its "relevant file" value
+    def test_incomplete_code_suggestions_relevant_file(self):
+        self._check(', {"suggestion number": 2, "relevant file": "yyy.p')