Merge pull request #38 from Codium-ai/hl/try_fix_when_broken_output

Try to fix json output when it's broken or incomplete
2025-07-06 13:50:44 +08:00 · 2023-07-11 22:23:07 +03:00
parent 210d94f2aa b8a71b369d
commit 52a438b3c8
3 changed files with 120 additions and 9 deletions
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@ -1,5 +1,8 @@
 from __future__ import annotations
 import json
 import logging
 import re
 import textwrap
@ -61,3 +64,25 @@ def parse_code_suggestion(code_suggestions: dict) -> str:
    markdown_text += "\n"
    return markdown_text
 def try_fix_json(review, max_iter=10):
    """Best-effort recovery of a broken or truncated JSON review.

    Only texts containing a ``Code suggestions`` list opener are handled.
    Every ``}`` that is followed by optional whitespace and a comma is a
    candidate truncation point: the text is cut right after that brace, the
    suffix ``]}}`` is appended, and the result is parsed. Candidates are tried
    from the end of the text backwards until one parses.

    Args:
        review: Raw (possibly broken or incomplete) JSON text.
        max_iter: Maximum number of truncation points to try, starting with
            the one closest to the end of the text.

    Returns:
        The parsed data for the first candidate that yields valid JSON, or an
        empty dict when the list opener is missing or no candidate parses.
    """
    has_suggestions_list = (
        review.rfind("'Code suggestions': [") > 0
        or review.rfind('"Code suggestions": [') > 0
    )
    if not has_suggestions_list:
        return {}

    # Index of the comma in every "}," (whitespace/newlines allowed between
    # them); slicing up to that index keeps the brace and drops the comma.
    # Cutting the text never alters the matches located before the cut, so the
    # candidates are collected once instead of re-scanning on every retry.
    cut_points = [m.end() - 1 for m in re.finditer(r"\}\s*,", review)]

    # Latest candidate first. An empty candidate list (no "}," anywhere, or
    # all candidates exhausted) falls through to the error path below rather
    # than raising IndexError.
    for cut in cut_points[::-1][:max(max_iter, 0)]:
        try:
            return json.loads(review[:cut] + "]}}")
        except json.decoder.JSONDecodeError:
            continue

    logging.error("Unable to decode JSON response from AI")
    return {}
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@ -7,7 +7,7 @@ from jinja2 import Environment, StrictUndefined
 from pr_agent.algo.ai_handler import AiHandler
 from pr_agent.algo.pr_processing import get_pr_diff
 from pr_agent.algo.token_handler import TokenHandler
-from pr_agent.algo.utils import convert_to_markdown
+from pr_agent.algo.utils import convert_to_markdown, try_fix_json
 from pr_agent.config_loader import settings
 from pr_agent.git_providers import get_git_provider
 from pr_agent.git_providers.git_provider import get_main_pr_language
@ -69,11 +69,7 @@ class PRReviewer:
        model = settings.config.model
        response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
                                                                        system=system_prompt, user=user_prompt)
-        try:
+
            json.loads(response)
        except json.decoder.JSONDecodeError:
            logging.warning("Could not decode JSON")
            response = {}
        return response
    def _prepare_pr_review(self) -> str:
@ -81,8 +77,7 @@ class PRReviewer:
        try:
            data = json.loads(review)
        except json.decoder.JSONDecodeError:
-            logging.error("Unable to decode JSON response from AI")
+            data = try_fix_json(review)
            data = {}
        # reordering for nicer display
        if 'PR Feedback' in data:
@ -108,4 +103,4 @@ class PRReviewer:
        if settings.config.verbosity_level >= 2:
            logging.info(f"Markdown response:\n{markdown_text}")
-        return markdown_text
+        return markdown_text
--- a/tests/unit/test_fix_output.py
+++ b/tests/unit/test_fix_output.py
@ -0,0 +1,91 @@
 # Generated by CodiumAI
 from pr_agent.algo.utils import try_fix_json
 import pytest
 class TestTryFixJson:
    """Checks that try_fix_json keeps the intact part of a truncated review.

    Every case passes a response that is cut off inside the second code
    suggestion and expects the parsed result to contain only the first one.
    """

    # Structure each truncated input below is expected to be repaired to.
    RECOVERED_REVIEW = {
        'PR Analysis': {
            'Main theme': 'xxx',
            'Description and title': 'Yes',
            'Type of PR': 'Bug fix',
        },
        'PR Feedback': {
            'General PR suggestions': '..., `xxx`...',
            'Code suggestions': [
                {
                    'suggestion number': 1,
                    'relevant file': 'xxx.py',
                    'suggestion content': 'xxx [important]',
                },
            ],
        },
    }

    def test_incomplete_code_suggestions(self):
        # Cut off in the middle of the second suggestion's content string.
        broken = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...'
        assert try_fix_json(broken) == self.RECOVERED_REVIEW

    def test_incomplete_code_suggestions_new_line(self):
        # Same truncation, with a space, newline and tab between "}" and ",".
        broken = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n\t, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...'
        assert try_fix_json(broken) == self.RECOVERED_REVIEW

    def test_incomplete_code_suggestions_many_close_brackets(self):
        # The broken tail holds several misleading "}," sequences of its own.
        broken = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy }, [}\n ,incomp.}  ,..'
        assert try_fix_json(broken) == self.RECOVERED_REVIEW

    def test_incomplete_code_suggestions_relevant_file(self):
        # Cut off inside the second suggestion's "relevant file" value.
        broken = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.p'
        assert try_fix_json(broken) == self.RECOVERED_REVIEW