def try_fix_json(review, max_iter=10):
    """Try to recover a dict from a truncated/broken JSON review.

    The AI response may be cut off in the middle of the "Code suggestions"
    list. This function looks for every "}," separator (an object close
    followed by a comma, with optional whitespace in between), truncates the
    text right after the closing brace, appends "]}}" to close the list and
    the two enclosing objects, and tries to parse the result. Candidates are
    tried from the end of the text backwards, so the longest parsable prefix
    wins.

    Args:
        review: Raw (possibly incomplete) JSON text returned by the model.
        max_iter: Maximum number of truncation points to try.

    Returns:
        The parsed dict, or an empty dict when the text has no
        "Code suggestions" list or no candidate prefix could be parsed.
    """
    markers = ("'Code suggestions': [", '"Code suggestions": [')
    if not any(marker in review for marker in markers):
        return {}

    # Index of the comma in each "}<whitespace>," occurrence; slicing up to it
    # keeps the closing brace and drops the comma. Computed once: truncating
    # the text at one cut point never changes the earlier cut points.
    cut_points = [m.end() - 1 for m in re.finditer(r"\}\s*,", review)]

    # Walk from the last candidate to the first. An empty candidate list (or
    # running out of candidates) simply falls through to the error path
    # instead of raising IndexError.
    for attempt, cut in enumerate(reversed(cut_points)):
        if attempt >= max_iter:
            break
        try:
            return json.loads(review[:cut] + "]}}")
        except json.decoder.JSONDecodeError:
            continue

    logging.error("Unable to decode JSON response from AI")
    return {}
class TestTryFixJson:
    """Unit tests for try_fix_json on reviews truncated inside 'Code suggestions'.

    Every input is cut off somewhere in the second code suggestion, so each
    test expects the same dict holding only the first, complete suggestion.
    """

    @staticmethod
    def _first_suggestion_only():
        # Build a fresh expected dict per call so tests never share state.
        analysis = {
            'Main theme': 'xxx',
            'Description and title': 'Yes',
            'Type of PR': 'Bug fix',
        }
        first_suggestion = {
            'suggestion number': 1,
            'relevant file': 'xxx.py',
            'suggestion content': 'xxx [important]',
        }
        feedback = {
            'General PR suggestions': '..., `xxx`...',
            'Code suggestions': [first_suggestion],
        }
        return {'PR Analysis': analysis, 'PR Feedback': feedback}

    # Second suggestion is truncated in the middle of its content string.
    def test_incomplete_code_suggestions(self):
        truncated = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...'
        result = try_fix_json(truncated)
        assert result == self._first_suggestion_only()

    # Whitespace (space, newline, tab) separates the closing brace from the comma.
    def test_incomplete_code_suggestions_new_line(self):
        truncated = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n\t, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...'
        result = try_fix_json(truncated)
        assert result == self._first_suggestion_only()

    # The truncated tail contains several misleading "}," sequences.
    def test_incomplete_code_suggestions_many_close_brackets(self):
        truncated = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy }, [}\n ,incomp.} ,..'
        result = try_fix_json(truncated)
        assert result == self._first_suggestion_only()

    # Second suggestion is truncated inside its "relevant file" value.
    def test_incomplete_code_suggestions_relevant_file(self):
        truncated = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.p'
        result = try_fix_json(truncated)
        assert result == self._first_suggestion_only()