diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 58f0ccb9..f92309c4 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -286,37 +286,39 @@ def _fix_key_value(key: str, value: str): return key, value -def load_yaml(review_text: str) -> dict: - review_text = review_text.removeprefix('```yaml').rstrip('`') +def load_yaml(response_text: str) -> dict: + response_text = response_text.removeprefix('```yaml').rstrip('`') try: - data = yaml.safe_load(review_text) + data = yaml.safe_load(response_text) except Exception as e: get_logger().error(f"Failed to parse AI prediction: {e}") - data = try_fix_yaml(review_text) + data = try_fix_yaml(response_text) return data -def try_fix_yaml(review_text: str) -> dict: - review_text_lines = review_text.split('\n') +def try_fix_yaml(response_text: str) -> dict: + response_text_lines = response_text.split('\n') + keys = ['relevant line:', 'suggestion content:', 'relevant file:'] # first fallback - try to convert 'relevant line: ...' to relevant line: |-\n ...' - review_text_lines_copy = review_text_lines.copy() - for i in range(0, len(review_text_lines_copy)): - if 'relevant line:' in review_text_lines_copy[i] and not '|-' in review_text_lines_copy[i]: - review_text_lines_copy[i] = review_text_lines_copy[i].replace('relevant line: ', - 'relevant line: |-\n ') + response_text_lines_copy = response_text_lines.copy() + for i in range(0, len(response_text_lines_copy)): + for key in keys: + if key in response_text_lines_copy[i] and not '|-' in response_text_lines_copy[i]: + response_text_lines_copy[i] = response_text_lines_copy[i].replace(f'{key}: ', + f'{key}: |-\n ') try: - data = yaml.load('\n'.join(review_text_lines_copy), Loader=yaml.SafeLoader) - get_logger().info(f"Successfully parsed AI prediction after adding |-\n to relevant line") + data = yaml.safe_load('\n'.join(response_text_lines_copy)) + get_logger().info(f"Successfully parsed AI prediction after adding |-\n") return data except: - get_logger().debug(f"Failed to parse AI prediction after adding |-\n to relevant line") + get_logger().debug(f"Failed to parse AI prediction after adding |-\n") # second fallback - try to remove last lines data = {} - for i in range(1, len(review_text_lines)): - review_text_lines_tmp = '\n'.join(review_text_lines[:-i]) + for i in range(1, len(response_text_lines)): + response_text_lines_tmp = '\n'.join(response_text_lines[:-i]) try: - data = yaml.load(review_text_lines_tmp, Loader=yaml.SafeLoader) + data = yaml.safe_load(response_text_lines_tmp,) get_logger().info(f"Successfully parsed AI prediction after removing {i} lines") break except: diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml index a3eb93a1..42ec7441 100644 --- a/pr_agent/settings/pr_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_code_suggestions_prompts.toml @@ -90,16 +90,19 @@ Code suggestions: Example output: ```yaml Code suggestions: - - relevant file: |- - src/file1.py - suggestion content: |- - Add a docstring to func1() - existing code: |- - def func1(): - relevant lines start: 12 - relevant lines end: 12 - improved code: |- - ... +- relevant file: |- + src/file1.py + suggestion content: |- + Add a docstring to func1() + existing code: |- + def func1(): + relevant lines start: |- + 12 + relevant lines end: |- + 12 + improved code: |- + ... +... ``` diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index 3f52a28e..c3e35295 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -278,14 +278,7 @@ class PRReviewer: if get_settings().pr_reviewer.num_code_suggestions == 0: return - review_text = self.prediction.strip() - review_text = review_text.removeprefix('```yaml').rstrip('`') - try: - data = yaml.load(review_text, Loader=SafeLoader) - except Exception as e: - get_logger().error(f"Failed to parse AI prediction: {e}") - data = try_fix_yaml(review_text) - + data = load_yaml(self.prediction.strip()) comments: List[str] = [] for suggestion in data.get('PR Feedback', {}).get('Code feedback', []): relevant_file = suggestion.get('relevant file', '').strip() diff --git a/tests/unittest/test_load_yaml.py b/tests/unittest/test_load_yaml.py index a345aee2..f1d274c2 100644 --- a/tests/unittest/test_load_yaml.py +++ b/tests/unittest/test_load_yaml.py @@ -12,7 +12,7 @@ class TestLoadYaml: expected_output = {'name': 'John Smith', 'age': 35} assert load_yaml(yaml_str) == expected_output - def test_load_complicated_yaml(self): + def test_load_invalid_yaml1(self): yaml_str = \ '''\ PR Analysis: @@ -30,3 +30,11 @@ PR Feedback: Security concerns: No''' expected_output = {'PR Analysis': {'Main theme': 'Enhancing the `/describe` command prompt by adding title and description', 'Type of PR': 'Enhancement', 'Relevant tests added': False, 'Focused PR': 'Yes, the PR is focused on enhancing the `/describe` command prompt.'}, 'PR Feedback': {'General suggestions': 'The PR seems to be well-structured and focused on a specific enhancement. However, it would be beneficial to add tests to ensure the new feature works as expected.', 'Code feedback': [{'relevant file': 'pr_agent/settings/pr_description_prompts.toml', 'suggestion': "Consider using a more descriptive variable name than 'user' for the command prompt. A more descriptive name would make the code more readable and maintainable. [medium]", 'relevant line': 'user="""PR Info:'}], 'Security concerns': False}} assert load_yaml(yaml_str) == expected_output + + def test_load_invalid_yaml2(self): + yaml_str = '''\ +- relevant file: src/app.py + suggestion content: The print statement is outside inside the if __name__ == \ + ''' + expected_output =[{'relevant file': 'src/app.py', 'suggestion content': 'The print statement is outside inside the if __name__ =='}] + assert load_yaml(yaml_str) == expected_output