diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 4596223c..8cf6f01a 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -511,25 +511,28 @@ def _fix_key_value(key: str, value: str): return key, value -def load_yaml(response_text: str, keys_fix_yaml: List[str] = []) -> dict: +def load_yaml(response_text: str, keys_fix_yaml: List[str] = [], first_key="", last_key="") -> dict: response_text = response_text.removeprefix('```yaml').rstrip('`') try: data = yaml.safe_load(response_text) except Exception as e: get_logger().error(f"Failed to parse AI prediction: {e}") - data = try_fix_yaml(response_text, keys_fix_yaml=keys_fix_yaml) + data = try_fix_yaml(response_text, keys_fix_yaml=keys_fix_yaml, first_key=first_key, last_key=last_key) return data -def try_fix_yaml(response_text: str, keys_fix_yaml: List[str] = []) -> dict: +def try_fix_yaml(response_text: str, + keys_fix_yaml: List[str] = [], + first_key="", + last_key="",) -> dict: response_text_lines = response_text.split('\n') - keys = ['relevant line:', 'suggestion content:', 'relevant file:', 'existing code:', 'improved code:'] - keys = keys + keys_fix_yaml + keys_yaml = ['relevant line:', 'suggestion content:', 'relevant file:', 'existing code:', 'improved code:'] + keys_yaml = keys_yaml + keys_fix_yaml # first fallback - try to convert 'relevant line: ...' to relevant line: |-\n ...' response_text_lines_copy = response_text_lines.copy() for i in range(0, len(response_text_lines_copy)): - for key in keys: + for key in keys_yaml: if key in response_text_lines_copy[i] and not '|-' in response_text_lines_copy[i]: response_text_lines_copy[i] = response_text_lines_copy[i].replace(f'{key}', f'{key} |-\n ') @@ -540,14 +543,14 @@ def try_fix_yaml(response_text: str, keys_fix_yaml: List[str] = []) -> dict: except: get_logger().info(f"Failed to parse AI prediction after adding |-\n") - # second fallback - try to extract only range from first ```yaml to ```` + # second fallback - try to extract only range from first ```yaml to ``` snippet_pattern = r'```(yaml)?[\s\S]*?```' snippet = re.search(snippet_pattern, '\n'.join(response_text_lines_copy)) if snippet: snippet_text = snippet.group() try: data = yaml.safe_load(snippet_text.removeprefix('```yaml').rstrip('`')) - get_logger().info(f"Successfully parsed AI prediction after extracting yaml snippet") + get_logger().info(f"Successfully parsed AI prediction after extracting yaml snippet with second fallback") return data except: pass @@ -562,7 +565,27 @@ def try_fix_yaml(response_text: str, keys_fix_yaml: List[str] = []) -> dict: except: pass - # fourth fallback - try to remove last lines + # forth fallback - try to extract yaml snippet by first_key and 'last_key' + # note that 'last_key' can be in practice a key that is not the last key in the yaml snippet. + # it just needs to be some inner key, so we can look for newlines after it + if first_key and last_key: + index_start = response_text.find(f"\n{first_key}:") + if index_start == -1: + index_start = response_text.find(f"{first_key}:") + index_last_code = response_text.rfind(f"{last_key}:") + index_end = response_text.find("\n\n", index_last_code) # look for newlines after last_key + if index_end == -1: + index_end = len(response_text) + response_text_copy = response_text[index_start:index_end].strip().strip('```yaml').strip('`').strip() + try: + data = yaml.safe_load(response_text_copy) + get_logger().info(f"Successfully parsed AI prediction after extracting yaml snippet") + return data + except: + pass + + + # fifth fallback - try to remove last lines data = {} for i in range(1, len(response_text_lines)): response_text_lines_tmp = '\n'.join(response_text_lines[:-i]) diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index c3276033..a61abad3 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -186,9 +186,12 @@ class PRReviewer: Prepare the PR review by processing the AI prediction and generating a markdown-formatted text that summarizes the feedback. """ + first_key = 'review' + last_key = 'security_concerns' data = load_yaml(self.prediction.strip(), keys_fix_yaml=["estimated_effort_to_review_[1-5]:", "security_concerns:", "key_issues_to_review:", - "relevant_file:", "relevant_line:", "suggestion:"]) + "relevant_file:", "relevant_line:", "suggestion:"], + first_key=first_key, last_key=last_key) github_action_output(data, 'review') # move data['review'] 'key_issues_to_review' key to the end of the dictionary @@ -258,9 +261,12 @@ class PRReviewer: if get_settings().pr_reviewer.num_code_suggestions == 0: return + first_key = 'review' + last_key = 'security_concerns' data = load_yaml(self.prediction.strip(), keys_fix_yaml=["estimated_effort_to_review_[1-5]:", "security_concerns:", "key_issues_to_review:", - "relevant_file:", "relevant_line:", "suggestion:"]) + "relevant_file:", "relevant_line:", "suggestion:"], + first_key=first_key, last_key=last_key) comments: List[str] = [] for suggestion in data.get('code_feedback', []): relevant_file = suggestion.get('relevant_file', '').strip() diff --git a/tests/unittest/test_try_fix_yaml.py b/tests/unittest/test_try_fix_yaml.py index 21ba9211..a05fe807 100644 --- a/tests/unittest/test_try_fix_yaml.py +++ b/tests/unittest/test_try_fix_yaml.py @@ -42,3 +42,49 @@ age: 35 def test_empty_yaml_fixed(self): review_text = "" assert try_fix_yaml(review_text) is None + + + # The function extracts YAML snippet + def test_no_initial_yaml(self): + review_text = '''\ +I suggest the following: + +code_suggestions: +- relevant_file: | + src/index.ts + label: | + best practice + +- relevant_file: | + src/index2.ts + label: | + enhancment +``` + +We can further improve the code by using the `const` keyword instead of `var` in the `src/index.ts` file. +''' + expected_output = {'code_suggestions': [{'relevant_file': 'src/index.ts\n', 'label': 'best practice\n'}, {'relevant_file': 'src/index2.ts\n', 'label': 'enhancment'}]} + + assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='label') == expected_output + + def test_with_initial_yaml(self): + review_text = '''\ +I suggest the following: + +``` +code_suggestions: +- relevant_file: | + src/index.ts + label: | + best practice + +- relevant_file: | + src/index2.ts + label: | + enhancment +``` + +We can further improve the code by using the `const` keyword instead of `var` in the `src/index.ts` file. +''' + expected_output = {'code_suggestions': [{'relevant_file': 'src/index.ts\n', 'label': 'best practice\n'}, {'relevant_file': 'src/index2.ts\n', 'label': 'enhancment'}]} + assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='label') == expected_output \ No newline at end of file