From 16b61eb4e88b91d5acd60fe05178ea71d5ec7440 Mon Sep 17 00:00:00 2001 From: koid Date: Wed, 20 Dec 2023 11:13:14 +0900 Subject: [PATCH 1/4] ignore header description in ai response --- pr_agent/algo/utils.py | 6 ++++++ tests/unittest/test_load_yaml.py | 12 ++++++++++++ 2 files changed, 18 insertions(+) diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 0ec1397b..97b02b56 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -317,6 +317,12 @@ def _fix_key_value(key: str, value: str): def load_yaml(response_text: str) -> dict: + # remove everything before the first ```yaml + snipet_pattern = r'```(yaml)?[\s\S]*?```' + snipet = re.search(snipet_pattern, response_text) + if snipet: + response_text = snipet.group() + response_text = response_text.removeprefix('```yaml').rstrip('`') try: data = yaml.safe_load(response_text) diff --git a/tests/unittest/test_load_yaml.py b/tests/unittest/test_load_yaml.py index a77c847b..34beee35 100644 --- a/tests/unittest/test_load_yaml.py +++ b/tests/unittest/test_load_yaml.py @@ -15,6 +15,18 @@ class TestLoadYaml: expected_output = {'name': 'John Smith', 'age': 35} assert load_yaml(yaml_str) == expected_output + def test_load_valid_yaml_with_description(self): + yaml_str = '''\ +Here is the answer in YAML format: + +```yaml +name: John Smith +age: 35 +``` +''' + expected_output = {'name': 'John Smith', 'age': 35} + assert load_yaml(yaml_str) == expected_output + def test_load_invalid_yaml1(self): yaml_str = \ '''\ From e2797ad09ae6eb049c3ddf46c1393fb563f152e0 Mon Sep 17 00:00:00 2001 From: koid Date: Thu, 21 Dec 2023 10:48:33 +0900 Subject: [PATCH 2/4] re-implemented YAML extraction as a fallback --- pr_agent/algo/utils.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 97b02b56..981a0068 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -317,12 +317,6 @@ def _fix_key_value(key: str, value: str): def load_yaml(response_text: str) -> dict: - # remove everything before the first ```yaml - snipet_pattern = r'```(yaml)?[\s\S]*?```' - snipet = re.search(snipet_pattern, response_text) - if snipet: - response_text = snipet.group() - response_text = response_text.removeprefix('```yaml').rstrip('`') try: data = yaml.safe_load(response_text) @@ -349,7 +343,19 @@ def try_fix_yaml(response_text: str) -> dict: except: get_logger().info(f"Failed to parse AI prediction after adding |-\n") - # second fallback - try to remove last lines + # second fallback - try to extract only range from first ```yaml to ```` + snippet_pattern = r'```(yaml)?[\s\S]*?```' + snippet = re.search(snippet_pattern, '\n'.join(response_text_lines_copy)) + if snippet: + snippet_text = snippet.group() + try: + data = yaml.safe_load(snippet_text.removeprefix('```yaml').rstrip('`')) + get_logger().info(f"Successfully parsed AI prediction after extracting yaml snippet") + return data + except: + pass + + # thrid fallback - try to remove last lines data = {} for i in range(1, len(response_text_lines)): response_text_lines_tmp = '\n'.join(response_text_lines[:-i]) @@ -360,7 +366,7 @@ def try_fix_yaml(response_text: str) -> dict: except: pass - # thrid fallback - try to remove leading and trailing curly brackets + # fourth fallback - try to remove leading and trailing curly brackets response_text_copy = response_text.strip().rstrip().removeprefix('{').removesuffix('}') try: data = yaml.safe_load(response_text_copy,) From f99862088efa03a5b4751ffca571e3dbf98844cb Mon Sep 17 00:00:00 2001 From: koid Date: Thu, 21 Dec 2023 11:09:25 +0900 Subject: [PATCH 3/4] re-implemented test case --- tests/unittest/test_load_yaml.py | 12 ------------ .../{try_fix_yaml.py => test_try_fix_yaml.py} | 15 ++++++++++++++- 2 files changed, 14 insertions(+), 13 deletions(-) rename tests/unittest/{try_fix_yaml.py => test_try_fix_yaml.py} (75%) diff --git a/tests/unittest/test_load_yaml.py b/tests/unittest/test_load_yaml.py index 34beee35..a77c847b 100644 --- a/tests/unittest/test_load_yaml.py +++ b/tests/unittest/test_load_yaml.py @@ -15,18 +15,6 @@ class TestLoadYaml: expected_output = {'name': 'John Smith', 'age': 35} assert load_yaml(yaml_str) == expected_output - def test_load_valid_yaml_with_description(self): - yaml_str = '''\ -Here is the answer in YAML format: - -```yaml -name: John Smith -age: 35 -``` -''' - expected_output = {'name': 'John Smith', 'age': 35} - assert load_yaml(yaml_str) == expected_output - def test_load_invalid_yaml1(self): yaml_str = \ '''\ diff --git a/tests/unittest/try_fix_yaml.py b/tests/unittest/test_try_fix_yaml.py similarity index 75% rename from tests/unittest/try_fix_yaml.py rename to tests/unittest/test_try_fix_yaml.py index fe37bb1c..21ba9211 100644 --- a/tests/unittest/try_fix_yaml.py +++ b/tests/unittest/test_try_fix_yaml.py @@ -19,6 +19,19 @@ class TestTryFixYaml: expected_output = {"relevant line": "value: 3"} assert try_fix_yaml(review_text) == expected_output + # The function extracts YAML snippet + def test_extract_snippet(self): + review_text = '''\ +Here is the answer in YAML format: + +```yaml +name: John Smith +age: 35 +``` +''' + expected_output = {'name': 'John Smith', 'age': 35} + assert try_fix_yaml(review_text) == expected_output + # The function removes the last line(s) of the YAML string and successfully parses the YAML string. def test_remove_last_line(self): review_text = "key: value\nextra invalid line\n" @@ -28,4 +41,4 @@ class TestTryFixYaml: # The YAML string is empty. def test_empty_yaml_fixed(self): review_text = "" - assert try_fix_yaml(review_text) is None \ No newline at end of file + assert try_fix_yaml(review_text) is None From a9d789978b54a401a3f29770a41507248f11578f Mon Sep 17 00:00:00 2001 From: koid Date: Thu, 21 Dec 2023 11:11:46 +0900 Subject: [PATCH 4/4] fix: remove last line --- pr_agent/algo/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 981a0068..8d008e92 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -362,7 +362,7 @@ def try_fix_yaml(response_text: str) -> dict: try: data = yaml.safe_load(response_text_lines_tmp,) get_logger().info(f"Successfully parsed AI prediction after removing {i} lines") - break + return data except: pass