From e589dcb4894a9f058a098d656c6667279d4dbdfc Mon Sep 17 00:00:00 2001 From: mrT23 Date: Fri, 1 Mar 2024 13:02:50 +0200 Subject: [PATCH 1/5] Enhance markdown formatting and update prompt descriptions in pr_reviewer_prompts.toml --- pr_agent/algo/utils.py | 22 ++++++++++++++++++++-- pr_agent/settings/pr_reviewer_prompts.toml | 4 ++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index ef2e5712..61bde6f5 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -28,6 +28,20 @@ def get_setting(key: str) -> Any: except Exception: return global_settings.get(key, None) +def emphasize_header(text: str) -> str: + # Finding the position of the first occurrence of ": " + colon_position = text.find(": ") + + # Splitting the string and wrapping the first part in tags + if colon_position != -1: + # Everything before the colon (inclusive) is wrapped in tags + transformed_string = "" + text[:colon_position + 1] + "" + text[colon_position + 1:] + else: + # If there's no ": ", return the original string + transformed_string = text + + return transformed_string + def convert_to_markdown(output_data: dict, gfm_supported: bool=True) -> str: """ Convert a dictionary of data into markdown format. @@ -64,7 +78,10 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool=True) -> str: if gfm_supported: if 'Estimated effort to review' in key_nice: key_nice = 'Estimated effort to review [1-5]' - if 'possible issues' in key_nice.lower(): + if 'security concerns' in key_nice.lower(): + value = emphasize_header(value.strip()) + markdown_text += f" {emoji} {key_nice}\n\n{value}\n\n\n" + elif 'possible issues' in key_nice.lower(): value = value.strip() issues = value.split('\n- ') number_of_issues = len(issues) @@ -72,12 +89,13 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool=True) -> str: markdown_text += f" {emoji} {key_nice}\n" for i, issue in enumerate(issues): issue = issue.strip('-').strip() + issue = emphasize_header(issue.strip()) if i == 0: markdown_text += f"\n\n{issue}\n" else: markdown_text += f"\n\n\n{issue}\n" else: - value = value.strip('-').strip() + value = emphasize_header(value.strip('-').strip()) markdown_text += f" {emoji} {key_nice}\n\n{value}\n\n\n" else: markdown_text += f" {emoji} {key_nice}\n\n{value}\n\n\n" diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index 64ed4af6..fdc1777f 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -65,8 +65,8 @@ class Review(BaseModel) {%- if require_focused %} focused_pr: str = Field(description="Is this a focused PR, in the sense that all the PR code diff changes are united under a single focused theme ? If the theme is too broad, or the PR code diff changes are too scattered, then the PR is not focused. Explain your answer shortly.") {%- endif %} - possible_issues: str = Field(description="Does this PR code introduce clear issues, bugs, or performance concerns? If there are no apparent issues, respond with 'No'. If there are any issues, describe them briefly. Use bullet points if more than one issue. Be specific, and provide examples if possible.") - security_concerns: str = Field(description="does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' if there are no possible issues. Answer 'Yes, because ...' if there are security concerns or issues. Explain your answer. Be specific and give examples if possible") + possible_issues: str = Field(description="Does this PR code introduce clear issues, bugs, or performance concerns? If there are no apparent issues, respond with 'No'. If there are any issues, describe them briefly. Use bullet points if more than one issue. Be specific, and provide examples if possible. Start each bullet point with a short specific header, such as: "- Performance issue: ...", "- Bug: ...", etc.") + security_concerns: str = Field(description="does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible") {%- if num_code_suggestions > 0 %} class CodeSuggestion(BaseModel) From 7d081aa1d10a3e0b9304fdbd9f502941194fbab4 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Fri, 1 Mar 2024 13:09:47 +0200 Subject: [PATCH 2/5] fix tests --- tests/unittest/test_convert_to_markdown.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unittest/test_convert_to_markdown.py b/tests/unittest/test_convert_to_markdown.py index 5337ac6b..9b071a74 100644 --- a/tests/unittest/test_convert_to_markdown.py +++ b/tests/unittest/test_convert_to_markdown.py @@ -52,7 +52,7 @@ class TestConvertToMarkdown: 'suggestion': "Consider raising an exception or logging a warning when 'pr_url' attribute is not found. This can help in debugging issues related to the absence of 'pr_url' in instances where it's expected. [important]\n", 'relevant_line': '[return ""](https://github.com/Codium-ai/pr-agent-pro/pull/102/files#diff-52d45f12b836f77ed1aef86e972e65404634ea4e2a6083fb71a9b0f9bb9e062fR199)'}]} - expected_output = '## PR Review\n\n\n\n\n\n\n\n
โฑ๏ธ Estimated effort to review [1-5]\n\n1, because the changes are minimal and straightforward, focusing on a single functionality addition.\n\n\n
๐Ÿงช Relevant tests\n\nNo\n\n\n
๐Ÿ” Possible issues\n\nNo\n\n
๐Ÿ”’ Security concerns\n\nNo\n\n\n
\n\n\n
Code feedback:\n\n
relevant filepr_agent/git_providers/git_provider.py\n
suggestion      \n\n\n\nConsider raising an exception or logging a warning when \'pr_url\' attribute is not found. This can help in debugging issues related to the absence of \'pr_url\' in instances where it\'s expected. [important]\n\n\n
relevant linereturn ""

\n\n
' + expected_output = '## PR Review\n\n\n\n\n\n\n\n
โฑ๏ธ Estimated effort to review [1-5]\n\n1, because the changes are minimal and straightforward, focusing on a single functionality addition.\n\n\n
๐Ÿงช Relevant tests\n\nNo\n\n\n
๐Ÿ” Possible issues\n\nNo\n\n
๐Ÿ”’ Security concerns\n\nNo\n\n
\n\n\n
Code feedback:\n\n
relevant filepr_agent/git_providers/git_provider.py\n
suggestion      \n\n\n\nConsider raising an exception or logging a warning when \'pr_url\' attribute is not found. This can help in debugging issues related to the absence of \'pr_url\' in instances where it\'s expected. [important]\n\n\n
relevant linereturn ""

\n\n
' assert convert_to_markdown(input_data).strip() == expected_output.strip() From 35315c070f59970d1e52b5b418818efec8fd8716 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Fri, 1 Mar 2024 13:18:53 +0200 Subject: [PATCH 3/5] major --- pr_agent/settings/pr_reviewer_prompts.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index fdc1777f..0f432b7a 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -65,7 +65,7 @@ class Review(BaseModel) {%- if require_focused %} focused_pr: str = Field(description="Is this a focused PR, in the sense that all the PR code diff changes are united under a single focused theme ? If the theme is too broad, or the PR code diff changes are too scattered, then the PR is not focused. Explain your answer shortly.") {%- endif %} - possible_issues: str = Field(description="Does this PR code introduce clear issues, bugs, or performance concerns? If there are no apparent issues, respond with 'No'. If there are any issues, describe them briefly. Use bullet points if more than one issue. Be specific, and provide examples if possible. Start each bullet point with a short specific header, such as: "- Performance issue: ...", "- Bug: ...", etc.") + possible_issues: str = Field(description="Does this PR code introduce clear issues, bugs, or major performance concerns? If there are no apparent issues, respond with 'No'. If there are any issues, describe them briefly. Use bullet points if more than one issue. Be specific, and provide examples if possible. Start each bullet point with a short specific header, such as: "- Performance issue: ...", "- Bug: ...", etc.") security_concerns: str = Field(description="does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible") {%- if num_code_suggestions > 0 %} From b3fd05c465c4e933d07d41e40fc74164b69af901 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 3 Mar 2024 13:58:10 +0200 Subject: [PATCH 4/5] try-except --- pr_agent/algo/utils.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 61bde6f5..8edf74d2 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -28,19 +28,24 @@ def get_setting(key: str) -> Any: except Exception: return global_settings.get(key, None) + def emphasize_header(text: str) -> str: - # Finding the position of the first occurrence of ": " - colon_position = text.find(": ") + try: + # Finding the position of the first occurrence of ": " + colon_position = text.find(": ") - # Splitting the string and wrapping the first part in tags - if colon_position != -1: - # Everything before the colon (inclusive) is wrapped in tags - transformed_string = "" + text[:colon_position + 1] + "" + text[colon_position + 1:] - else: - # If there's no ": ", return the original string - transformed_string = text + # Splitting the string and wrapping the first part in tags + if colon_position != -1: + # Everything before the colon (inclusive) is wrapped in tags + transformed_string = "" + text[:colon_position + 1] + "" + text[colon_position + 1:] + else: + # If there's no ": ", return the original string + transformed_string = text - return transformed_string + return transformed_string + except Exception as e: + get_logger().exception(f"Failed to emphasize header: {e}") + return text def convert_to_markdown(output_data: dict, gfm_supported: bool=True) -> str: """ From c372c71514b18349a83c5f820e78fbf76fb5c1a1 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 3 Mar 2024 14:04:09 +0200 Subject: [PATCH 5/5] prompt --- pr_agent/settings/pr_reviewer_prompts.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index 0f432b7a..aa05fff8 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -65,7 +65,7 @@ class Review(BaseModel) {%- if require_focused %} focused_pr: str = Field(description="Is this a focused PR, in the sense that all the PR code diff changes are united under a single focused theme ? If the theme is too broad, or the PR code diff changes are too scattered, then the PR is not focused. Explain your answer shortly.") {%- endif %} - possible_issues: str = Field(description="Does this PR code introduce clear issues, bugs, or major performance concerns? If there are no apparent issues, respond with 'No'. If there are any issues, describe them briefly. Use bullet points if more than one issue. Be specific, and provide examples if possible. Start each bullet point with a short specific header, such as: "- Performance issue: ...", "- Bug: ...", etc.") + possible_issues: str = Field(description="Does this PR code introduce clear issues, bugs, or major performance concerns? If there are no apparent issues, respond with 'No'. If there are any issues, describe them briefly. Use bullet points if more than one issue. Be specific, and provide examples if possible. Start each bullet point with a short specific header, such as: "- Possible Bug: ...", etc.") security_concerns: str = Field(description="does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible") {%- if num_code_suggestions > 0 %}