From 4b8420aa16871c52e0a205e1ae9b693da37c4ead Mon Sep 17 00:00:00 2001 From: mrT23 Date: Thu, 13 Jul 2023 08:10:36 +0300 Subject: [PATCH 1/2] remove suggestion number --- pr_agent/algo/utils.py | 5 +---- pr_agent/settings/pr_reviewer_prompts.toml | 5 ----- tests/unit/test_convert_to_markdown.py | 2 -- tests/unit/test_fix_output.py | 12 ++++-------- tests/unit/test_parse_code_suggestion.py | 10 ---------- 5 files changed, 5 insertions(+), 29 deletions(-) diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 0102c7b7..67c6f52b 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -53,10 +53,7 @@ def parse_code_suggestion(code_suggestions: dict) -> str: code_str_indented = textwrap.indent(code_str, ' ') markdown_text += f" - **{code_key}:**\n{code_str_indented}\n" else: - if "suggestion number" in sub_key.lower(): - # markdown_text += f"- **suggestion {sub_value}:**\n" # prettier formatting - pass - elif "relevant file" in sub_key.lower(): + if "relevant file" in sub_key.lower(): markdown_text += f"\n - **{sub_key}:** {sub_value}\n" else: markdown_text += f" **{sub_key}:** {sub_value}\n" diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index 678c7520..62d91a38 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -47,10 +47,6 @@ You must use the following JSON schema to format your answer: "maxItems": {{ num_code_suggestions }}, "uniqueItems": true, "items": { - "suggestion number": { - "type": "int", - "description": "suggestion number, starting from 1" - }, "relevant file": { "type": "string", "description": "the relevant file name" @@ -115,7 +111,6 @@ Example output: "General PR suggestions": "..., `xxx`...", "Code suggestions": [ { - "suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]", {%- if extended_code_suggestions %} diff --git a/tests/unit/test_convert_to_markdown.py b/tests/unit/test_convert_to_markdown.py index bfd9c9b5..a40574ae 100644 --- a/tests/unit/test_convert_to_markdown.py +++ b/tests/unit/test_convert_to_markdown.py @@ -54,14 +54,12 @@ class TestConvertToMarkdown: 'General PR suggestions': 'general suggestion...', 'Code suggestions': [ { - 'Suggestion number': 1, 'Code example': { 'Before': 'Code before', 'After': 'Code after' } }, { - 'Suggestion number': 2, 'Code example': { 'Before': 'Code before 2', 'After': 'Code after 2' diff --git a/tests/unit/test_fix_output.py b/tests/unit/test_fix_output.py index 8141b74e..af0794ad 100644 --- a/tests/unit/test_fix_output.py +++ b/tests/unit/test_fix_output.py @@ -7,7 +7,7 @@ import pytest class TestTryFixJson: # Tests that JSON with complete 'Code suggestions' section returns expected output def test_incomplete_code_suggestions(self): - review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...' + review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...' expected_output = { 'PR Analysis': { 'Main theme': 'xxx', @@ -18,7 +18,6 @@ class TestTryFixJson: 'General PR suggestions': '..., `xxx`...', 'Code suggestions': [ { - 'suggestion number': 1, 'relevant file': 'xxx.py', 'suggestion content': 'xxx [important]' } @@ -28,7 +27,7 @@ class TestTryFixJson: assert try_fix_json(review) == expected_output def test_incomplete_code_suggestions_new_line(self): - review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n\t, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...' + review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n\t, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...' expected_output = { 'PR Analysis': { 'Main theme': 'xxx', @@ -39,7 +38,6 @@ class TestTryFixJson: 'General PR suggestions': '..., `xxx`...', 'Code suggestions': [ { - 'suggestion number': 1, 'relevant file': 'xxx.py', 'suggestion content': 'xxx [important]' } @@ -49,7 +47,7 @@ class TestTryFixJson: assert try_fix_json(review) == expected_output def test_incomplete_code_suggestions_many_close_brackets(self): - review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy }, [}\n ,incomp.} ,..' + review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy }, [}\n ,incomp.} ,..' expected_output = { 'PR Analysis': { 'Main theme': 'xxx', @@ -60,7 +58,6 @@ class TestTryFixJson: 'General PR suggestions': '..., `xxx`...', 'Code suggestions': [ { - 'suggestion number': 1, 'relevant file': 'xxx.py', 'suggestion content': 'xxx [important]' } @@ -70,7 +67,7 @@ class TestTryFixJson: assert try_fix_json(review) == expected_output def test_incomplete_code_suggestions_relevant_file(self): - review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.p' + review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.p' expected_output = { 'PR Analysis': { 'Main theme': 'xxx', @@ -81,7 +78,6 @@ class TestTryFixJson: 'General PR suggestions': '..., `xxx`...', 'Code suggestions': [ { - 'suggestion number': 1, 'relevant file': 'xxx.py', 'suggestion content': 'xxx [important]' } diff --git a/tests/unit/test_parse_code_suggestion.py b/tests/unit/test_parse_code_suggestion.py index 87e3cac8..aaa03f72 100644 --- a/tests/unit/test_parse_code_suggestion.py +++ b/tests/unit/test_parse_code_suggestion.py @@ -41,14 +41,6 @@ class TestParseCodeSuggestion: expected_output = "\n" # modified to expect a newline character assert parse_code_suggestion(input_data) == expected_output - # Tests that function returns correct output when 'suggestion number' key has a non-integer value - def test_non_integer_suggestion_number(self): - input_data = { - "Suggestion number": "one", - "Description": "This is a suggestion" - } - expected_output = " **Description:** This is a suggestion\n\n" - assert parse_code_suggestion(input_data) == expected_output # Tests that function returns correct output when 'before' or 'after' key has a non-string value def test_non_string_before_or_after(self): @@ -64,7 +56,6 @@ class TestParseCodeSuggestion: # Tests that function returns correct output when input dictionary does not have 'code example' key def test_no_code_example_key(self): code_suggestions = { - 'suggestion number': 1, 'suggestion': 'Suggestion 1', 'description': 'Description 1', 'before': 'Before 1', @@ -76,7 +67,6 @@ class TestParseCodeSuggestion: # Tests that function returns correct output when input dictionary has 'code example' key def test_with_code_example_key(self): code_suggestions = { - 'suggestion number': 2, 'suggestion': 'Suggestion 2', 'description': 'Description 2', 'code example': { From 77a451ada005eb21b1256234d75b38ebeae18d35 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Thu, 13 Jul 2023 09:44:33 +0300 Subject: [PATCH 2/2] inline_code_comments --- README.md | 39 -------------------- pr_agent/algo/pr_processing.py | 4 +-- pr_agent/git_providers/github_provider.py | 2 ++ pr_agent/settings/configuration.toml | 4 +-- pr_agent/settings/pr_reviewer_prompts.toml | 39 +++----------------- pr_agent/tools/pr_reviewer.py | 42 ++++++++++++++++++++-- 6 files changed, 51 insertions(+), 79 deletions(-) diff --git a/README.md b/README.md index 364c722f..a2afa596 100644 --- a/README.md +++ b/README.md @@ -250,45 +250,6 @@ require_tests_review=true require_security_review=true ``` -#### Code Suggestions configuration: - -There are also configuration options to control different aspects of the `code suggestions` feature. -The number of suggestions provided can be controlled by adjusting the following parameter: - -``` -num_code_suggestions=4 -``` - -You can also enable more verbose and informative mode of code suggestions: - -``` -extended_code_suggestions=false -``` - -This is a comparison of the regular and extended code suggestions modes: - -- **relevant file:** sql.py -- **suggestion content:** Remove hardcoded sensitive information like username and password. Use environment variables or a secure method to store these values. [important] - -Example for extended suggestion: - -- **relevant file:** sql.py -- **suggestion content:** Remove hardcoded sensitive information (username and password) [important] -- **why:** Hardcoding sensitive information is a security risk. It's better to use environment variables or a secure way to store these values. -- **code example:** - - **before code:** - ``` - user = "root", - password = "Mysql@123", - ``` - - **after code:** - ``` - user = os.getenv('DB_USER'), - password = os.getenv('DB_PASSWORD'), - ``` - ---- - ## How it works ![PR-Agent Tools](./pics/pr_agent_overview.png) diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 0853ce9c..63fe0caa 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -24,10 +24,10 @@ def get_pr_diff(git_provider: Union[GithubProvider, Any], token_handler: TokenHa Returns a string with the diff of the PR. If needed, apply diff minimization techniques to reduce the number of tokens """ - files = list(git_provider.get_diff_files()) + git_provider.pr.files = list(git_provider.get_diff_files()) # get pr languages - pr_languages = sort_files_by_main_languages(git_provider.get_languages(), files) + pr_languages = sort_files_by_main_languages(git_provider.get_languages(), git_provider.pr.files) # generate a standard diff string, with patch extension patches_extended, total_tokens = pr_generate_extended_diff(pr_languages, token_handler) diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index ecc624c7..49b0ccbe 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -26,6 +26,8 @@ class GithubProvider: self.pr = self._get_pr() def get_files(self): + if hasattr(self.pr, 'files'): + return self.pr.files return self.pr.get_files() def get_diff_files(self) -> list[FilePatchInfo]: diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index aa476966..09896973 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -2,14 +2,14 @@ model="gpt-4-0613" git_provider="github" publish_review=true -verbosity_level=0 # 0,1,2 +verbosity_level=0 # 0,1,2 [pr_reviewer] require_focused_review=true require_tests_review=true require_security_review=true -extended_code_suggestions=false num_code_suggestions=4 +inline_code_comments = true [pr_questions] diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index 62d91a38..36b4c8cd 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -3,9 +3,6 @@ system="""You are CodiumAI-PR-Reviewer, a language model designed to review git Your task is to provide constructive and concise feedback for the PR, and also provide meaningfull code suggestions to improve the new PR code (the '+' lines). - Provide up to {{ num_code_suggestions }} code suggestions. - Try to focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningfull code improvements, like performance, vulnerability, modularity, and best practices. -{%- if extended_code_suggestions %} -- For each suggestion, provide a short and concise code snippet to illustrate the existing code, and the improved code. -{%- endif %} - Make sure not to provide suggestion repeating modifications already implemented in the new PR code (the '+' lines). You must use the following JSON schema to format your answer: @@ -49,35 +46,16 @@ You must use the following JSON schema to format your answer: "items": { "relevant file": { "type": "string", - "description": "the relevant file name" + "description": "the relevant file full path" }, "suggestion content": { "type": "string", -{%- if extended_code_suggestions %} - "description": "a concrete suggestion for meaningfully improving the new PR code. Don't repeat previous suggestions. Add tags with importance measure that matches each suggestion ('important' or 'medium'). Do not make suggestions for updating or adding docstrings, renaming PR title and description, or linter like. -{%- else %} "description": "a concrete suggestion for meaningfully improving the new PR code. Also describe how, specifically, the suggestion can be applied to new PR code. Add tags with importance measure that matches each suggestion ('important' or 'medium'). Do not make suggestions for updating or adding docstrings, renaming PR title and description, or linter like. -{%- endif %} }, -{%- if extended_code_suggestions %} - "why": { + "relevant line in file": { "type": "string", - "description": "shortly explain why this suggestion is important" - }, - "code example": { - "type": "object", - "properties": { - "before code": { - "type": "string", - "description": "Short and concise code snippet, to illustrate the existing code" - }, - "after code": { - "type": "string", - "description": "Short and concise code snippet, to illustrate the improved code" - } - } + "description": "an authentic single code line from the PR git diff section, to which the suggestion applies." } -{%- endif %} } }, {%- if require_security %} @@ -111,16 +89,9 @@ Example output: "General PR suggestions": "..., `xxx`...", "Code suggestions": [ { - "relevant file": "xxx.py", + "relevant file": "directory/xxx.py", "suggestion content": "xxx [important]", -{%- if extended_code_suggestions %} - "why": "xxx", - "code example": - { - "before code": "xxx", - "after code": "xxx" - } -{%- endif %} + "relevant line in file": "xxx", }, ... ] diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index 21104848..23af99d5 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -33,7 +33,6 @@ class PRReviewer: "require_tests": settings.pr_reviewer.require_tests_review, "require_security": settings.pr_reviewer.require_security_review, "require_focused": settings.pr_reviewer.require_focused_review, - 'extended_code_suggestions': settings.pr_reviewer.extended_code_suggestions, 'num_code_suggestions': settings.pr_reviewer.num_code_suggestions, } self.token_handler = TokenHandler(self.git_provider.pr, @@ -55,6 +54,9 @@ class PRReviewer: logging.info('Pushing PR review...') self.git_provider.publish_comment(pr_comment) self.git_provider.remove_initial_comment() + if settings.pr_reviewer.inline_code_comments: + logging.info('Pushing inline code comments...') + self._publish_inline_code_comments() return "" async def _get_prediction(self): @@ -86,6 +88,9 @@ class PRReviewer: del data['PR Feedback']['Security concerns'] data['PR Analysis']['Security concerns'] = val + if settings.config.git_provider == 'github' and settings.pr_reviewer.inline_code_comments: + del data['PR Feedback']['Code suggestions'] + markdown_text = convert_to_markdown(data) user = self.git_provider.get_user_id() @@ -103,4 +108,37 @@ class PRReviewer: if settings.config.verbosity_level >= 2: logging.info(f"Markdown response:\n{markdown_text}") - return markdown_text \ No newline at end of file + return markdown_text + + def _publish_inline_code_comments(self): + if settings.config.git_provider != 'github': # inline comments are currently only supported for github + return + + review = self.prediction.strip() + try: + data = json.loads(review) + except json.decoder.JSONDecodeError: + data = try_fix_json(review) + + pr = self.git_provider.pr + last_commit_id = list(pr.get_commits())[-1] + files = list(self.git_provider.get_diff_files()) + + for d in data['PR Feedback']['Code suggestions']: + relevant_file = d['relevant file'].strip() + relevant_line_in_file = d['relevant line in file'].strip() + content = d['suggestion content'] + position = -1 + for file in files: + if file.filename.strip() == relevant_file: + patch = file.patch + patch_lines = patch.splitlines() + for i, line in enumerate(patch_lines): + if relevant_line_in_file in line: + position = i + if position == -1: + logging.info(f"Could not find position for {relevant_file} {relevant_line_in_file}") + else: + body = content + path = relevant_file.strip() + pr.create_review_comment(body=body, commit_id=last_commit_id, path=path, position=position) \ No newline at end of file