From 9b1eb86d7599a5e8a257b3682f44335eec74a8e6 Mon Sep 17 00:00:00 2001 From: "Hussam.lawen" Date: Mon, 15 Jan 2024 15:10:54 +0200 Subject: [PATCH 1/6] first iteration of improved UI for /improve --extended --- pr_agent/tools/pr_code_suggestions.py | 175 ++++++++++++++++++-------- pr_agent/tools/pr_description.py | 38 +++--- 2 files changed, 141 insertions(+), 72 deletions(-) diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py index b4f7a973..5b29a01a 100644 --- a/pr_agent/tools/pr_code_suggestions.py +++ b/pr_agent/tools/pr_code_suggestions.py @@ -13,6 +13,7 @@ from pr_agent.config_loader import get_settings from pr_agent.git_providers import get_git_provider from pr_agent.git_providers.git_provider import get_main_pr_language from pr_agent.log import get_logger +from pr_agent.tools.pr_description import insert_br_after_x_chars class PRCodeSuggestions: @@ -55,37 +56,39 @@ class PRCodeSuggestions: get_settings().pr_code_suggestions_prompt.user) async def run(self): - try: - get_logger().info('Generating code suggestions for PR...') - if get_settings().config.publish_output: - self.git_provider.publish_comment("Preparing suggestions...", is_temporary=True) - - get_logger().info('Preparing PR code suggestions...') - if not self.is_extended: - await retry_with_fallback_models(self._prepare_prediction) - data = self._prepare_pr_code_suggestions() - else: - data = await retry_with_fallback_models(self._prepare_prediction_extended) - if (not data) or (not 'code_suggestions' in data): - get_logger().info('No code suggestions found for PR.') - return - - if (not self.is_extended and get_settings().pr_code_suggestions.rank_suggestions) or \ - (self.is_extended and get_settings().pr_code_suggestions.rank_extended_suggestions): - get_logger().info('Ranking Suggestions...') - data['code_suggestions'] = await self.rank_suggestions(data['code_suggestions']) - - if get_settings().config.publish_output: - get_logger().info('Pushing PR code suggestions...') - self.git_provider.remove_initial_comment() - if get_settings().pr_code_suggestions.summarize: - get_logger().info('Pushing summarize code suggestions...') - self.publish_summarizes_suggestions(data) - else: - get_logger().info('Pushing inline code suggestions...') - self.push_inline_code_suggestions(data) - except Exception as e: - get_logger().error(f"Failed to generate code suggestions for PR, error: {e}") + data = {'code_suggestions': [{'relevant_file': 'pr_agent/tools/pr_update_changelog.py', 'suggestion_content': 'The `get_git_provider()` function is called twice in the `__init__` method of `PRUpdateChangelog` class. It would be more efficient to call this function once and store the result in a variable, then use this variable in the conditional statements.\n', 'existing_code': 'if isinstance(self.git_provider, GithubProvider):\n self.git_provider = get_git_provider()(pr_url)\nelif isinstance(self.git_provider, GitLabProvider):\n self.git_provider = get_git_provider()(pr_url, incremental=True)\n', 'improved_code': 'git_provider = get_git_provider()\nif isinstance(self.git_provider, GithubProvider):\n self.git_provider = git_provider(pr_url)\nelif isinstance(self.git_provider, GitLabProvider):\n self.git_provider = git_provider(pr_url, incremental=True)\n', 'relevant_lines_start': 22, 'relevant_lines_end': 25, 'label': 'performance'}, {'relevant_file': 'pr_agent/tools/pr_update_changelog.py', 'suggestion_content': 'The `run` method of `PRUpdateChangelog` class has duplicate code for `GithubProvider` and `GitLabProvider`. This can be refactored to a separate method that accepts a `git_provider` as an argument, reducing code duplication.\n', 'existing_code': 'if isinstance(self.git_provider, GithubProvider):\n if get_settings().config.publish_output:\n self.git_provider.publish_comment("Preparing changelog updates...", is_temporary=True)\nelif isinstance(self.git_provider, GitLabProvider):\n # Add code for preparing changelog updates for GitLabProvider\n pass\n', 'improved_code': 'def prepare_changelog_updates(self, git_provider):\n if get_settings().config.publish_output:\n git_provider.publish_comment("Preparing changelog updates...", is_temporary=True)\n\nif isinstance(self.git_provider, GithubProvider):\n self.prepare_changelog_updates(self.git_provider)\nelif isinstance(self.git_provider, GitLabProvider):\n # Add code for preparing changelog updates for GitLabProvider\n pass\n', 'relevant_lines_start': 55, 'relevant_lines_end': 60, 'label': 'maintainability'}, {'relevant_file': 'pr_agent/tools/pr_update_changelog.py', 'suggestion_content': 'The `assert` statement in the `run` method of `PRUpdateChangelog` class can be replaced with a more descriptive exception. This would provide a more meaningful error message if the condition is not met.\n', 'existing_code': 'assert type(self.git_provider) in [GithubProvider, GitLabProvider], "Currently only Github and GitLab are supported"\n', 'improved_code': 'if type(self.git_provider) not in [GithubProvider, GitLabProvider]:\n raise ValueError("Unsupported git provider. Currently only Github and GitLab are supported.")\n', 'relevant_lines_start': 52, 'relevant_lines_end': 52, 'label': 'enhancement'}, {'relevant_file': 'pr_agent/tools/pr_update_changelog.py', 'suggestion_content': 'The `CHANGELOG_LINES` constant is defined at the global scope but is only used within the `PRUpdateChangelog` class. It would be more appropriate to define it as a class attribute.\n', 'existing_code': 'CHANGELOG_LINES = 25\n', 'improved_code': 'class PRUpdateChangelog:\n CHANGELOG_LINES = 25\n ...\n', 'relevant_lines_start': 16, 'relevant_lines_end': 16, 'label': 'best practice'}, {'relevant_file': '.github/ISSUE_TEMPLATE/sweep-bugfix.yml', 'suggestion_content': 'The `id` attribute for the second textarea is `description2`, which is not very descriptive. Consider using a more meaningful `id`.\n', 'existing_code': 'id: description2\n', 'improved_code': 'id: additional_details\n', 'relevant_lines_start': 13, 'relevant_lines_end': 13, 'label': 'enhancement'}, {'relevant_file': '.github/ISSUE_TEMPLATE/sweep-feature.yml', 'suggestion_content': 'The `description` attribute for the textarea is "More details for Sweep", which is not very descriptive. Consider providing a more specific description.\n', 'existing_code': 'description: More details for Sweep\n', 'improved_code': 'description: Please provide more details about the feature you want to test.\n', 'relevant_lines_start': 10, 'relevant_lines_end': 10, 'label': 'enhancement'}, {'relevant_file': '.github/ISSUE_TEMPLATE/sweep-refactor.yml', 'suggestion_content': 'The `placeholder` attribute for the textarea is "We are migrating this function to ... version because ...", which is not very clear. Consider providing a more specific placeholder.\n', 'existing_code': 'placeholder: We are migrating this function to ... version because ...\n', 'improved_code': 'placeholder: We are migrating this function to version 2.0 because it provides better performance.\n', 'relevant_lines_start': 11, 'relevant_lines_end': 11, 'label': 'enhancement'}]} + self.publish_summarizes_suggestions(data) + # try: + # get_logger().info('Generating code suggestions for PR...') + # if get_settings().config.publish_output: + # self.git_provider.publish_comment("Preparing suggestions...", is_temporary=True) + # + # get_logger().info('Preparing PR code suggestions...') + # if not self.is_extended: + # await retry_with_fallback_models(self._prepare_prediction) + # data = self._prepare_pr_code_suggestions() + # else: + # data = await retry_with_fallback_models(self._prepare_prediction_extended) + # if (not data) or (not 'code_suggestions' in data): + # get_logger().info('No code suggestions found for PR.') + # return + # + # if (not self.is_extended and get_settings().pr_code_suggestions.rank_suggestions) or \ + # (self.is_extended and get_settings().pr_code_suggestions.rank_extended_suggestions): + # get_logger().info('Ranking Suggestions...') + # data['code_suggestions'] = await self.rank_suggestions(data['code_suggestions']) + # + # if get_settings().config.publish_output: + # get_logger().info('Pushing PR code suggestions...') + # self.git_provider.remove_initial_comment() + # if get_settings().pr_code_suggestions.summarize: + # get_logger().info('Pushing summarize code suggestions...') + # self.publish_summarizes_suggestions(data) + # else: + # get_logger().info('Pushing inline code suggestions...') + # self.push_inline_code_suggestions(data) + # except Exception as e: + # get_logger().error(f"Failed to generate code suggestions for PR, error: {e}") async def _prepare_prediction(self, model: str): get_logger().info('Getting PR diff...') @@ -299,7 +302,7 @@ class PRCodeSuggestions: def publish_summarizes_suggestions(self, data: Dict): try: - data_markdown = "## PR Code Suggestions\n\n" + pr_body = "## PR Code Suggestions\n\n" language_extension_map_org = get_settings().language_extension_map_org extension_to_language = {} @@ -307,30 +310,96 @@ class PRCodeSuggestions: for ext in extensions: extension_to_language[ext] = language - for s in data['code_suggestions']: - try: - extension_s = s['relevant_file'].rsplit('.')[-1] - code_snippet_link = self.git_provider.get_line_link(s['relevant_file'], s['relevant_lines_start'], - s['relevant_lines_end']) - label = s['label'].strip() - data_markdown += f"\n💡 [{label}]\n\n**{s['suggestion_content'].rstrip().rstrip()}**\n\n" - if code_snippet_link: - data_markdown += f" File: [{s['relevant_file']} ({s['relevant_lines_start']}-{s['relevant_lines_end']})]({code_snippet_link})\n\n" - else: - data_markdown += f"File: {s['relevant_file']} ({s['relevant_lines_start']}-{s['relevant_lines_end']})\n\n" - if self.git_provider.is_supported("gfm_markdown"): - data_markdown += "
Example code:\n\n" - data_markdown += f"___\n\n" + pr_body += "" + header = f"Suggestions" + delta = 65 + header += "  " * delta + pr_body += f"""""" + pr_body += """""" + suggestions_labels = dict() + # add all suggestions related to to each label + for suggestion in data['code_suggestions']: + label = suggestion['label'].strip().strip("'").strip('"') + if label not in suggestions_labels: + suggestions_labels[label] = [] + suggestions_labels[label].append(suggestion) + + for label, suggestions in suggestions_labels.items(): + pr_body += f"""""" + pr_body += f"""""" + pr_body += """
{header}
{label}""" + for suggestion in suggestions: + + relevant_file = suggestion['relevant_file'].strip() + relevant_lines_start = int(suggestion['relevant_lines_start']) + relevant_lines_end = int(suggestion['relevant_lines_end']) + code_snippet_link = self.git_provider.get_line_link(relevant_file, relevant_lines_start, + relevant_lines_end) + # add html table for each suggestion + + suggestion_content = suggestion['suggestion_content'].rstrip().rstrip() + suggestion_content = insert_br_after_x_chars(suggestion_content, 90) + # pr_body += f" + + + +""" + pr_body += """
{suggestion_content}" + existing_code = suggestion['existing_code'].rstrip() + improved_code = suggestion['improved_code'].rstrip() language_name = "python" + extension_s = suggestion['relevant_file'].rsplit('.')[-1] if extension_s and (extension_s in extension_to_language): language_name = extension_to_language[extension_s] - data_markdown += f"Existing code:\n```{language_name}\n{s['existing_code'].rstrip()}\n```\n" - data_markdown += f"Improved code:\n```{language_name}\n{s['improved_code'].rstrip()}\n```\n" + example_code = "" if self.git_provider.is_supported("gfm_markdown"): - data_markdown += "
\n" - data_markdown += "\n___\n\n" - except Exception as e: - get_logger().error(f"Could not parse suggestion: {s}, error: {e}") - self.git_provider.publish_comment(data_markdown) + example_code = "
Example code:\n\n" + example_code += f"___\n\n" + example_code += f"Existing code:\n```{language_name}\n{existing_code}\n```\n" + example_code += f"Improved code:\n```{language_name}\n{improved_code}\n```\n" + if self.git_provider.is_supported("gfm_markdown"): + example_code += "
\n" + pr_body += f""" +
+ + +**{suggestion_content}** + +[{relevant_file} [{relevant_lines_start}-{relevant_lines_end}]]({code_snippet_link}) + +{example_code} +
""" + # for s in data['code_suggestions']: + # try: + # extension_s = s['relevant_file'].rsplit('.')[-1] + # code_snippet_link = self.git_provider.get_line_link(s['relevant_file'], s['relevant_lines_start'], + # s['relevant_lines_end']) + # label = s['label'].strip() + # data_markdown += f"\n💡 [{label}]\n\n**{s['suggestion_content'].rstrip().rstrip()}**\n\n" + # if code_snippet_link: + # data_markdown += f" File: [{s['relevant_file']} ({s['relevant_lines_start']}-{s['relevant_lines_end']})]({code_snippet_link})\n\n" + # else: + # data_markdown += f"File: {s['relevant_file']} ({s['relevant_lines_start']}-{s['relevant_lines_end']})\n\n" + # if self.git_provider.is_supported("gfm_markdown"): + # data_markdown += "
Example code:\n\n" + # data_markdown += f"___\n\n" + # language_name = "python" + # if extension_s and (extension_s in extension_to_language): + # language_name = extension_to_language[extension_s] + # data_markdown += f"Existing code:\n```{language_name}\n{s['existing_code'].rstrip()}\n```\n" + # data_markdown += f"Improved code:\n```{language_name}\n{s['improved_code'].rstrip()}\n```\n" + # if self.git_provider.is_supported("gfm_markdown"): + # data_markdown += "
\n" + # data_markdown += "\n___\n\n" + # + # pr_body += f"""{label}{s['relevant_file']}""" + # # in the right side of the table, add two collapsable sections with the existing and improved code + # pr_body += f"""
Existing code
{s['existing_code'].rstrip()}
""" + # pr_body += f"""
Improved code
{s['improved_code'].rstrip()}
""" + # + # + # except Exception as e: + # get_logger().error(f"Could not parse suggestion: {s}, error: {e}") + self.git_provider.publish_comment(pr_body) except Exception as e: get_logger().info(f"Failed to publish summarized code suggestions, error: {e}") diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py index 71906f0e..51df9ee4 100644 --- a/pr_agent/tools/pr_description.py +++ b/pr_agent/tools/pr_description.py @@ -399,7 +399,7 @@ class PRDescription: filename = filename.strip() link = self.git_provider.get_line_link(filename, relevant_line_start=-1) - file_change_description = self._insert_br_after_x_chars(file_change_description, x=(delta - 5)) + file_change_description = insert_br_after_x_chars(file_change_description, x=(delta - 5)) pr_body += f""" @@ -427,25 +427,25 @@ class PRDescription: pass return pr_body - def _insert_br_after_x_chars(self, text, x=70): - """ - Insert
into a string after a word that increases its length above x characters. - """ - if len(text) < x: - return text +def insert_br_after_x_chars(text, x=70): + """ + Insert
into a string after a word that increases its length above x characters. + """ + if len(text) < x: + return text - words = text.split(' ') - new_text = "" - current_length = 0 + words = text.split(' ') + new_text = "" + current_length = 0 - for word in words: - # Check if adding this word exceeds x characters - if current_length + len(word) > x: - new_text += "
" # Insert line break - current_length = 0 # Reset counter + for word in words: + # Check if adding this word exceeds x characters + if current_length + len(word) > x: + new_text += "
" # Insert line break + current_length = 0 # Reset counter - # Add the word to the new text - new_text += word + " " - current_length += len(word) + 1 # Add 1 for the space + # Add the word to the new text + new_text += word + " " + current_length += len(word) + 1 # Add 1 for the space - return new_text.strip() # Remove trailing space + return new_text.strip() # Remove trailing space From 5e17ccaf865012ae7d84d9d1a9cd26f944b17b11 Mon Sep 17 00:00:00 2001 From: "Hussam.lawen" Date: Mon, 15 Jan 2024 15:17:57 +0200 Subject: [PATCH 2/6] add colaplsable --- pr_agent/tools/pr_code_suggestions.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py index 5b29a01a..e0029382 100644 --- a/pr_agent/tools/pr_code_suggestions.py +++ b/pr_agent/tools/pr_code_suggestions.py @@ -326,7 +326,9 @@ class PRCodeSuggestions: for label, suggestions in suggestions_labels.items(): pr_body += f"""{label}""" - pr_body += f"""""" + pr_body += f"""""" + + pr_body += """
""" + pr_body += f"""
{len(suggestions)} suggestions""" + pr_body += f"""""" for suggestion in suggestions: relevant_file = suggestion['relevant_file'].strip() @@ -367,7 +369,10 @@ class PRCodeSuggestions: """ - pr_body += """
""" + pr_body += "
" + pr_body += """""" pr_body += """""" # for s in data['code_suggestions']: # try: From afefc15b9c2377af138fff7e4ab776db0e06e479 Mon Sep 17 00:00:00 2001 From: "Hussam.lawen" Date: Mon, 15 Jan 2024 15:56:48 +0200 Subject: [PATCH 3/6] improve doce suggestions UI with difflib --- pr_agent/tools/pr_code_suggestions.py | 40 +++++++++++++++++++-------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py index e0029382..0d990184 100644 --- a/pr_agent/tools/pr_code_suggestions.py +++ b/pr_agent/tools/pr_code_suggestions.py @@ -14,7 +14,7 @@ from pr_agent.git_providers import get_git_provider from pr_agent.git_providers.git_provider import get_main_pr_language from pr_agent.log import get_logger from pr_agent.tools.pr_description import insert_br_after_x_chars - +import difflib class PRCodeSuggestions: def __init__(self, pr_url: str, cli_mode=False, args: list = None, @@ -312,7 +312,7 @@ class PRCodeSuggestions: pr_body += "" header = f"Suggestions" - delta = 65 + delta = 77 header += "  " * delta pr_body += f"""""" pr_body += """""" @@ -334,6 +334,11 @@ class PRCodeSuggestions: relevant_file = suggestion['relevant_file'].strip() relevant_lines_start = int(suggestion['relevant_lines_start']) relevant_lines_end = int(suggestion['relevant_lines_end']) + range_str = "" + if relevant_lines_start == relevant_lines_end: + range_str = f"[{relevant_lines_start}]" + else: + range_str = f"[{relevant_lines_start}-{relevant_lines_end}]" code_snippet_link = self.git_provider.get_line_link(relevant_file, relevant_lines_start, relevant_lines_end) # add html table for each suggestion @@ -341,20 +346,31 @@ class PRCodeSuggestions: suggestion_content = suggestion['suggestion_content'].rstrip().rstrip() suggestion_content = insert_br_after_x_chars(suggestion_content, 90) # pr_body += f" From ea39e8684f12bb17c4ea73ce22284eedc78656dd Mon Sep 17 00:00:00 2001 From: "Hussam.lawen" Date: Mon, 15 Jan 2024 16:42:50 +0200 Subject: [PATCH 4/6] works --- pr_agent/tools/pr_code_suggestions.py | 66 +++++++++++++-------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py index 0d990184..b3929df5 100644 --- a/pr_agent/tools/pr_code_suggestions.py +++ b/pr_agent/tools/pr_code_suggestions.py @@ -56,39 +56,39 @@ class PRCodeSuggestions: get_settings().pr_code_suggestions_prompt.user) async def run(self): - data = {'code_suggestions': [{'relevant_file': 'pr_agent/tools/pr_update_changelog.py', 'suggestion_content': 'The `get_git_provider()` function is called twice in the `__init__` method of `PRUpdateChangelog` class. It would be more efficient to call this function once and store the result in a variable, then use this variable in the conditional statements.\n', 'existing_code': 'if isinstance(self.git_provider, GithubProvider):\n self.git_provider = get_git_provider()(pr_url)\nelif isinstance(self.git_provider, GitLabProvider):\n self.git_provider = get_git_provider()(pr_url, incremental=True)\n', 'improved_code': 'git_provider = get_git_provider()\nif isinstance(self.git_provider, GithubProvider):\n self.git_provider = git_provider(pr_url)\nelif isinstance(self.git_provider, GitLabProvider):\n self.git_provider = git_provider(pr_url, incremental=True)\n', 'relevant_lines_start': 22, 'relevant_lines_end': 25, 'label': 'performance'}, {'relevant_file': 'pr_agent/tools/pr_update_changelog.py', 'suggestion_content': 'The `run` method of `PRUpdateChangelog` class has duplicate code for `GithubProvider` and `GitLabProvider`. This can be refactored to a separate method that accepts a `git_provider` as an argument, reducing code duplication.\n', 'existing_code': 'if isinstance(self.git_provider, GithubProvider):\n if get_settings().config.publish_output:\n self.git_provider.publish_comment("Preparing changelog updates...", is_temporary=True)\nelif isinstance(self.git_provider, GitLabProvider):\n # Add code for preparing changelog updates for GitLabProvider\n pass\n', 'improved_code': 'def prepare_changelog_updates(self, git_provider):\n if get_settings().config.publish_output:\n git_provider.publish_comment("Preparing changelog updates...", is_temporary=True)\n\nif isinstance(self.git_provider, GithubProvider):\n self.prepare_changelog_updates(self.git_provider)\nelif isinstance(self.git_provider, GitLabProvider):\n # Add code for preparing changelog updates for GitLabProvider\n pass\n', 'relevant_lines_start': 55, 'relevant_lines_end': 60, 'label': 'maintainability'}, {'relevant_file': 'pr_agent/tools/pr_update_changelog.py', 'suggestion_content': 'The `assert` statement in the `run` method of `PRUpdateChangelog` class can be replaced with a more descriptive exception. This would provide a more meaningful error message if the condition is not met.\n', 'existing_code': 'assert type(self.git_provider) in [GithubProvider, GitLabProvider], "Currently only Github and GitLab are supported"\n', 'improved_code': 'if type(self.git_provider) not in [GithubProvider, GitLabProvider]:\n raise ValueError("Unsupported git provider. Currently only Github and GitLab are supported.")\n', 'relevant_lines_start': 52, 'relevant_lines_end': 52, 'label': 'enhancement'}, {'relevant_file': 'pr_agent/tools/pr_update_changelog.py', 'suggestion_content': 'The `CHANGELOG_LINES` constant is defined at the global scope but is only used within the `PRUpdateChangelog` class. It would be more appropriate to define it as a class attribute.\n', 'existing_code': 'CHANGELOG_LINES = 25\n', 'improved_code': 'class PRUpdateChangelog:\n CHANGELOG_LINES = 25\n ...\n', 'relevant_lines_start': 16, 'relevant_lines_end': 16, 'label': 'best practice'}, {'relevant_file': '.github/ISSUE_TEMPLATE/sweep-bugfix.yml', 'suggestion_content': 'The `id` attribute for the second textarea is `description2`, which is not very descriptive. Consider using a more meaningful `id`.\n', 'existing_code': 'id: description2\n', 'improved_code': 'id: additional_details\n', 'relevant_lines_start': 13, 'relevant_lines_end': 13, 'label': 'enhancement'}, {'relevant_file': '.github/ISSUE_TEMPLATE/sweep-feature.yml', 'suggestion_content': 'The `description` attribute for the textarea is "More details for Sweep", which is not very descriptive. Consider providing a more specific description.\n', 'existing_code': 'description: More details for Sweep\n', 'improved_code': 'description: Please provide more details about the feature you want to test.\n', 'relevant_lines_start': 10, 'relevant_lines_end': 10, 'label': 'enhancement'}, {'relevant_file': '.github/ISSUE_TEMPLATE/sweep-refactor.yml', 'suggestion_content': 'The `placeholder` attribute for the textarea is "We are migrating this function to ... version because ...", which is not very clear. Consider providing a more specific placeholder.\n', 'existing_code': 'placeholder: We are migrating this function to ... version because ...\n', 'improved_code': 'placeholder: We are migrating this function to version 2.0 because it provides better performance.\n', 'relevant_lines_start': 11, 'relevant_lines_end': 11, 'label': 'enhancement'}]} - self.publish_summarizes_suggestions(data) - # try: - # get_logger().info('Generating code suggestions for PR...') - # if get_settings().config.publish_output: - # self.git_provider.publish_comment("Preparing suggestions...", is_temporary=True) - # - # get_logger().info('Preparing PR code suggestions...') - # if not self.is_extended: - # await retry_with_fallback_models(self._prepare_prediction) - # data = self._prepare_pr_code_suggestions() - # else: - # data = await retry_with_fallback_models(self._prepare_prediction_extended) - # if (not data) or (not 'code_suggestions' in data): - # get_logger().info('No code suggestions found for PR.') - # return - # - # if (not self.is_extended and get_settings().pr_code_suggestions.rank_suggestions) or \ - # (self.is_extended and get_settings().pr_code_suggestions.rank_extended_suggestions): - # get_logger().info('Ranking Suggestions...') - # data['code_suggestions'] = await self.rank_suggestions(data['code_suggestions']) - # - # if get_settings().config.publish_output: - # get_logger().info('Pushing PR code suggestions...') - # self.git_provider.remove_initial_comment() - # if get_settings().pr_code_suggestions.summarize: - # get_logger().info('Pushing summarize code suggestions...') - # self.publish_summarizes_suggestions(data) - # else: - # get_logger().info('Pushing inline code suggestions...') - # self.push_inline_code_suggestions(data) - # except Exception as e: - # get_logger().error(f"Failed to generate code suggestions for PR, error: {e}") + # data = {'code_suggestions': [{'relevant_file': 'pr_agent/tools/pr_update_changelog.py', 'suggestion_content': 'The `get_git_provider()` function is called twice in the `__init__` method of `PRUpdateChangelog` class. It would be more efficient to call this function once and store the result in a variable, then use this variable in the conditional statements.\n', 'existing_code': 'if isinstance(self.git_provider, GithubProvider):\n self.git_provider = get_git_provider()(pr_url)\nelif isinstance(self.git_provider, GitLabProvider):\n self.git_provider = get_git_provider()(pr_url, incremental=True)\n', 'improved_code': 'git_provider = get_git_provider()\nif isinstance(self.git_provider, GithubProvider):\n self.git_provider = git_provider(pr_url)\nelif isinstance(self.git_provider, GitLabProvider):\n self.git_provider = git_provider(pr_url, incremental=True)\n', 'relevant_lines_start': 22, 'relevant_lines_end': 25, 'label': 'performance'}, {'relevant_file': 'pr_agent/tools/pr_update_changelog.py', 'suggestion_content': 'The `run` method of `PRUpdateChangelog` class has duplicate code for `GithubProvider` and `GitLabProvider`. This can be refactored to a separate method that accepts a `git_provider` as an argument, reducing code duplication.\n', 'existing_code': 'if isinstance(self.git_provider, GithubProvider):\n if get_settings().config.publish_output:\n self.git_provider.publish_comment("Preparing changelog updates...", is_temporary=True)\nelif isinstance(self.git_provider, GitLabProvider):\n # Add code for preparing changelog updates for GitLabProvider\n pass\n', 'improved_code': 'def prepare_changelog_updates(self, git_provider):\n if get_settings().config.publish_output:\n git_provider.publish_comment("Preparing changelog updates...", is_temporary=True)\n\nif isinstance(self.git_provider, GithubProvider):\n self.prepare_changelog_updates(self.git_provider)\nelif isinstance(self.git_provider, GitLabProvider):\n # Add code for preparing changelog updates for GitLabProvider\n pass\n', 'relevant_lines_start': 55, 'relevant_lines_end': 60, 'label': 'maintainability'}, {'relevant_file': 'pr_agent/tools/pr_update_changelog.py', 'suggestion_content': 'The `assert` statement in the `run` method of `PRUpdateChangelog` class can be replaced with a more descriptive exception. This would provide a more meaningful error message if the condition is not met.\n', 'existing_code': 'assert type(self.git_provider) in [GithubProvider, GitLabProvider], "Currently only Github and GitLab are supported"\n', 'improved_code': 'if type(self.git_provider) not in [GithubProvider, GitLabProvider]:\n raise ValueError("Unsupported git provider. Currently only Github and GitLab are supported.")\n', 'relevant_lines_start': 52, 'relevant_lines_end': 52, 'label': 'enhancement'}, {'relevant_file': 'pr_agent/tools/pr_update_changelog.py', 'suggestion_content': 'The `CHANGELOG_LINES` constant is defined at the global scope but is only used within the `PRUpdateChangelog` class. It would be more appropriate to define it as a class attribute.\n', 'existing_code': 'CHANGELOG_LINES = 25\n', 'improved_code': 'class PRUpdateChangelog:\n CHANGELOG_LINES = 25\n ...\n', 'relevant_lines_start': 16, 'relevant_lines_end': 16, 'label': 'best practice'}, {'relevant_file': '.github/ISSUE_TEMPLATE/sweep-bugfix.yml', 'suggestion_content': 'The `id` attribute for the second textarea is `description2`, which is not very descriptive. Consider using a more meaningful `id`.\n', 'existing_code': 'id: description2\n', 'improved_code': 'id: additional_details\n', 'relevant_lines_start': 13, 'relevant_lines_end': 13, 'label': 'enhancement'}, {'relevant_file': '.github/ISSUE_TEMPLATE/sweep-feature.yml', 'suggestion_content': 'The `description` attribute for the textarea is "More details for Sweep", which is not very descriptive. Consider providing a more specific description.\n', 'existing_code': 'description: More details for Sweep\n', 'improved_code': 'description: Please provide more details about the feature you want to test.\n', 'relevant_lines_start': 10, 'relevant_lines_end': 10, 'label': 'enhancement'}, {'relevant_file': '.github/ISSUE_TEMPLATE/sweep-refactor.yml', 'suggestion_content': 'The `placeholder` attribute for the textarea is "We are migrating this function to ... version because ...", which is not very clear. Consider providing a more specific placeholder.\n', 'existing_code': 'placeholder: We are migrating this function to ... version because ...\n', 'improved_code': 'placeholder: We are migrating this function to version 2.0 because it provides better performance.\n', 'relevant_lines_start': 11, 'relevant_lines_end': 11, 'label': 'enhancement'}]} + # self.publish_summarizes_suggestions(data) + try: + get_logger().info('Generating code suggestions for PR...') + if get_settings().config.publish_output: + self.git_provider.publish_comment("Preparing suggestions...", is_temporary=True) + + get_logger().info('Preparing PR code suggestions...') + if not self.is_extended: + await retry_with_fallback_models(self._prepare_prediction) + data = self._prepare_pr_code_suggestions() + else: + data = await retry_with_fallback_models(self._prepare_prediction_extended) + if (not data) or (not 'code_suggestions' in data): + get_logger().info('No code suggestions found for PR.') + return + + if (not self.is_extended and get_settings().pr_code_suggestions.rank_suggestions) or \ + (self.is_extended and get_settings().pr_code_suggestions.rank_extended_suggestions): + get_logger().info('Ranking Suggestions...') + data['code_suggestions'] = await self.rank_suggestions(data['code_suggestions']) + + if get_settings().config.publish_output: + get_logger().info('Pushing PR code suggestions...') + self.git_provider.remove_initial_comment() + if get_settings().pr_code_suggestions.summarize: + get_logger().info('Pushing summarize code suggestions...') + self.publish_summarizes_suggestions(data) + else: + get_logger().info('Pushing inline code suggestions...') + self.push_inline_code_suggestions(data) + except Exception as e: + get_logger().error(f"Failed to generate code suggestions for PR, error: {e}") async def _prepare_prediction(self, model: str): get_logger().info('Getting PR diff...') From 31576b77ff40bb7f14334010960a7b4bde7077e2 Mon Sep 17 00:00:00 2001 From: "Hussam.lawen" Date: Mon, 15 Jan 2024 19:07:41 +0200 Subject: [PATCH 5/6] improve backticks --- pr_agent/algo/utils.py | 11 ++- .../settings/pr_code_suggestions_prompts.toml | 18 ++++- pr_agent/tools/pr_code_suggestions.py | 80 +++++-------------- 3 files changed, 45 insertions(+), 64 deletions(-) diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 87411430..9a150864 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -474,4 +474,13 @@ def clip_tokens(text: str, max_tokens: int, add_three_dots=True) -> str: return clipped_text except Exception as e: get_logger().warning(f"Failed to clip tokens: {e}") - return text \ No newline at end of file + return text + +def replace_code_tags(text): + """ + Replace odd instances of ` with and even instances of ` with + """ + parts = text.split('`') + for i in range(1, len(parts), 2): + parts[i] = '' + parts[i] + '' + return ''.join(parts) \ No newline at end of file diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml index 5aa3cce2..2fb224c7 100644 --- a/pr_agent/settings/pr_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_code_suggestions_prompts.toml @@ -47,14 +47,15 @@ Extra instructions from the user: ====== {%- endif %} -The output must be a YAML object equivalent to type PRCodeSuggestions, according to the following Pydantic definitions: +The output must be a YAML object equivalent to type $PRCodeSuggestions, according to the following Pydantic definitions: ===== class CodeSuggestion(BaseModel): relevant_file: str = Field(description="the relevant file full path") suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR") {%- if summarize_mode %} - existing_code: str = Field(description="a short code snippet from a '__new hunk__' section to illustrate the relevant existing code. Don't show the line numbers. Shorten parts of the code ('...') if needed") - improved_code: str = Field(description="a short code snippet to illustrate the improved code, after applying the suggestion. Shorten parts of the code ('...') if needed") + existing_code: str = Field(description="a short code snippet from a '__new hunk__' section to illustrate the relevant existing code. Don't show the line numbers.") + improved_code: str = Field(description="a short code snippet to illustrate the improved code, after applying the suggestion.") + one_sentence_summary:str = Field(description="a short summary of the suggestion action, in a single sentence. Focus on the 'what'. Be general, and avoid method or variable names.") {%- else %} existing_code: str = Field(description="a code snippet, demonstrating the relevant code lines from a '__new hunk__' section. It must be contiguous, correctly formatted and indented, and without line numbers") improved_code: str = Field(description="a new code snippet, that can be used to replace the relevant lines in '__new hunk__' code. Replacement suggestions should be complete, correctly formatted and indented, and without line numbers") @@ -75,12 +76,23 @@ code_suggestions: src/file1.py suggestion_content: |- Add a docstring to func1() +{%- if summarize_mode %} + existing_code: |- + def func1(): + improved_code: |- + ... + one_sentence_summary: |- + ... + relevant_lines_start: 12 + relevant_lines_end: 12 +{%- else %} existing_code: |- def func1(): relevant_lines_start: 12 relevant_lines_end: 12 improved_code: |- ... +{%- endif %} label: |- ... ``` diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py index b3929df5..2b143905 100644 --- a/pr_agent/tools/pr_code_suggestions.py +++ b/pr_agent/tools/pr_code_suggestions.py @@ -8,7 +8,7 @@ from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler from pr_agent.algo.pr_processing import get_pr_diff, get_pr_multi_diffs, retry_with_fallback_models from pr_agent.algo.token_handler import TokenHandler -from pr_agent.algo.utils import load_yaml +from pr_agent.algo.utils import load_yaml, replace_code_tags from pr_agent.config_loader import get_settings from pr_agent.git_providers import get_git_provider from pr_agent.git_providers.git_provider import get_main_pr_language @@ -327,7 +327,7 @@ class PRCodeSuggestions: for label, suggestions in suggestions_labels.items(): pr_body += f"""""" pr_body += f"""""" pr_body += """
{header}
{suggestion_content}" - existing_code = suggestion['existing_code'].rstrip() - improved_code = suggestion['improved_code'].rstrip() + existing_code = suggestion['existing_code'].rstrip()+"\n" + improved_code = suggestion['improved_code'].rstrip()+"\n" language_name = "python" + diff = difflib.unified_diff(existing_code.split('\n'), + improved_code.split('\n')) + patch_orig = "\n".join(diff) + print(patch_orig) + patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n') extension_s = suggestion['relevant_file'].rsplit('.')[-1] if extension_s and (extension_s in extension_to_language): language_name = extension_to_language[extension_s] - example_code = "" - if self.git_provider.is_supported("gfm_markdown"): - example_code = "
Example code:\n\n" - example_code += f"___\n\n" - example_code += f"Existing code:\n```{language_name}\n{existing_code}\n```\n" - example_code += f"Improved code:\n```{language_name}\n{improved_code}\n```\n" - if self.git_provider.is_supported("gfm_markdown"): + + if len(suggestions) > 1: + example_code = "
Preview code:\n\n" + else: + example_code = "" + example_code += f"___\n\n" + if len(suggestions) == 1: + example_code +="Preview code:\n" + example_code += f"```diff\n{patch}\n```\n" + # example_code += f"Existing code:\n```{language_name}\n{existing_code}\n```\n" + # example_code += f"Improved code:\n```{language_name}\n{improved_code}\n```\n" + if len(suggestions) > 1: example_code += "
\n" + pr_body += f"""
@@ -362,7 +378,7 @@ class PRCodeSuggestions: **{suggestion_content}** -[{relevant_file} [{relevant_lines_start}-{relevant_lines_end}]]({code_snippet_link}) +[{relevant_file} {range_str}]({code_snippet_link}) {example_code}
{label}""" - pr_body += f"""
{len(suggestions)} suggestions""" + # pr_body += f"""
{len(suggestions)} suggestions""" pr_body += f"""""" for suggestion in suggestions: @@ -344,83 +344,43 @@ class PRCodeSuggestions: # add html table for each suggestion suggestion_content = suggestion['suggestion_content'].rstrip().rstrip() + # backticks + suggestion_content = replace_code_tags(suggestion_content) + suggestion_content = insert_br_after_x_chars(suggestion_content, 90) # pr_body += f" - - - +{example_code} """ + pr_body += f"" + pr_body += f"" pr_body += """
{suggestion_content}" existing_code = suggestion['existing_code'].rstrip()+"\n" improved_code = suggestion['improved_code'].rstrip()+"\n" - language_name = "python" - diff = difflib.unified_diff(existing_code.split('\n'), - improved_code.split('\n')) - patch_orig = "\n".join(diff) - print(patch_orig) - patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n') - extension_s = suggestion['relevant_file'].rsplit('.')[-1] - if extension_s and (extension_s in extension_to_language): - language_name = extension_to_language[extension_s] - if len(suggestions) > 1: - example_code = "
Preview code:\n\n" - else: - example_code = "" - example_code += f"___\n\n" - if len(suggestions) == 1: - example_code +="Preview code:\n" + diff = difflib.unified_diff(existing_code.split('\n'), + improved_code.split('\n'), n=999) + patch_orig = "\n".join(diff) + patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n') + + example_code = "" example_code += f"```diff\n{patch}\n```\n" - # example_code += f"Existing code:\n```{language_name}\n{existing_code}\n```\n" - # example_code += f"Improved code:\n```{language_name}\n{improved_code}\n```\n" - if len(suggestions) > 1: - example_code += "
\n" + + pr_body += f"""
""" + suggestion_summary = suggestion['one_sentence_summary'].strip() + suggestion_summary= suggestion_summary + max((77-len(suggestion_summary)), 0)*" " + pr_body += f"""\n\n
{suggestion_summary}\n\n___\n\n""" pr_body += f""" -
**{suggestion_content}** [{relevant_file} {range_str}]({code_snippet_link}) -{example_code} -
""" - pr_body += "
" + # pr_body += "
" pr_body += """
""" - # for s in data['code_suggestions']: - # try: - # extension_s = s['relevant_file'].rsplit('.')[-1] - # code_snippet_link = self.git_provider.get_line_link(s['relevant_file'], s['relevant_lines_start'], - # s['relevant_lines_end']) - # label = s['label'].strip() - # data_markdown += f"\n💡 [{label}]\n\n**{s['suggestion_content'].rstrip().rstrip()}**\n\n" - # if code_snippet_link: - # data_markdown += f" File: [{s['relevant_file']} ({s['relevant_lines_start']}-{s['relevant_lines_end']})]({code_snippet_link})\n\n" - # else: - # data_markdown += f"File: {s['relevant_file']} ({s['relevant_lines_start']}-{s['relevant_lines_end']})\n\n" - # if self.git_provider.is_supported("gfm_markdown"): - # data_markdown += "
Example code:\n\n" - # data_markdown += f"___\n\n" - # language_name = "python" - # if extension_s and (extension_s in extension_to_language): - # language_name = extension_to_language[extension_s] - # data_markdown += f"Existing code:\n```{language_name}\n{s['existing_code'].rstrip()}\n```\n" - # data_markdown += f"Improved code:\n```{language_name}\n{s['improved_code'].rstrip()}\n```\n" - # if self.git_provider.is_supported("gfm_markdown"): - # data_markdown += "
\n" - # data_markdown += "\n___\n\n" - # - # pr_body += f"""{label}{s['relevant_file']}""" - # # in the right side of the table, add two collapsable sections with the existing and improved code - # pr_body += f"""
Existing code
{s['existing_code'].rstrip()}
""" - # pr_body += f"""
Improved code
{s['improved_code'].rstrip()}
""" - # - # - # except Exception as e: - # get_logger().error(f"Could not parse suggestion: {s}, error: {e}") self.git_provider.publish_comment(pr_body) except Exception as e: get_logger().info(f"Failed to publish summarized code suggestions, error: {e}") From d7e0aad5276c0e2a0b2b7f3580119fed4929abeb Mon Sep 17 00:00:00 2001 From: "Hussam.lawen" Date: Tue, 16 Jan 2024 09:41:31 +0200 Subject: [PATCH 6/6] small fixes --- pr_agent/tools/pr_code_suggestions.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py index 2b143905..ddc7913e 100644 --- a/pr_agent/tools/pr_code_suggestions.py +++ b/pr_agent/tools/pr_code_suggestions.py @@ -56,8 +56,6 @@ class PRCodeSuggestions: get_settings().pr_code_suggestions_prompt.user) async def run(self): - # data = {'code_suggestions': [{'relevant_file': 'pr_agent/tools/pr_update_changelog.py', 'suggestion_content': 'The `get_git_provider()` function is called twice in the `__init__` method of `PRUpdateChangelog` class. It would be more efficient to call this function once and store the result in a variable, then use this variable in the conditional statements.\n', 'existing_code': 'if isinstance(self.git_provider, GithubProvider):\n self.git_provider = get_git_provider()(pr_url)\nelif isinstance(self.git_provider, GitLabProvider):\n self.git_provider = get_git_provider()(pr_url, incremental=True)\n', 'improved_code': 'git_provider = get_git_provider()\nif isinstance(self.git_provider, GithubProvider):\n self.git_provider = git_provider(pr_url)\nelif isinstance(self.git_provider, GitLabProvider):\n self.git_provider = git_provider(pr_url, incremental=True)\n', 'relevant_lines_start': 22, 'relevant_lines_end': 25, 'label': 'performance'}, {'relevant_file': 'pr_agent/tools/pr_update_changelog.py', 'suggestion_content': 'The `run` method of `PRUpdateChangelog` class has duplicate code for `GithubProvider` and `GitLabProvider`. This can be refactored to a separate method that accepts a `git_provider` as an argument, reducing code duplication.\n', 'existing_code': 'if isinstance(self.git_provider, GithubProvider):\n if get_settings().config.publish_output:\n self.git_provider.publish_comment("Preparing changelog updates...", is_temporary=True)\nelif isinstance(self.git_provider, GitLabProvider):\n # Add code for preparing changelog updates for GitLabProvider\n pass\n', 'improved_code': 'def prepare_changelog_updates(self, git_provider):\n if get_settings().config.publish_output:\n git_provider.publish_comment("Preparing changelog updates...", is_temporary=True)\n\nif isinstance(self.git_provider, GithubProvider):\n self.prepare_changelog_updates(self.git_provider)\nelif isinstance(self.git_provider, GitLabProvider):\n # Add code for preparing changelog updates for GitLabProvider\n pass\n', 'relevant_lines_start': 55, 'relevant_lines_end': 60, 'label': 'maintainability'}, {'relevant_file': 'pr_agent/tools/pr_update_changelog.py', 'suggestion_content': 'The `assert` statement in the `run` method of `PRUpdateChangelog` class can be replaced with a more descriptive exception. This would provide a more meaningful error message if the condition is not met.\n', 'existing_code': 'assert type(self.git_provider) in [GithubProvider, GitLabProvider], "Currently only Github and GitLab are supported"\n', 'improved_code': 'if type(self.git_provider) not in [GithubProvider, GitLabProvider]:\n raise ValueError("Unsupported git provider. Currently only Github and GitLab are supported.")\n', 'relevant_lines_start': 52, 'relevant_lines_end': 52, 'label': 'enhancement'}, {'relevant_file': 'pr_agent/tools/pr_update_changelog.py', 'suggestion_content': 'The `CHANGELOG_LINES` constant is defined at the global scope but is only used within the `PRUpdateChangelog` class. It would be more appropriate to define it as a class attribute.\n', 'existing_code': 'CHANGELOG_LINES = 25\n', 'improved_code': 'class PRUpdateChangelog:\n CHANGELOG_LINES = 25\n ...\n', 'relevant_lines_start': 16, 'relevant_lines_end': 16, 'label': 'best practice'}, {'relevant_file': '.github/ISSUE_TEMPLATE/sweep-bugfix.yml', 'suggestion_content': 'The `id` attribute for the second textarea is `description2`, which is not very descriptive. Consider using a more meaningful `id`.\n', 'existing_code': 'id: description2\n', 'improved_code': 'id: additional_details\n', 'relevant_lines_start': 13, 'relevant_lines_end': 13, 'label': 'enhancement'}, {'relevant_file': '.github/ISSUE_TEMPLATE/sweep-feature.yml', 'suggestion_content': 'The `description` attribute for the textarea is "More details for Sweep", which is not very descriptive. Consider providing a more specific description.\n', 'existing_code': 'description: More details for Sweep\n', 'improved_code': 'description: Please provide more details about the feature you want to test.\n', 'relevant_lines_start': 10, 'relevant_lines_end': 10, 'label': 'enhancement'}, {'relevant_file': '.github/ISSUE_TEMPLATE/sweep-refactor.yml', 'suggestion_content': 'The `placeholder` attribute for the textarea is "We are migrating this function to ... version because ...", which is not very clear. Consider providing a more specific placeholder.\n', 'existing_code': 'placeholder: We are migrating this function to ... version because ...\n', 'improved_code': 'placeholder: We are migrating this function to version 2.0 because it provides better performance.\n', 'relevant_lines_start': 11, 'relevant_lines_end': 11, 'label': 'enhancement'}]} - # self.publish_summarizes_suggestions(data) try: get_logger().info('Generating code suggestions for PR...') if get_settings().config.publish_output: @@ -344,8 +342,6 @@ class PRCodeSuggestions: # add html table for each suggestion suggestion_content = suggestion['suggestion_content'].rstrip().rstrip() - # backticks - suggestion_content = replace_code_tags(suggestion_content) suggestion_content = insert_br_after_x_chars(suggestion_content, 90) # pr_body += f"
{suggestion_content}" @@ -362,7 +358,9 @@ class PRCodeSuggestions: pr_body += f"""""" suggestion_summary = suggestion['one_sentence_summary'].strip() - suggestion_summary= suggestion_summary + max((77-len(suggestion_summary)), 0)*" " + if '`' in suggestion_summary: + suggestion_summary = replace_code_tags(suggestion_summary) + suggestion_summary = suggestion_summary + max((77-len(suggestion_summary)), 0)*" " pr_body += f"""\n\n
{suggestion_summary}\n\n___\n\n""" pr_body += f"""