diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml index 001dbb27..6218e13a 100644 --- a/pr_agent/settings/pr_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_code_suggestions_prompts.toml @@ -32,7 +32,7 @@ __old hunk__ Specific instructions: - Provide up to {{ num_code_suggestions }} code suggestions. Try to provide diverse and insightful suggestions. -- Prioritize suggestions that address major problems, issues and bugs in the code. As a second priority, suggestions should focus on best practices, code readability, maintainability, enhancments, performance, and other aspects. +- Prioritize suggestions that address major problems, issues and bugs in the code. As a second priority, suggestions should focus on enhancement, best practice, performance, maintainability, and other aspects. - Don't suggest to add docstring, type hints, or comments. - Suggestions should refer only to code from the '__new hunk__' sections, and focus on new lines of code (lines starting with '+'). - Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the '__new hunk__' code. @@ -49,65 +49,41 @@ Extra instructions from the user: ====== {%- endif %} +The output must be a YAML object equivalent to type $PRCodeSuggestins, according to the following Pydantic definitions: +===== +class CodeSuggestion(BaseModel): + relevant_file: str = Field(description="the relevant file full path") + suggestion_content: str = Field(description="a concrete suggestion for meaningfully improving the new code introduced in the PR") + existing_code: str = Field(description="a code snippet showing the relevant code lines from a '__new hunk__' section. 
It must be contiguous, correctly formatted and indented, and without line numbers.") + relevant_lines_start: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion starts (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above.") + relevant_lines_end: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion ends (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above.") + improved_code: str = Field(description="a new code snippet that can be used to replace the relevant lines in '__new hunk__' code. Replacement suggestions should be complete, correctly formatted and indented, and without line numbers.") + label: str = Field(description="a single label for the suggestion, to help the user understand the suggestion type. For example: 'security', 'bug', 'performance', 'enhancement', 'possible issue', 'best practice', 'maintainability', etc. Other labels are also allowed.") + +class PRCodeSuggestins(BaseModel): + code_suggestions: List[CodeSuggestion] +===== -You must use the following YAML schema to format your answer: -```yaml -Code suggestions: - type: array - minItems: 1 - maxItems: {{ num_code_suggestions }} - uniqueItems: true - items: - relevant file: - type: string - description: the relevant file full path - suggestion content: - type: string - description: |- - a concrete suggestion for meaningfully improving the new PR code. - existing code: - type: string - description: |- - a code snippet showing the relevant code lines from a '__new hunk__' section. - It must be contiguous, correctly formatted and indented, and without line numbers. - relevant lines start: - type: integer - description: |- - The relevant line number from a '__new hunk__' section where the suggestion starts (inclusive). 
- Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above. - relevant lines end: - type: integer - description: |- - The relevant line number from a '__new hunk__' section where the suggestion ends (inclusive). - Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above. - improved code: - type: string - description: |- - a new code snippet that can be used to replace the relevant lines in '__new hunk__' code. - Replacement suggestions should be complete, correctly formatted and indented, and without line numbers. -``` Example output: ```yaml -Code suggestions: -- relevant file: |- +code_suggestions: +- relevant_file: |- src/file1.py - suggestion content: |- + suggestion_content: |- Add a docstring to func1() - existing code: |- + existing_code: |- def func1(): - relevant lines start: |- - 12 - relevant lines end: |- - 12 - improved code: |- + relevant_lines_start: 12 + relevant_lines_end: 12 + improved_code: |- + ... + label: |- ... -... ``` Each YAML output MUST be after a newline, indented, with block scalar indicator ('|-'). -Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields. 
""" user="""PR Info: diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py index f2f612a1..662d6f62 100644 --- a/pr_agent/tools/pr_code_suggestions.py +++ b/pr_agent/tools/pr_code_suggestions.py @@ -65,14 +65,14 @@ class PRCodeSuggestions: data = self._prepare_pr_code_suggestions() else: data = await retry_with_fallback_models(self._prepare_prediction_extended) - if (not data) or (not 'Code suggestions' in data): + if (not data) or (not 'code_suggestions' in data): get_logger().info('No code suggestions found for PR.') return if (not self.is_extended and get_settings().pr_code_suggestions.rank_suggestions) or \ (self.is_extended and get_settings().pr_code_suggestions.rank_extended_suggestions): get_logger().info('Ranking Suggestions...') - data['Code suggestions'] = await self.rank_suggestions(data['Code suggestions']) + data['code_suggestions'] = await self.rank_suggestions(data['code_suggestions']) if get_settings().config.publish_output: get_logger().info('Pushing PR code suggestions...') @@ -116,27 +116,37 @@ class PRCodeSuggestions: def _prepare_pr_code_suggestions(self) -> Dict: review = self.prediction.strip() - data = load_yaml(review) + data = load_yaml(review, keys_fix_yaml=["relevant_file", "suggestion_content", "existing_code", "improved_code"]) if isinstance(data, list): - data = {'Code suggestions': data} + data = {'code_suggestions': data} + + # remove invalid suggestions + suggestion_list = [] + for i, suggestion in enumerate(data['code_suggestions']): + if suggestion['existing_code'] != suggestion['improved_code']: + suggestion_list.append(suggestion) + else: + get_logger().debug(f"Skipping suggestion {i + 1}, because existing code is equal to improved code {suggestion['existing_code']}") + data['code_suggestions'] = suggestion_list + return data def push_inline_code_suggestions(self, data): code_suggestions = [] - if not data['Code suggestions']: + if not data['code_suggestions']: get_logger().info('No 
suggestions found to improve this PR.') return self.git_provider.publish_comment('No suggestions found to improve this PR.') - for d in data['Code suggestions']: + for d in data['code_suggestions']: try: if get_settings().config.verbosity_level >= 2: get_logger().info(f"suggestion: {d}") - relevant_file = d['relevant file'].strip() - relevant_lines_start = int(d['relevant lines start']) # absolute position - relevant_lines_end = int(d['relevant lines end']) - content = d['suggestion content'] - new_code_snippet = d['improved code'] + relevant_file = d['relevant_file'].strip() + relevant_lines_start = int(d['relevant_lines_start']) # absolute position + relevant_lines_end = int(d['relevant_lines_end']) + content = d['suggestion_content'].rstrip() + new_code_snippet = d['improved_code'].rstrip() if new_code_snippet: new_code_snippet = self.dedent_code(relevant_file, relevant_lines_start, new_code_snippet) @@ -195,8 +205,8 @@ class PRCodeSuggestions: for prediction in prediction_list: self.prediction = prediction data_per_chunk = self._prepare_pr_code_suggestions() - if "Code suggestions" in data: - data["Code suggestions"].extend(data_per_chunk["Code suggestions"]) + if "code_suggestions" in data: + data["code_suggestions"].extend(data_per_chunk["code_suggestions"]) else: data.update(data_per_chunk) self.data = data @@ -214,11 +224,6 @@ class PRCodeSuggestions: """ suggestion_list = [] - # remove invalid suggestions - for i, suggestion in enumerate(data): - if suggestion['existing code'] != suggestion['improved code']: - suggestion_list.append(suggestion) - data_sorted = [[]] * len(suggestion_list) try: @@ -264,24 +269,25 @@ class PRCodeSuggestions: for ext in extensions: extension_to_language[ext] = language - for s in data['Code suggestions']: + for s in data['code_suggestions']: try: - extension_s = s['relevant file'].rsplit('.')[-1] - code_snippet_link = self.git_provider.get_line_link(s['relevant file'], s['relevant lines start'], - s['relevant lines end']) - 
data_markdown += f"\n💡 Suggestion:\n\n**{s['suggestion content']}**\n\n" + extension_s = s['relevant_file'].rsplit('.')[-1] + code_snippet_link = self.git_provider.get_line_link(s['relevant_file'], s['relevant_lines_start'], + s['relevant_lines_end']) + label = s['label'].strip() + data_markdown += f"\n💡 Type: [{label}]\n\n**{s['suggestion_content'].rstrip().rstrip()}**\n\n" if code_snippet_link: - data_markdown += f" File: [{s['relevant file']} ({s['relevant lines start']}-{s['relevant lines end']})]({code_snippet_link})\n\n" + data_markdown += f" File: [{s['relevant_file']} ({s['relevant_lines_start']}-{s['relevant_lines_end']})]({code_snippet_link})\n\n" else: - data_markdown += f"File: {s['relevant file']} ({s['relevant lines start']}-{s['relevant lines end']})\n\n" + data_markdown += f"File: {s['relevant_file']} ({s['relevant_lines_start']}-{s['relevant_lines_end']})\n\n" if self.git_provider.is_supported("gfm_markdown"): data_markdown += "
Example code:\n\n" data_markdown += f"___\n\n" language_name = "python" if extension_s and (extension_s in extension_to_language): language_name = extension_to_language[extension_s] - data_markdown += f"Existing code:\n```{language_name}\n{s['existing code']}\n```\n" - data_markdown += f"Improved code:\n```{language_name}\n{s['improved code']}\n```\n" + data_markdown += f"Existing code:\n```{language_name}\n{s['existing_code'].rstrip()}\n```\n" + data_markdown += f"Improved code:\n```{language_name}\n{s['improved_code'].rstrip()}\n```\n" if self.git_provider.is_supported("gfm_markdown"): data_markdown += "
\n" data_markdown += "\n___\n\n"