review with links

This commit is contained in:
mrT23
2024-07-14 08:53:53 +03:00
parent ce35d2c313
commit 5d6e1de157
4 changed files with 124 additions and 34 deletions

View File

@ -1,5 +1,6 @@
from __future__ import annotations from __future__ import annotations
import copy
import difflib import difflib
import json import json
import os import os
@ -44,7 +45,7 @@ def get_setting(key: str) -> Any:
return global_settings.get(key, None) return global_settings.get(key, None)
def emphasize_header(text: str, only_markdown=False) -> str: def emphasize_header(text: str, only_markdown=False, reference_link=None) -> str:
try: try:
# Finding the position of the first occurrence of ": " # Finding the position of the first occurrence of ": "
colon_position = text.find(": ") colon_position = text.find(": ")
@ -53,9 +54,15 @@ def emphasize_header(text: str, only_markdown=False) -> str:
if colon_position != -1: if colon_position != -1:
# Everything before the colon (inclusive) is wrapped in <strong> tags # Everything before the colon (inclusive) is wrapped in <strong> tags
if only_markdown: if only_markdown:
transformed_string = f"**{text[:colon_position + 1]}**\n" + text[colon_position + 1:] if reference_link:
transformed_string = f"[**{text[:colon_position + 1]}**]({reference_link})\n" + text[colon_position + 1:]
else:
transformed_string = f"**{text[:colon_position + 1]}**\n" + text[colon_position + 1:]
else: else:
transformed_string = "<strong>" + text[:colon_position + 1] + "</strong>" +'<br>' + text[colon_position + 1:] if reference_link:
transformed_string = f"<strong><a href='{reference_link}'>{text[:colon_position + 1]}</a></strong><br>" + text[colon_position + 1:]
else:
transformed_string = "<strong>" + text[:colon_position + 1] + "</strong>" +'<br>' + text[colon_position + 1:]
else: else:
# If there's no ": ", return the original string # If there's no ": ", return the original string
transformed_string = text transformed_string = text
@ -77,7 +84,10 @@ def unique_strings(input_list: List[str]) -> List[str]:
seen.add(item) seen.add(item)
return unique_list return unique_list
def convert_to_markdown_v2(output_data: dict, gfm_supported: bool = True, incremental_review=None) -> str: def convert_to_markdown_v2(output_data: dict,
gfm_supported: bool = True,
incremental_review=None,
git_provider=None) -> str:
""" """
Convert a dictionary of data into markdown format. Convert a dictionary of data into markdown format.
Args: Args:
@ -113,12 +123,13 @@ def convert_to_markdown_v2(output_data: dict, gfm_supported: bool = True, increm
for key, value in output_data['review'].items(): for key, value in output_data['review'].items():
if value is None or value == '' or value == {} or value == []: if value is None or value == '' or value == {} or value == []:
if key.lower() != 'can_be_split': if key.lower() != 'can_be_split' and key.lower() != 'key_issues_to_review':
continue continue
key_nice = key.replace('_', ' ').capitalize() key_nice = key.replace('_', ' ').capitalize()
emoji = emojis.get(key_nice, "") emoji = emojis.get(key_nice, "")
if 'Estimated effort to review' in key_nice: if 'Estimated effort to review' in key_nice:
key_nice = 'Estimated effort to review' key_nice = 'Estimated effort to review'
value = str(value).strip()
if value.isnumeric(): if value.isnumeric():
value_int = int(value) value_int = int(value)
else: else:
@ -179,7 +190,7 @@ def convert_to_markdown_v2(output_data: dict, gfm_supported: bool = True, increm
markdown_text += process_can_be_split(emoji, value) markdown_text += process_can_be_split(emoji, value)
markdown_text += f"</td></tr>\n" markdown_text += f"</td></tr>\n"
elif 'key issues to review' in key_nice.lower(): elif 'key issues to review' in key_nice.lower():
value = value.strip() # value is a list of issues
if is_value_no(value): if is_value_no(value):
if gfm_supported: if gfm_supported:
markdown_text += f"<tr><td>" markdown_text += f"<tr><td>"
@ -188,20 +199,46 @@ def convert_to_markdown_v2(output_data: dict, gfm_supported: bool = True, increm
else: else:
markdown_text += f"### {emoji} No key issues to review\n\n" markdown_text += f"### {emoji} No key issues to review\n\n"
else: else:
issues = value.split('\n- ') # issues = value.split('\n- ')
for i, _ in enumerate(issues): issues =value
issues[i] = issues[i].strip().strip('-').strip() # for i, _ in enumerate(issues):
issues = unique_strings(issues) # remove duplicates # issues[i] = issues[i].strip().strip('-').strip()
if gfm_supported: if gfm_supported:
markdown_text += f"<tr><td>" markdown_text += f"<tr><td>"
markdown_text += f"{emoji}&nbsp;<strong>{key_nice}</strong><br><br>\n\n" markdown_text += f"{emoji}&nbsp;<strong>{key_nice}</strong><br><br>\n\n"
else: else:
markdown_text += f"### {emoji} Key issues to review:\n\n" markdown_text += f"### {emoji} Key issues to review\n\n#### \n"
for i, issue in enumerate(issues): for i, issue in enumerate(issues):
if not issue: try:
continue if not issue:
issue = emphasize_header(issue, only_markdown=True) continue
markdown_text += f"{issue}\n\n" relevant_file = issue.get('relevant_file', '').strip()
issue_header = issue.get('issue_header', '').strip()
issue_content = issue.get('issue_content', '').strip()
start_line = int(str(issue.get('start_line', 0)).strip())
end_line = int(str(issue.get('end_line', 0)).strip())
reference_link = git_provider.get_line_link(relevant_file, start_line, end_line)
if gfm_supported:
if get_settings().pr_reviewer.extra_issue_links:
issue_content_linked =copy.deepcopy(issue_content)
referenced_variables_list = issue.get('referenced_variables', [])
for component in referenced_variables_list:
name = component['variable_name'].strip().strip('`')
ind = issue_content.find(name)
if ind != -1:
reference_link_component = git_provider.get_line_link(relevant_file, component['relevant_line'], component['relevant_line'])
issue_content_linked = issue_content_linked[:ind-1] + f"[`{name}`]({reference_link_component})" + issue_content_linked[ind+len(name)+1:]
else:
get_logger().info(f"Failed to find variable in issue content: {component['variable_name'].strip()}")
issue_content = issue_content_linked
issue_str = f"<a href='{reference_link}'><strong>{issue_header}</strong></a><br>{issue_content}"
else:
issue_str = f"[**{issue_header}**]({reference_link})\n\n{issue_content}\n\n"
markdown_text += f"{issue_str}\n\n"
except Exception as e:
get_logger().exception(f"Failed to process key issues to review: {e}")
if gfm_supported: if gfm_supported:
markdown_text += f"</td></tr>\n" markdown_text += f"</td></tr>\n"
else: else:
@ -536,6 +573,7 @@ def load_yaml(response_text: str, keys_fix_yaml: List[str] = [], first_key="", l
return data return data
def try_fix_yaml(response_text: str, def try_fix_yaml(response_text: str,
keys_fix_yaml: List[str] = [], keys_fix_yaml: List[str] = [],
first_key="", first_key="",
@ -558,14 +596,14 @@ def try_fix_yaml(response_text: str,
except: except:
get_logger().info(f"Failed to parse AI prediction after adding |-\n") get_logger().info(f"Failed to parse AI prediction after adding |-\n")
# second fallback - try to extract only range from first ```yaml to ``` # second fallback - try to extract only range from first ```yaml to ````
snippet_pattern = r'```(yaml)?[\s\S]*?```' snippet_pattern = r'```(yaml)?[\s\S]*?```'
snippet = re.search(snippet_pattern, '\n'.join(response_text_lines_copy)) snippet = re.search(snippet_pattern, '\n'.join(response_text_lines_copy))
if snippet: if snippet:
snippet_text = snippet.group() snippet_text = snippet.group()
try: try:
data = yaml.safe_load(snippet_text.removeprefix('```yaml').rstrip('`')) data = yaml.safe_load(snippet_text.removeprefix('```yaml').rstrip('`'))
get_logger().info(f"Successfully parsed AI prediction after extracting yaml snippet with second fallback") get_logger().info(f"Successfully parsed AI prediction after extracting yaml snippet")
return data return data
except: except:
pass pass
@ -580,6 +618,7 @@ def try_fix_yaml(response_text: str,
except: except:
pass pass
# forth fallback - try to extract yaml snippet by 'first_key' and 'last_key' # forth fallback - try to extract yaml snippet by 'first_key' and 'last_key'
# note that 'last_key' can be in practice a key that is not the last key in the yaml snippet. # note that 'last_key' can be in practice a key that is not the last key in the yaml snippet.
# it just needs to be some inner key, so we can look for newlines after it # it just needs to be some inner key, so we can look for newlines after it
@ -885,7 +924,7 @@ def show_relevant_configurations(relevant_section: str) -> str:
return markdown_text return markdown_text
def is_value_no(value): def is_value_no(value):
if value is None: if not value:
return True return True
value_str = str(value).strip().lower() value_str = str(value).strip().lower()
if value_str == 'no' or value_str == 'none' or value_str == 'false': if value_str == 'no' or value_str == 'none' or value_str == 'false':

View File

@ -30,6 +30,7 @@ require_tests_review=true
require_estimate_effort_to_review=true require_estimate_effort_to_review=true
require_can_be_split_review=false require_can_be_split_review=false
require_security_review=true require_security_review=true
extra_issue_links=false
# soc2 # soc2
require_soc2_ticket=false require_soc2_ticket=false
soc2_ticket_prompt="Does the PR description include a link to ticket in a project management system (e.g., Jira, Asana, Trello, etc.) ?" soc2_ticket_prompt="Does the PR description include a link to ticket in a project management system (e.g., Jira, Asana, Trello, etc.) ?"

View File

@ -7,39 +7,51 @@ Your task is to provide constructive and concise feedback for the PR.
{%- endif %} {%- endif %}
The review should focus on new code added in the PR diff (lines starting with '+') The review should focus on new code added in the PR diff (lines starting with '+')
Example PR Diff:
The format we will use to present the PR code diff:
====== ======
## file: 'src/file1.py' ## file: 'src/file1.py'
@@ -12,5 +12,5 @@ def func1(): @@ ... @@ def func1():
code line 1 that remained unchanged in the PR __new hunk__
code line 2 that remained unchanged in the PR 12 code line1 that remained unchanged in the PR
-code line that was removed in the PR 13 +new hunk code line2 added in the PR
+code line added in the PR 14 code line3 that remained unchanged in the PR
code line 3 that remained unchanged in the PR __old hunk__
code line1 that remained unchanged in the PR
-old hunk code line2 that was removed in the PR
code line3 that remained unchanged in the PR
@@ ... @@ def func2(): @@ ... @@ def func2():
__new hunk__
...
__old hunk__
... ...
## file: 'src/file2.py' ## file: 'src/file2.py'
... ...
====== ======
- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed.
- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.
- Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \
The review should focus on new code added in the PR diff (lines starting with '+')
- When quoting variables or names from the code, use backticks (`) instead of single quote (').
{%- if num_code_suggestions > 0 %} {%- if num_code_suggestions > 0 %}
Code suggestions guidelines: Code suggestions guidelines:
- Provide up to {{ num_code_suggestions }} code suggestions. Try to provide diverse and insightful suggestions. - Provide up to {{ num_code_suggestions }} code suggestions. Try to provide diverse and insightful suggestions.
- Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements like performance, vulnerability, modularity, and best practices. - Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices.
- Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the PR code. - Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the PR code.
- Don't suggest to add docstring, type hints, or comments. - Don't suggest to add docstring, type hints, or comments.
- Suggestions should focus on the new code added in the PR diff (lines starting with '+') - Suggestions should focus on the new code added in the PR diff (lines starting with '+')
- When quoting variables or names from the code, use backticks (`) instead of single quote (').
{%- endif %} {%- endif %}
{%- if extra_instructions %} {%- if extra_instructions %}
Extra instructions from the user: Extra instructions from the user:
====== ======
{{ extra_instructions }} {{ extra_instructions }}
@ -55,6 +67,22 @@ class SubPR(BaseModel):
title: str = Field(description="Short and concise title for an independent and meaningful sub-PR, composed only from the relevant files") title: str = Field(description="Short and concise title for an independent and meaningful sub-PR, composed only from the relevant files")
{%- endif %} {%- endif %}
class KeyIssuesComponentLink(BaseModel):
relevant_file: str = Field(description="The full file path of the relevant file")
issue_header: str = Field(description="one or two word title for the the issue. For example: 'Possible Bug', 'Performance Issue', 'Code Smell', etc.")
issue_content: str = Field(description="a short and concise description of the issue that needs to be reviewed")
start_line: int = Field(description="the start line that corresponds to this issue in the relevant file")
end_line: int = Field(description="the end line that corresponds to this issue in the relevant file")
{%- if extra_issue_links %}
referenced_variables: List[Refs] = Field(description="a list of relevant variables or names that appear in the 'issue_content' output. For each variable, output is name, and the line number where it appears in the relevant file")
{% endif %}
{%- if extra_issue_links %}
class Refs(BaseModel):
variable_name: str = Field(description="the name of a variable or name that appears in the relevant 'issue_content' output.")
relevant_line: int = Field(description="the line number where the variable or name appears in the relevant file")
{%- endif %}
class Review(BaseModel): class Review(BaseModel):
{%- if require_estimate_effort_to_review %} {%- if require_estimate_effort_to_review %}
estimated_effort_to_review_[1-5]: int = Field(description="Estimate, on a scale of 1-5 (inclusive), the time and effort required to review this PR by an experienced and knowledgeable developer. 1 means short and easy review , 5 means long and hard review. Take into account the size, complexity, quality, and the needed changes of the PR code diff.") estimated_effort_to_review_[1-5]: int = Field(description="Estimate, on a scale of 1-5 (inclusive), the time and effort required to review this PR by an experienced and knowledgeable developer. 1 means short and easy review , 5 means long and hard review. Take into account the size, complexity, quality, and the needed changes of the PR code diff.")
@ -68,7 +96,7 @@ class Review(BaseModel):
{%- if question_str %} {%- if question_str %}
insights_from_user_answers: str = Field(description="shortly summarize the insights you gained from the user's answers to the questions") insights_from_user_answers: str = Field(description="shortly summarize the insights you gained from the user's answers to the questions")
{%- endif %} {%- endif %}
key_issues_to_review: str = Field(description="Does this PR code introduce issues, bugs, or major performance concerns, which the PR reviewer should further investigate ? If there are no apparent issues, respond with 'None'. If there are any issues, describe them briefly. Use bullet points if more than one issue. Be specific, and provide examples if possible. Start each bullet point with a short specific header, such as: "- Possible Bug: ...", etc.") key_issues_to_review: List[KeyIssuesComponentLink] = Field("A list of bugs, issue or major performance concerns introduced in this PR, which the PR reviewer should further investigate")
{%- if require_security_review %} {%- if require_security_review %}
security_concerns: str = Field(description="does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible") security_concerns: str = Field(description="does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible")
{%- endif %} {%- endif %}
@ -78,8 +106,8 @@ class Review(BaseModel):
{%- if num_code_suggestions > 0 %} {%- if num_code_suggestions > 0 %}
class CodeSuggestion(BaseModel): class CodeSuggestion(BaseModel):
relevant_file: str = Field(description="The full file path of the relevant file.") relevant_file: str = Field(description="The full file path of the relevant file")
language: str = Field(description="The programming language of the relevant file.") language: str = Field(description="The programming language of the relevant file")
suggestion: str = Field(description="a concrete suggestion for meaningfully improving the new PR code. Also describe how, specifically, the suggestion can be applied to new PR code. Add tags with importance measure that matches each suggestion ('important' or 'medium'). Do not make suggestions for updating or adding docstrings, renaming PR title and description, or linter like.") suggestion: str = Field(description="a concrete suggestion for meaningfully improving the new PR code. Also describe how, specifically, the suggestion can be applied to new PR code. Add tags with importance measure that matches each suggestion ('important' or 'medium'). Do not make suggestions for updating or adding docstrings, renaming PR title and description, or linter like.")
relevant_line: str = Field(description="a single code line taken from the relevant file, to which the suggestion applies. The code line should start with a '+'. Make sure to output the line exactly as it appears in the relevant file") relevant_line: str = Field(description="a single code line taken from the relevant file, to which the suggestion applies. The code line should start with a '+'. Make sure to output the line exactly as it appears in the relevant file")
{%- endif %} {%- endif %}
@ -90,6 +118,7 @@ class PRReview(BaseModel):
code_feedback: List[CodeSuggestion] code_feedback: List[CodeSuggestion]
{%- else %} {%- else %}
class PRReview(BaseModel): class PRReview(BaseModel):
review: Review review: Review
{%- endif %} {%- endif %}
@ -108,8 +137,22 @@ review:
{%- endif %} {%- endif %}
relevant_tests: | relevant_tests: |
No No
key_issues_to_review: | key_issues_to_review:
... - relevant_file: |
directory/xxx.py
issue_header: |
Possible Bug
issue_content: |
...
start_line: 12
end_line: 14
{%- if extra_issue_links %}
referenced_variables:
- variable_name: |
...
relevant_line: 13
{%- endif %}
- ...
security_concerns: | security_concerns: |
No No
{%- if require_can_be_split_review %} {%- if require_can_be_split_review %}
@ -120,6 +163,7 @@ review:
title: ... title: ...
- ... - ...
{%- endif %} {%- endif %}
{%- if num_code_suggestions > 0 %} {%- if num_code_suggestions > 0 %}
code_feedback code_feedback
- relevant_file: | - relevant_file: |

View File

@ -75,6 +75,7 @@ class PRReviewer:
"commit_messages_str": self.git_provider.get_commit_messages(), "commit_messages_str": self.git_provider.get_commit_messages(),
"custom_labels": "", "custom_labels": "",
"enable_custom_labels": get_settings().config.enable_custom_labels, "enable_custom_labels": get_settings().config.enable_custom_labels,
"extra_issue_links": get_settings().pr_reviewer.extra_issue_links,
} }
self.token_handler = TokenHandler( self.token_handler = TokenHandler(
@ -147,7 +148,12 @@ class PRReviewer:
get_logger().error(f"Failed to review PR: {e}") get_logger().error(f"Failed to review PR: {e}")
async def _prepare_prediction(self, model: str) -> None: async def _prepare_prediction(self, model: str) -> None:
self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model) self.patches_diff = get_pr_diff(self.git_provider,
self.token_handler,
model,
add_line_numbers_to_hunks=True,
disable_extra_lines=True,)
if self.patches_diff: if self.patches_diff:
get_logger().debug(f"PR diff", diff=self.patches_diff) get_logger().debug(f"PR diff", diff=self.patches_diff)
self.prediction = await self._get_prediction(model) self.prediction = await self._get_prediction(model)
@ -234,7 +240,7 @@ class PRReviewer:
incremental_review_markdown_text = f"Starting from commit {last_commit_url}" incremental_review_markdown_text = f"Starting from commit {last_commit_url}"
markdown_text = convert_to_markdown_v2(data, self.git_provider.is_supported("gfm_markdown"), markdown_text = convert_to_markdown_v2(data, self.git_provider.is_supported("gfm_markdown"),
incremental_review_markdown_text) incremental_review_markdown_text, git_provider=self.git_provider)
# Add help text if gfm_markdown is supported # Add help text if gfm_markdown is supported
if self.git_provider.is_supported("gfm_markdown") and get_settings().pr_reviewer.enable_help_text: if self.git_provider.is_supported("gfm_markdown") and get_settings().pr_reviewer.enable_help_text: