Merge pull request #1869 from PullPullers/feat/review-todo-section

feat: surface TODO comments in review tool
This commit is contained in:
Tal
2025-06-18 20:15:43 +03:00
committed by GitHub
5 changed files with 121 additions and 1 deletions

View File

@ -98,6 +98,11 @@ extra_instructions = "..."
<tr>
<td><b>require_security_review</b></td>
<td>If set to true, the tool will add a section that checks if the PR contains a possible security or vulnerability issue. Default is true.</td>
</tr>
<tr>
<td><b>require_todo_scan</b></td>
<td>If set to true, the tool will add a section that lists TODO comments found in the PR code changes. Default is false.
</td>
</tr>
<tr>
<td><b>require_ticket_analysis_review</b></td>

View File

@ -1,5 +1,6 @@
from __future__ import annotations
import ast
import copy
import difflib
import hashlib
@ -14,7 +15,7 @@ import traceback
from datetime import datetime
from enum import Enum
from importlib.metadata import PackageNotFoundError, version
from typing import Any, List, Tuple
from typing import Any, List, Tuple, TypedDict
import html2text
import requests
@ -37,21 +38,31 @@ def get_model(model_type: str = "model_weak") -> str:
return get_settings().config.model_reasoning
return get_settings().config.model
class Range(BaseModel):
line_start: int # should be 0-indexed
line_end: int
column_start: int = -1
column_end: int = -1
class ModelType(str, Enum):
REGULAR = "regular"
WEAK = "weak"
REASONING = "reasoning"
class TodoItem(TypedDict):
relevant_file: str
line_range: Tuple[int, int]
content: str
class PRReviewHeader(str, Enum):
REGULAR = "## PR Reviewer Guide"
INCREMENTAL = "## Incremental PR Reviewer Guide"
class ReasoningEffort(str, Enum):
HIGH = "high"
MEDIUM = "medium"
@ -109,6 +120,7 @@ def unique_strings(input_list: List[str]) -> List[str]:
seen.add(item)
return unique_list
def convert_to_markdown_v2(output_data: dict,
gfm_supported: bool = True,
incremental_review=None,
@ -131,6 +143,7 @@ def convert_to_markdown_v2(output_data: dict,
"Focused PR": "",
"Relevant ticket": "🎫",
"Security concerns": "🔒",
"Todo sections": "📝",
"Insights from user's answers": "📝",
"Code feedback": "🤖",
"Estimated effort to review [1-5]": "⏱️",
@ -151,6 +164,7 @@ def convert_to_markdown_v2(output_data: dict,
if gfm_supported:
markdown_text += "<table>\n"
todo_summary = output_data['review'].pop('todo_summary', '')
for key, value in output_data['review'].items():
if value is None or value == '' or value == {} or value == []:
if key.lower() not in ['can_be_split', 'key_issues_to_review']:
@ -209,6 +223,82 @@ def convert_to_markdown_v2(output_data: dict,
markdown_text += f"### {emoji} Security concerns\n\n"
value = emphasize_header(value.strip(), only_markdown=True)
markdown_text += f"{value}\n\n"
elif 'todo sections' in key_nice.lower():
def format_todo_item(todo_item: TodoItem) -> str:
relevant_file = todo_item.get('relevant_file', '').strip()
line_range = todo_item.get('line_range', [])
content = todo_item.get('content', '')
reference_link = None
if isinstance(line_range, str):
line_range = ast.literal_eval(line_range.strip())
try:
if git_provider and relevant_file and line_range:
reference_link = git_provider.get_line_link(relevant_file, line_range[0], line_range[1])
except Exception as e:
get_logger().exception(f"Error generating link: {e}")
line_str = f"[{line_range[0]}]" if line_range[0] == line_range[1] else f"[{line_range[0]}-{line_range[1]}]"
return f"{relevant_file} {line_str}: {content}"
line_str = f"[{line_range[0]}]" if line_range[0] == line_range[1] else f"[{line_range[0]}-{line_range[1]}]"
file_ref = f"{relevant_file} {line_str}"
if reference_link:
if gfm_supported:
file_ref = f"<a href='{reference_link}'>{file_ref}</a>"
else:
file_ref = f"[{file_ref}]({reference_link})"
content_lines = content.strip().split("\n")
# if TODO content is single-line :
if len(content_lines) == 1:
return f"{file_ref}: {content_lines[0]}"
# else if TODO content is multi-line:
elif len(content_lines) > 1:
content_lines = "<br>".join(content_lines)
return f"{file_ref}: <blockquote>\n{content_lines}\n</blockquote>"
# else if TODO content is empty:
else:
return file_ref
def format_todo_items(value: list[TodoItem] | TodoItem) -> str:
markdown_text = ""
if gfm_supported:
if isinstance(value, list):
markdown_text += "<ul>\n"
for todo_item in value:
markdown_text += f"<li>{format_todo_item(todo_item)}</li>\n"
markdown_text += "</ul>\n"
else:
markdown_text += f"<p>{format_todo_item(value)}</p>\n"
else:
if isinstance(value, list):
for todo_item in value:
markdown_text += f"- {format_todo_item(todo_item)}\n"
else:
markdown_text += f"- {format_todo_item(value)}\n"
return markdown_text
if gfm_supported:
markdown_text += "<tr><td>"
if is_value_no(value):
markdown_text += f"{emoji}&nbsp;<strong>No TODO sections</strong>"
else:
markdown_todo_items = format_todo_items(value)
markdown_text += f"{emoji}&nbsp;<strong>TODO sections</strong>\n<br><br>\n"
markdown_text += f"<details><summary>{todo_summary}</summary>\n\n"
markdown_text += markdown_todo_items
markdown_text += "\n</details>\n"
markdown_text += "</td></tr>\n"
else:
if is_value_no(value):
markdown_text += f"### {emoji} No TODO sections\n\n"
else:
markdown_todo_items = format_todo_items(value)
markdown_text += f"### {emoji} TODO sections\n<details><summary>{todo_summary}</summary>\n\n"
markdown_text += markdown_todo_items
markdown_text += "\n</details>\n\n"
elif 'can be split' in key_nice.lower():
if gfm_supported:
markdown_text += f"<tr><td>"

View File

@ -78,6 +78,7 @@ require_tests_review=true
require_estimate_effort_to_review=true
require_can_be_split_review=false
require_security_review=true
require_todo_scan=false
require_ticket_analysis_review=true
# general options
publish_output_no_suggestions=true # Set to "false" if you only need the reviewer's remarks (not labels, not "security audit", etc.) and want to avoid noisy "No major issues detected" comments.

View File

@ -72,6 +72,13 @@ class KeyIssuesComponentLink(BaseModel):
start_line: int = Field(description="The start line that corresponds to this issue in the relevant file")
end_line: int = Field(description="The end line that corresponds to this issue in the relevant file")
{%- if require_todo_scan %}
class TodoSection(BaseModel):
relevant_file: str = Field(description="The file containing the TODO comment")
line_range: Tuple[int, int] = Field(description="Start and end line numbers of the TODO comment (inclusive). Must be a tuple of two integers, e.g., (7, 7) for a single line or (7, 10) for a range. Do not use list format [7, 7].")
content: str = Field(description="The content of the TODO comment. Only include actual TODO comments within code comments (e.g., lines starting with '#', '//', '/*', '<!--'). Remove leading 'TODO' prefixes. Include lines like '# TODO', even if empty after prefix removal. Exclude TODOs outside comments or without appropriate prefixes. Use | block style only when the line range covers multiple lines (e.g., (7, 10)).")
{%- endif %}
{%- if related_tickets %}
class TicketCompliance(BaseModel):
@ -102,6 +109,10 @@ class Review(BaseModel):
{%- if require_security_review %}
security_concerns: str = Field(description="Does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' (without explaining why) if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible")
{%- endif %}
{%- if require_todo_scan %}
todo_sections: Union[List[TodoSection], str] = Field(description="A list of TODO comments found in the PR code. Return 'No' (as a string) if there are no TODO comments in the PR"
todo_summary: str = Field(description="Up to 6 words summarizing the functional areas of the TODO comments found in the code.")
{%- endif %}
{%- if require_can_be_split_review %}
can_be_split: List[SubPR] = Field(min_items=0, max_items=3, description="Can this PR, which contains {{ num_pr_files }} changed files in total, be divided into smaller sub-PRs with distinct tasks that can be reviewed and merged independently, regardless of the order ? Make sure that the sub-PRs are indeed independent, with no code dependencies between them, and that each sub-PR represent a meaningful independent task. Output an empty list if the PR code does not need to be split.")
{%- endif %}
@ -148,6 +159,12 @@ review:
- ...
security_concerns: |
No
{%- if require_todo_scan %}
todo_sections: |
No
todo_summary: |
...
{%- endif %}
{%- if require_can_be_split_review %}
can_be_split:
- relevant_files:
@ -266,6 +283,12 @@ review:
- ...
security_concerns: |
No
{%- if require_todo_scan %}
todo_sections: |
No
todo_summary: |
...
{%- endif %}
{%- if require_can_be_split_review %}
can_be_split:
- relevant_files:

View File

@ -87,6 +87,7 @@ class PRReviewer:
"require_estimate_effort_to_review": get_settings().pr_reviewer.require_estimate_effort_to_review,
'require_can_be_split_review': get_settings().pr_reviewer.require_can_be_split_review,
'require_security_review': get_settings().pr_reviewer.require_security_review,
'require_todo_scan': get_settings().pr_reviewer.require_todo_scan,
'question_str': question_str,
'answer_str': answer_str,
"extra_instructions": get_settings().pr_reviewer.extra_instructions,