Merge pull request #1869 from PullPullers/feat/review-todo-section

feat: surface TODO comments in review tool
2025-07-21 04:50:39 +08:00 · 2025-06-18 20:15:43 +03:00
parent 2cfe52e294 4a00854b15
commit 3e1cf2deed
5 changed files with 121 additions and 1 deletions
--- a/docs/docs/tools/review.md
+++ b/docs/docs/tools/review.md
@ -98,6 +98,11 @@ extra_instructions = "..."
      <tr>
        <td><b>require_security_review</b></td>
        <td>If set to true, the tool will add a section that checks if the PR contains a possible security or vulnerability issue. Default is true.</td>
+      </tr>
+        <tr>
+        <td><b>require_todo_scan</b></td>
+        <td>If set to true, the tool will add a section that lists TODO comments found in the PR code changes. Default is false.
+        </td>
      </tr>
      <tr>
        <td><b>require_ticket_analysis_review</b></td>
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@ -1,5 +1,6 @@
 from __future__ import annotations

+import ast
 import copy
 import difflib
 import hashlib
@ -14,7 +15,7 @@ import traceback
 from datetime import datetime
 from enum import Enum
 from importlib.metadata import PackageNotFoundError, version
-from typing import Any, List, Tuple
+from typing import Any, List, Tuple, TypedDict

 import html2text
 import requests
@ -37,21 +38,31 @@ def get_model(model_type: str = "model_weak") -> str:
        return get_settings().config.model_reasoning
    return get_settings().config.model

+
 class Range(BaseModel):
    line_start: int  # should be 0-indexed
    line_end: int
    column_start: int = -1
    column_end: int = -1

+
 class ModelType(str, Enum):
    REGULAR = "regular"
    WEAK = "weak"
    REASONING = "reasoning"

+
+class TodoItem(TypedDict):
+    relevant_file: str
+    line_range: Tuple[int, int]
+    content: str
+
+
 class PRReviewHeader(str, Enum):
    REGULAR = "## PR Reviewer Guide"
    INCREMENTAL = "## Incremental PR Reviewer Guide"

+
 class ReasoningEffort(str, Enum):
    HIGH = "high"
    MEDIUM = "medium"
@ -109,6 +120,7 @@ def unique_strings(input_list: List[str]) -> List[str]:
            seen.add(item)
    return unique_list

+
 def convert_to_markdown_v2(output_data: dict,
                           gfm_supported: bool = True,
                           incremental_review=None,
@ -131,6 +143,7 @@ def convert_to_markdown_v2(output_data: dict,
        "Focused PR": "✨",
        "Relevant ticket": "🎫",
        "Security concerns": "🔒",
+        "Todo sections": "📝",
        "Insights from user's answers": "📝",
        "Code feedback": "🤖",
        "Estimated effort to review [1-5]": "⏱️",
@ -151,6 +164,7 @@ def convert_to_markdown_v2(output_data: dict,
    if gfm_supported:
        markdown_text += "<table>\n"

+    todo_summary = output_data['review'].pop('todo_summary', '')
    for key, value in output_data['review'].items():
        if value is None or value == '' or value == {} or value == []:
            if key.lower() not in ['can_be_split', 'key_issues_to_review']:
@ -209,6 +223,82 @@ def convert_to_markdown_v2(output_data: dict,
                    markdown_text += f"### {emoji} Security concerns\n\n"
                    value = emphasize_header(value.strip(), only_markdown=True)
                    markdown_text += f"{value}\n\n"
+        elif 'todo sections' in key_nice.lower():
+            def format_todo_item(todo_item: TodoItem) -> str:
+                relevant_file = todo_item.get('relevant_file', '').strip()
+                line_range = todo_item.get('line_range', [])
+                content = todo_item.get('content', '')
+                reference_link = None
+
+                if isinstance(line_range, str):
+                    line_range = ast.literal_eval(line_range.strip())
+                try:
+                    if git_provider and relevant_file and line_range:
+                        reference_link = git_provider.get_line_link(relevant_file, line_range[0], line_range[1])
+                except Exception as e:
+                    get_logger().exception(f"Error generating link: {e}")
+                    line_str = f"[{line_range[0]}]" if line_range[0] == line_range[1] else f"[{line_range[0]}-{line_range[1]}]"
+                    return f"{relevant_file} {line_str}: {content}"
+
+                line_str = f"[{line_range[0]}]" if line_range[0] == line_range[1] else f"[{line_range[0]}-{line_range[1]}]"
+                file_ref = f"{relevant_file} {line_str}"
+                if reference_link:
+                    if gfm_supported:
+                        file_ref = f"<a href='{reference_link}'>{file_ref}</a>"
+                    else:
+                        file_ref = f"[{file_ref}]({reference_link})"
+
+                content_lines = content.strip().split("\n")
+                # if TODO content is single-line :
+                if len(content_lines) == 1:
+                    return f"{file_ref}: {content_lines[0]}"
+                # else if TODO content is multi-line:
+                elif len(content_lines) > 1:
+                    content_lines = "<br>".join(content_lines)
+                    return f"{file_ref}: <blockquote>\n{content_lines}\n</blockquote>"
+                # else if TODO content is empty:
+                else:
+                    return file_ref
+
+            def format_todo_items(value: list[TodoItem] | TodoItem) -> str: 
+                markdown_text = ""
+                if gfm_supported:
+                    if isinstance(value, list):
+                        markdown_text += "<ul>\n"
+                        for todo_item in value:
+                            markdown_text += f"<li>{format_todo_item(todo_item)}</li>\n"
+                        markdown_text += "</ul>\n"
+                    else:
+                        markdown_text += f"<p>{format_todo_item(value)}</p>\n"
+                else:
+                    if isinstance(value, list):
+                        for todo_item in value:
+                            markdown_text += f"- {format_todo_item(todo_item)}\n"
+                    else:
+                        markdown_text += f"- {format_todo_item(value)}\n"
+                return markdown_text
+ 
+            if gfm_supported:
+                markdown_text += "<tr><td>"
+                if is_value_no(value):
+                    markdown_text += f"{emoji}&nbsp;<strong>No TODO sections</strong>"
+                else:
+                    markdown_todo_items = format_todo_items(value)
+
+                    markdown_text += f"{emoji}&nbsp;<strong>TODO sections</strong>\n<br><br>\n"
+                    markdown_text += f"<details><summary>{todo_summary}</summary>\n\n"
+                    markdown_text += markdown_todo_items
+                    markdown_text += "\n</details>\n"
+                markdown_text += "</td></tr>\n"
+            else:
+                if is_value_no(value):
+                    markdown_text += f"### {emoji} No TODO sections\n\n"
+                else:
+                    markdown_todo_items = format_todo_items(value)
+
+                    markdown_text += f"### {emoji} TODO sections\n<details><summary>{todo_summary}</summary>\n\n"
+                    markdown_text += markdown_todo_items
+                    markdown_text += "\n</details>\n\n"
        elif 'can be split' in key_nice.lower():
            if gfm_supported:
                markdown_text += f"<tr><td>"
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@ -78,6 +78,7 @@ require_tests_review=true
 require_estimate_effort_to_review=true
 require_can_be_split_review=false
 require_security_review=true
+require_todo_scan=false
 require_ticket_analysis_review=true
 # general options
 publish_output_no_suggestions=true # Set to "false" if you only need the reviewer's remarks (not labels, not "security audit", etc.) and want to avoid noisy "No major issues detected" comments.
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@ -72,6 +72,13 @@ class KeyIssuesComponentLink(BaseModel):
    start_line: int = Field(description="The start line that corresponds to this issue in the relevant file")
    end_line: int = Field(description="The end line that corresponds to this issue in the relevant file")

+{%- if require_todo_scan %}
+class TodoSection(BaseModel):
+    relevant_file: str = Field(description="The file containing the TODO comment")
+    line_range: Tuple[int, int] = Field(description="Start and end line numbers of the TODO comment (inclusive). Must be a tuple of two integers, e.g., (7, 7) for a single line or (7, 10) for a range. Do not use list format [7, 7].")
+    content: str = Field(description="The content of the TODO comment. Only include actual TODO comments within code comments (e.g., lines starting with '#', '//', '/*', '<!--').  Remove leading 'TODO' prefixes. Include lines like '# TODO', even if empty after prefix removal. Exclude TODOs outside comments or without appropriate prefixes. Use | block style only when the line range covers multiple lines (e.g., (7, 10)).")
+{%- endif %}
+
 {%- if related_tickets %}

 class TicketCompliance(BaseModel):
@ -102,6 +109,10 @@ class Review(BaseModel):
 {%- if require_security_review %}
    security_concerns: str = Field(description="Does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' (without explaining why) if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible")
 {%- endif %}
+{%- if require_todo_scan %}
+    todo_sections: Union[List[TodoSection], str] = Field(description="A list of TODO comments found in the PR code. Return 'No' (as a string) if there are no TODO comments in the PR"
+    todo_summary: str = Field(description="Up to 6 words summarizing the functional areas of the TODO comments found in the code.")
+{%- endif %}
 {%- if require_can_be_split_review %}
    can_be_split: List[SubPR] = Field(min_items=0, max_items=3, description="Can this PR, which contains {{ num_pr_files }} changed files in total, be divided into smaller sub-PRs with distinct tasks that can be reviewed and merged independently, regardless of the order ? Make sure that the sub-PRs are indeed independent, with no code dependencies between them, and that each sub-PR represent a meaningful independent task. Output an empty list if the PR code does not need to be split.")
 {%- endif %}
@ -148,6 +159,12 @@ review:
    - ...
  security_concerns: |
    No
+{%- if require_todo_scan %}
+  todo_sections: |
+    No
+  todo_summary: |
+    ...
+{%- endif %} 
 {%- if require_can_be_split_review %}
  can_be_split:
  - relevant_files:
@ -266,6 +283,12 @@ review:
    - ...
  security_concerns: |
    No
+{%- if require_todo_scan %}
+  todo_sections: |
+    No
+  todo_summary: |
+    ...
+{%- endif %}
 {%- if require_can_be_split_review %}
  can_be_split:
  - relevant_files:
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@ -87,6 +87,7 @@ class PRReviewer:
            "require_estimate_effort_to_review": get_settings().pr_reviewer.require_estimate_effort_to_review,
            'require_can_be_split_review': get_settings().pr_reviewer.require_can_be_split_review,
            'require_security_review': get_settings().pr_reviewer.require_security_review,
+            'require_todo_scan': get_settings().pr_reviewer.require_todo_scan,
            'question_str': question_str,
            'answer_str': answer_str,
            "extra_instructions": get_settings().pr_reviewer.extra_instructions,