Merge pull request #756 from Codium-ai/tr/split

can PR be split feature
2025-07-21 04:50:39 +08:00 · 2024-03-17 01:03:06 -07:00
parent 1e504fc9c6 ac2c062190
commit 11fed1e22d
9 changed files with 77 additions and 12 deletions
--- a/README.md
+++ b/README.md
@ -32,6 +32,12 @@ Making pull requests less painful with an AI agent
 ## News and Updates
 ### Jan 17, 2024
 - A new feature is now available for the review tool: [`require_can_be_split_review`](https://pr-agent-docs.codium.ai/tools/review/#enabledisable-features). 
 If set to true, the tool will add a section that checks if the PR contains several themes, and can be split into smaller PRs.
 <kbd><img src="https://codium.ai/images/pr_agent/multiple_pr_themes.png" width="512"></kbd>
 ### Jan 10, 2024
 - A new [knowledge-base website](https://pr-agent-docs.codium.ai/) for PR-Agent is now available. It includes detailed information about the different tools, usage guides and more, in an accessible and organized format.
--- a/docs/docs/tools/review.md
+++ b/docs/docs/tools/review.md
@ -25,10 +25,10 @@ To edit [configurations](https://github.com/Codium-ai/pr-agent/blob/main/pr_agen
 - `extra_instructions`: Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ...".
 #### Enable\\disable features
 - `require_focused_review`: if set to true, the tool will add a section - 'is the PR a focused one'. Default is false.
 - `require_score_review`: if set to true, the tool will add a section that scores the PR. Default is false.
 - `require_tests_review`: if set to true, the tool will add a section that checks if the PR contains tests. Default is true.
 - `require_estimate_effort_to_review`: if set to true, the tool will add a section that estimates the effort needed to review the PR. Default is true.
 - `require_can_be_split_review`: if set to true, the tool will add a section that checks if the PR contains several themes, and can be split into smaller PRs. Default is false.
 #### SOC2 ticket compliance 💎
 > This feature is available only in PR-Agent Pro 
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@ -70,6 +70,7 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
    """    
    emojis = {
        "Can be split": "🔀",
        "Possible issues": "🔍",
        "Score": "🏅",
        "Relevant tests": "🧪",
@ -94,7 +95,8 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
    for key, value in output_data['review'].items():
        if value is None or value == '' or value == {} or value == []:
-            continue
+            if key.lower() != 'can_be_split':
                continue
        key_nice = key.replace('_', ' ').capitalize()
        emoji = emojis.get(key_nice, "")
        if gfm_supported:
@ -103,6 +105,8 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
            if 'security concerns' in key_nice.lower():
                value = emphasize_header(value.strip())
                markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
            elif 'can be split' in key_nice.lower():
                markdown_text += process_can_be_split(emoji, value)
            elif 'possible issues' in key_nice.lower():
                value = value.strip()
                issues = value.split('\n- ')
@ -154,6 +158,38 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
    return markdown_text
 def process_can_be_split(emoji, value):
    # key_nice = "Can this PR be split?"
    key_nice = "Multiple PR themes"
    markdown_text = ""
    if not value or isinstance(value, list) and len(value) == 1:
        value = "No"
        markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
    else:
        number_of_splits = len(value)
        markdown_text += f"<tr><td rowspan={number_of_splits}> {emoji}&nbsp;<strong>{key_nice}</strong></td>\n"
        for i, split in enumerate(value):
            title = split.get('title', '')
            relevant_files = split.get('relevant_files', [])
            if i == 0:
                markdown_text += f"<td><details><summary>\nSub-PR theme: <strong>{title}</strong></summary>\n\n"
                markdown_text += f"<hr>\n"
                markdown_text += f"Relevant files:\n"
                markdown_text += f"<ul>\n"
                for file in relevant_files:
                    markdown_text += f"<li>{file}</li>\n"
                markdown_text += f"</ul>\n\n</details></td></tr>\n"
            else:
                markdown_text += f"<tr>\n<td><details><summary>\nSub-PR theme: <strong>{title}</strong></summary>\n\n"
                markdown_text += f"<hr>\n"
                markdown_text += f"Relevant files:\n"
                markdown_text += f"<ul>\n"
                for file in relevant_files:
                    markdown_text += f"<li>{file}</li>\n"
                markdown_text += f"</ul>\n\n</details></td></tr>\n"
    return markdown_text
 def parse_code_suggestion(code_suggestion: dict, i: int = 0, gfm_supported: bool = True) -> str:
    """
    Convert a dictionary of data into markdown format.
--- a/pr_agent/git_providers/git_provider.py
+++ b/pr_agent/git_providers/git_provider.py
@ -197,6 +197,12 @@ class GitProvider(ABC):
    def calc_pr_statistics(self, pull_request_data: dict):
        return {}
    def get_num_of_files(self):
        try:
            return len(self.get_diff_files())
        except Exception as e:
            return -1
 def get_main_pr_language(languages, files) -> str:
    """
@ -266,6 +272,7 @@ def get_main_pr_language(languages, files) -> str:
    return main_language_str
 class IncrementalPR:
    def __init__(self, is_incremental: bool = False):
        self.is_incremental = is_incremental
--- a/pr_agent/git_providers/github_provider.py
+++ b/pr_agent/git_providers/github_provider.py
@ -114,6 +114,11 @@ class GithubProvider(GitProvider):
                self.git_files = self.pr.get_files()
            return self.git_files
    def get_num_of_files(self):
        if self.git_files:
            return self.git_files.totalCount
        else:
            return -1
    @retry(exceptions=RateLimitExceeded,
           tries=get_settings().github.ratelimit_retries, delay=2, backoff=2, jitter=(1, 3))
--- a/pr_agent/servers/help.py
+++ b/pr_agent/servers/help.py
@ -95,7 +95,7 @@ The `review` tool can auto-generate two specific types of labels for a PR:
 The `review` tool provides a collection of possible feedbacks about a PR.
 It is recommended to review the [possible options](https://pr-agent-docs.codium.ai/tools/review/#enabledisable-features), and choose the ones relevant for your use case.
 Some of the feature that are disabled by default are quite useful, and should be considered for enabling. For example: 
-`require_score_review`, `require_soc2_ticket`, and more.
+`require_score_review`, `require_soc2_ticket`, `require_can_be_split_review`, and more.
 """
        output += "\n\n</details></td></tr>\n\n"
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@ -22,10 +22,10 @@ ai_disclaimer=""  # Pro feature, full text for the AI disclaimer
 [pr_reviewer] # /review #
 # enable/disable features
 require_focused_review=false
 require_score_review=false
 require_tests_review=true
 require_estimate_effort_to_review=true
 require_can_be_split_review=false
 # soc2
 require_soc2_ticket=false
 soc2_ticket_prompt="Does the PR description include a link to ticket in a project management system (e.g., Jira, Asana, Trello, etc.) ?"
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@ -49,6 +49,12 @@ Extra instructions from the user:
 The output must be a YAML object equivalent to type $PRReview, according to the following Pydantic definitions:
 =====
 {%- if require_can_be_split_review %}
 class SubPR(BaseModel):
    relevant_files: List[str] = Field(description="The relevant files of the sub-PR")
    title: str = Field(description="Short and concise title for an independent and meaningful sub-PR, composed only from the relevant files")
 {%- endif %}
 class Review(BaseModel):
 {%- if require_estimate_effort_to_review %}
    estimated_effort_to_review_[1-5]: str = Field(description="Estimate, on a scale of 1-5 (inclusive), the time and effort required to review this PR by an experienced and knowledgeable developer. 1 means short and easy review , 5 means long and hard review. Take into account the size, complexity, quality, and the needed changes of the PR code diff. Explain your answer in a short and concise manner.")
@ -61,12 +67,12 @@ class Review(BaseModel):
 {%- endif %}
 {%- if question_str %}
    insights_from_user_answers: str = Field(description="shortly summarize the insights you gained from the user's answers to the questions")
 {%- endif %}
 {%- if require_focused %}
    focused_pr: str = Field(description="Is this a focused PR, in the sense that all the PR code diff changes are united under a single focused theme ? If the theme is too broad, or the PR code diff changes are too scattered, then the PR is not focused. Explain your answer shortly.")
 {%- endif %}
    possible_issues: str = Field(description="Does this PR code introduce clear issues, bugs, or major performance concerns? If there are no apparent issues, respond with 'No'. If there are any issues, describe them briefly. Use bullet points if more than one issue. Be specific, and provide examples if possible. Start each bullet point with a short specific header, such as: "- Possible Bug: ...", etc.")
    security_concerns: str = Field(description="does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible")
 {%- if require_can_be_split_review %}
    can_be_split: List[SubPR] = Field(min_items=0, max_items=3, description="Can this PR, which contains {{ num_pr_files }} changed files in total, be divided into smaller sub-PRs with distinct tasks that can be reviewed and merged independently, regardless of the order ? Make sure that the sub-PRs are indeed independent, with no code dependencies between them, and that each sub-PR represent a meaningfull independent task. Output an empty list if the PR code does not needd to be split.")
 {%- endif %}
 {%- if num_code_suggestions > 0 %}
 class CodeSuggestion(BaseModel):
@ -100,14 +106,18 @@ review:
 {%- endif %}
  relevant_tests: |
    No
 {%- if require_focused %}
  focused_pr: |
    no, because ...
 {%- endif %}
  possible_issues: |
    No
  security_concerns: |
    No
 {%- if require_can_be_split_review %}
    can_be_split: |
    - relevant_files:
      - ...
      - ...
     title: ...
    - ...
 {%- endif %}
 {%- if num_code_suggestions > 0 %}
 code_feedback
 - relevant_file: |
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@ -58,10 +58,11 @@ class PRReviewer:
            "description": self.git_provider.get_pr_description(),
            "language": self.main_language,
            "diff": "",  # empty diff for initial calculation
            "num_pr_files": self.git_provider.get_num_of_files(),
            "require_score": get_settings().pr_reviewer.require_score_review,
            "require_tests": get_settings().pr_reviewer.require_tests_review,
            "require_focused": get_settings().pr_reviewer.require_focused_review,
            "require_estimate_effort_to_review": get_settings().pr_reviewer.require_estimate_effort_to_review,
            'require_can_be_split_review': get_settings().pr_reviewer.require_can_be_split_review,
            'num_code_suggestions': get_settings().pr_reviewer.num_code_suggestions,
            'question_str': question_str,
            'answer_str': answer_str,