diff --git a/README.md b/README.md
index da5159c4..ad3c0407 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,12 @@ Making pull requests less painful with an AI agent
## News and Updates
+### Jan 17, 2024
+- A new feature is now available for the review tool: [`require_can_be_split_review`](https://pr-agent-docs.codium.ai/tools/review/#enabledisable-features).
+If set to true, the tool will add a section that checks if the PR contains several themes, and can be split into smaller PRs.
+
+
+
### Jan 10, 2024
- A new [knowledge-base website](https://pr-agent-docs.codium.ai/) for PR-Agent is now available. It includes detailed information about the different tools, usage guides and more, in an accessible and organized format.
diff --git a/docs/docs/tools/review.md b/docs/docs/tools/review.md
index 5094f8f3..ce00a6fe 100644
--- a/docs/docs/tools/review.md
+++ b/docs/docs/tools/review.md
@@ -25,10 +25,10 @@ To edit [configurations](https://github.com/Codium-ai/pr-agent/blob/main/pr_agen
- `extra_instructions`: Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ...".
#### Enable\\disable features
-- `require_focused_review`: if set to true, the tool will add a section - 'is the PR a focused one'. Default is false.
- `require_score_review`: if set to true, the tool will add a section that scores the PR. Default is false.
- `require_tests_review`: if set to true, the tool will add a section that checks if the PR contains tests. Default is true.
- `require_estimate_effort_to_review`: if set to true, the tool will add a section that estimates the effort needed to review the PR. Default is true.
+- `require_can_be_split_review`: if set to true, the tool will add a section that checks if the PR contains several themes, and can be split into smaller PRs. Default is false.
#### SOC2 ticket compliance ๐
> This feature is available only in PR-Agent Pro
diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py
index dede9850..c2b6323c 100644
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@@ -70,6 +70,7 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
"""
emojis = {
+ "Can be split": "๐",
"Possible issues": "๐",
"Score": "๐
",
"Relevant tests": "๐งช",
@@ -94,7 +95,8 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
for key, value in output_data['review'].items():
if value is None or value == '' or value == {} or value == []:
- continue
+ if key.lower() != 'can_be_split':
+ continue
key_nice = key.replace('_', ' ').capitalize()
emoji = emojis.get(key_nice, "")
if gfm_supported:
@@ -103,6 +105,8 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
if 'security concerns' in key_nice.lower():
value = emphasize_header(value.strip())
markdown_text += f"
{emoji} {key_nice} | \n\n{value}\n\n |
\n"
+ elif 'can be split' in key_nice.lower():
+ markdown_text += process_can_be_split(emoji, value)
elif 'possible issues' in key_nice.lower():
value = value.strip()
issues = value.split('\n- ')
@@ -154,6 +158,38 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
return markdown_text
+def process_can_be_split(emoji, value):
+ # key_nice = "Can this PR be split?"
+ key_nice = "Multiple PR themes"
+ markdown_text = ""
+ if not value or isinstance(value, list) and len(value) == 1:
+ value = "No"
+ markdown_text += f" {emoji} {key_nice} | \n\n{value}\n\n |
\n"
+ else:
+ number_of_splits = len(value)
+ markdown_text += f" {emoji} {key_nice} | \n"
+ for i, split in enumerate(value):
+ title = split.get('title', '')
+ relevant_files = split.get('relevant_files', [])
+ if i == 0:
+ markdown_text += f"\nSub-PR theme: {title}\n\n"
+ markdown_text += f" \n"
+ markdown_text += f"Relevant files:\n"
+ markdown_text += f"\n"
+ for file in relevant_files:
+ markdown_text += f"- {file}
\n"
+ markdown_text += f" \n\n |
\n"
+ else:
+ markdown_text += f"\n\nSub-PR theme: {title}\n\n"
+ markdown_text += f" \n"
+ markdown_text += f"Relevant files:\n"
+ markdown_text += f"\n"
+ for file in relevant_files:
+ markdown_text += f"- {file}
\n"
+ markdown_text += f" \n\n |
\n"
+ return markdown_text
+
+
def parse_code_suggestion(code_suggestion: dict, i: int = 0, gfm_supported: bool = True) -> str:
"""
Convert a dictionary of data into markdown format.
diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py
index 9eb09a1a..a7d772b7 100644
--- a/pr_agent/git_providers/git_provider.py
+++ b/pr_agent/git_providers/git_provider.py
@@ -197,6 +197,12 @@ class GitProvider(ABC):
def calc_pr_statistics(self, pull_request_data: dict):
return {}
+ def get_num_of_files(self):
+ try:
+ return len(self.get_diff_files())
+ except Exception as e:
+ return -1
+
def get_main_pr_language(languages, files) -> str:
"""
@@ -266,6 +272,7 @@ def get_main_pr_language(languages, files) -> str:
return main_language_str
+
class IncrementalPR:
def __init__(self, is_incremental: bool = False):
self.is_incremental = is_incremental
diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py
index b6d04bc0..de54ec3f 100644
--- a/pr_agent/git_providers/github_provider.py
+++ b/pr_agent/git_providers/github_provider.py
@@ -114,6 +114,11 @@ class GithubProvider(GitProvider):
self.git_files = self.pr.get_files()
return self.git_files
+ def get_num_of_files(self):
+ if self.git_files:
+ return self.git_files.totalCount
+ else:
+ return -1
@retry(exceptions=RateLimitExceeded,
tries=get_settings().github.ratelimit_retries, delay=2, backoff=2, jitter=(1, 3))
diff --git a/pr_agent/servers/help.py b/pr_agent/servers/help.py
index 1168c743..835ad8b9 100644
--- a/pr_agent/servers/help.py
+++ b/pr_agent/servers/help.py
@@ -95,7 +95,7 @@ The `review` tool can auto-generate two specific types of labels for a PR:
The `review` tool provides a collection of possible feedbacks about a PR.
It is recommended to review the [possible options](https://pr-agent-docs.codium.ai/tools/review/#enabledisable-features), and choose the ones relevant for your use case.
Some of the feature that are disabled by default are quite useful, and should be considered for enabling. For example:
-`require_score_review`, `require_soc2_ticket`, and more.
+`require_score_review`, `require_soc2_ticket`, `require_can_be_split_review`, and more.
"""
output += "\n\n\n\n"
diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
index b5a0dab1..3038f3df 100644
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@@ -22,10 +22,10 @@ ai_disclaimer="" # Pro feature, full text for the AI disclaimer
[pr_reviewer] # /review #
# enable/disable features
-require_focused_review=false
require_score_review=false
require_tests_review=true
require_estimate_effort_to_review=true
+require_can_be_split_review=false
# soc2
require_soc2_ticket=false
soc2_ticket_prompt="Does the PR description include a link to ticket in a project management system (e.g., Jira, Asana, Trello, etc.) ?"
diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml
index 16897292..aeeef5f8 100644
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@@ -49,6 +49,12 @@ Extra instructions from the user:
The output must be a YAML object equivalent to type $PRReview, according to the following Pydantic definitions:
=====
+{%- if require_can_be_split_review %}
+class SubPR(BaseModel):
+ relevant_files: List[str] = Field(description="The relevant files of the sub-PR")
+ title: str = Field(description="Short and concise title for an independent and meaningful sub-PR, composed only from the relevant files")
+{%- endif %}
+
class Review(BaseModel):
{%- if require_estimate_effort_to_review %}
estimated_effort_to_review_[1-5]: str = Field(description="Estimate, on a scale of 1-5 (inclusive), the time and effort required to review this PR by an experienced and knowledgeable developer. 1 means short and easy review , 5 means long and hard review. Take into account the size, complexity, quality, and the needed changes of the PR code diff. Explain your answer in a short and concise manner.")
@@ -61,12 +67,12 @@ class Review(BaseModel):
{%- endif %}
{%- if question_str %}
insights_from_user_answers: str = Field(description="shortly summarize the insights you gained from the user's answers to the questions")
-{%- endif %}
-{%- if require_focused %}
- focused_pr: str = Field(description="Is this a focused PR, in the sense that all the PR code diff changes are united under a single focused theme ? If the theme is too broad, or the PR code diff changes are too scattered, then the PR is not focused. Explain your answer shortly.")
{%- endif %}
possible_issues: str = Field(description="Does this PR code introduce clear issues, bugs, or major performance concerns? If there are no apparent issues, respond with 'No'. If there are any issues, describe them briefly. Use bullet points if more than one issue. Be specific, and provide examples if possible. Start each bullet point with a short specific header, such as: "- Possible Bug: ...", etc.")
security_concerns: str = Field(description="does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible")
+{%- if require_can_be_split_review %}
+ can_be_split: List[SubPR] = Field(min_items=0, max_items=3, description="Can this PR, which contains {{ num_pr_files }} changed files in total, be divided into smaller sub-PRs with distinct tasks that can be reviewed and merged independently, regardless of the order ? Make sure that the sub-PRs are indeed independent, with no code dependencies between them, and that each sub-PR represent a meaningfull independent task. Output an empty list if the PR code does not needd to be split.")
+{%- endif %}
{%- if num_code_suggestions > 0 %}
class CodeSuggestion(BaseModel):
@@ -100,14 +106,18 @@ review:
{%- endif %}
relevant_tests: |
No
-{%- if require_focused %}
- focused_pr: |
- no, because ...
-{%- endif %}
possible_issues: |
No
security_concerns: |
No
+{%- if require_can_be_split_review %}
+ can_be_split: |
+ - relevant_files:
+ - ...
+ - ...
+ title: ...
+ - ...
+{%- endif %}
{%- if num_code_suggestions > 0 %}
code_feedback
- relevant_file: |
diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py
index f0475b82..19d1bb94 100644
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@@ -58,10 +58,11 @@ class PRReviewer:
"description": self.git_provider.get_pr_description(),
"language": self.main_language,
"diff": "", # empty diff for initial calculation
+ "num_pr_files": self.git_provider.get_num_of_files(),
"require_score": get_settings().pr_reviewer.require_score_review,
"require_tests": get_settings().pr_reviewer.require_tests_review,
- "require_focused": get_settings().pr_reviewer.require_focused_review,
"require_estimate_effort_to_review": get_settings().pr_reviewer.require_estimate_effort_to_review,
+ 'require_can_be_split_review': get_settings().pr_reviewer.require_can_be_split_review,
'num_code_suggestions': get_settings().pr_reviewer.num_code_suggestions,
'question_str': question_str,
'answer_str': answer_str,