Merge pull request #756 from Codium-ai/tr/split

can PR be split feature
This commit is contained in:
Tal
2024-03-17 01:03:06 -07:00
committed by GitHub
9 changed files with 77 additions and 12 deletions

View File

@ -32,6 +32,12 @@ Making pull requests less painful with an AI agent
## News and Updates ## News and Updates
### Jan 17, 2024
- A new feature is now available for the review tool: [`require_can_be_split_review`](https://pr-agent-docs.codium.ai/tools/review/#enabledisable-features).
If set to true, the tool will add a section that checks if the PR contains several themes, and can be split into smaller PRs.
<kbd><img src="https://codium.ai/images/pr_agent/multiple_pr_themes.png" width="512"></kbd>
### Jan 10, 2024 ### Jan 10, 2024
- A new [knowledge-base website](https://pr-agent-docs.codium.ai/) for PR-Agent is now available. It includes detailed information about the different tools, usage guides and more, in an accessible and organized format. - A new [knowledge-base website](https://pr-agent-docs.codium.ai/) for PR-Agent is now available. It includes detailed information about the different tools, usage guides and more, in an accessible and organized format.

View File

@ -25,10 +25,10 @@ To edit [configurations](https://github.com/Codium-ai/pr-agent/blob/main/pr_agen
- `extra_instructions`: Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ...". - `extra_instructions`: Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ...".
#### Enable\\disable features #### Enable\\disable features
- `require_focused_review`: if set to true, the tool will add a section - 'is the PR a focused one'. Default is false.
- `require_score_review`: if set to true, the tool will add a section that scores the PR. Default is false. - `require_score_review`: if set to true, the tool will add a section that scores the PR. Default is false.
- `require_tests_review`: if set to true, the tool will add a section that checks if the PR contains tests. Default is true. - `require_tests_review`: if set to true, the tool will add a section that checks if the PR contains tests. Default is true.
- `require_estimate_effort_to_review`: if set to true, the tool will add a section that estimates the effort needed to review the PR. Default is true. - `require_estimate_effort_to_review`: if set to true, the tool will add a section that estimates the effort needed to review the PR. Default is true.
- `require_can_be_split_review`: if set to true, the tool will add a section that checks if the PR contains several themes, and can be split into smaller PRs. Default is false.
#### SOC2 ticket compliance 💎 #### SOC2 ticket compliance 💎
> This feature is available only in PR-Agent Pro > This feature is available only in PR-Agent Pro

View File

@ -70,6 +70,7 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
""" """
emojis = { emojis = {
"Can be split": "🔀",
"Possible issues": "🔍", "Possible issues": "🔍",
"Score": "🏅", "Score": "🏅",
"Relevant tests": "🧪", "Relevant tests": "🧪",
@ -94,7 +95,8 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
for key, value in output_data['review'].items(): for key, value in output_data['review'].items():
if value is None or value == '' or value == {} or value == []: if value is None or value == '' or value == {} or value == []:
continue if key.lower() != 'can_be_split':
continue
key_nice = key.replace('_', ' ').capitalize() key_nice = key.replace('_', ' ').capitalize()
emoji = emojis.get(key_nice, "") emoji = emojis.get(key_nice, "")
if gfm_supported: if gfm_supported:
@ -103,6 +105,8 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
if 'security concerns' in key_nice.lower(): if 'security concerns' in key_nice.lower():
value = emphasize_header(value.strip()) value = emphasize_header(value.strip())
markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n" markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
elif 'can be split' in key_nice.lower():
markdown_text += process_can_be_split(emoji, value)
elif 'possible issues' in key_nice.lower(): elif 'possible issues' in key_nice.lower():
value = value.strip() value = value.strip()
issues = value.split('\n- ') issues = value.split('\n- ')
@ -154,6 +158,38 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
return markdown_text return markdown_text
def process_can_be_split(emoji, value):
# key_nice = "Can this PR be split?"
key_nice = "Multiple PR themes"
markdown_text = ""
if not value or isinstance(value, list) and len(value) == 1:
value = "No"
markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
else:
number_of_splits = len(value)
markdown_text += f"<tr><td rowspan={number_of_splits}> {emoji}&nbsp;<strong>{key_nice}</strong></td>\n"
for i, split in enumerate(value):
title = split.get('title', '')
relevant_files = split.get('relevant_files', [])
if i == 0:
markdown_text += f"<td><details><summary>\nSub-PR theme: <strong>{title}</strong></summary>\n\n"
markdown_text += f"<hr>\n"
markdown_text += f"Relevant files:\n"
markdown_text += f"<ul>\n"
for file in relevant_files:
markdown_text += f"<li>{file}</li>\n"
markdown_text += f"</ul>\n\n</details></td></tr>\n"
else:
markdown_text += f"<tr>\n<td><details><summary>\nSub-PR theme: <strong>{title}</strong></summary>\n\n"
markdown_text += f"<hr>\n"
markdown_text += f"Relevant files:\n"
markdown_text += f"<ul>\n"
for file in relevant_files:
markdown_text += f"<li>{file}</li>\n"
markdown_text += f"</ul>\n\n</details></td></tr>\n"
return markdown_text
def parse_code_suggestion(code_suggestion: dict, i: int = 0, gfm_supported: bool = True) -> str: def parse_code_suggestion(code_suggestion: dict, i: int = 0, gfm_supported: bool = True) -> str:
""" """
Convert a dictionary of data into markdown format. Convert a dictionary of data into markdown format.

View File

@ -197,6 +197,12 @@ class GitProvider(ABC):
def calc_pr_statistics(self, pull_request_data: dict): def calc_pr_statistics(self, pull_request_data: dict):
return {} return {}
def get_num_of_files(self):
try:
return len(self.get_diff_files())
except Exception as e:
return -1
def get_main_pr_language(languages, files) -> str: def get_main_pr_language(languages, files) -> str:
""" """
@ -266,6 +272,7 @@ def get_main_pr_language(languages, files) -> str:
return main_language_str return main_language_str
class IncrementalPR: class IncrementalPR:
def __init__(self, is_incremental: bool = False): def __init__(self, is_incremental: bool = False):
self.is_incremental = is_incremental self.is_incremental = is_incremental

View File

@ -114,6 +114,11 @@ class GithubProvider(GitProvider):
self.git_files = self.pr.get_files() self.git_files = self.pr.get_files()
return self.git_files return self.git_files
def get_num_of_files(self):
if self.git_files:
return self.git_files.totalCount
else:
return -1
@retry(exceptions=RateLimitExceeded, @retry(exceptions=RateLimitExceeded,
tries=get_settings().github.ratelimit_retries, delay=2, backoff=2, jitter=(1, 3)) tries=get_settings().github.ratelimit_retries, delay=2, backoff=2, jitter=(1, 3))

View File

@ -95,7 +95,7 @@ The `review` tool can auto-generate two specific types of labels for a PR:
The `review` tool provides a collection of possible feedbacks about a PR. The `review` tool provides a collection of possible feedbacks about a PR.
It is recommended to review the [possible options](https://pr-agent-docs.codium.ai/tools/review/#enabledisable-features), and choose the ones relevant for your use case. It is recommended to review the [possible options](https://pr-agent-docs.codium.ai/tools/review/#enabledisable-features), and choose the ones relevant for your use case.
Some of the feature that are disabled by default are quite useful, and should be considered for enabling. For example: Some of the feature that are disabled by default are quite useful, and should be considered for enabling. For example:
`require_score_review`, `require_soc2_ticket`, and more. `require_score_review`, `require_soc2_ticket`, `require_can_be_split_review`, and more.
""" """
output += "\n\n</details></td></tr>\n\n" output += "\n\n</details></td></tr>\n\n"

View File

@ -22,10 +22,10 @@ ai_disclaimer="" # Pro feature, full text for the AI disclaimer
[pr_reviewer] # /review # [pr_reviewer] # /review #
# enable/disable features # enable/disable features
require_focused_review=false
require_score_review=false require_score_review=false
require_tests_review=true require_tests_review=true
require_estimate_effort_to_review=true require_estimate_effort_to_review=true
require_can_be_split_review=false
# soc2 # soc2
require_soc2_ticket=false require_soc2_ticket=false
soc2_ticket_prompt="Does the PR description include a link to ticket in a project management system (e.g., Jira, Asana, Trello, etc.) ?" soc2_ticket_prompt="Does the PR description include a link to ticket in a project management system (e.g., Jira, Asana, Trello, etc.) ?"

View File

@ -49,6 +49,12 @@ Extra instructions from the user:
The output must be a YAML object equivalent to type $PRReview, according to the following Pydantic definitions: The output must be a YAML object equivalent to type $PRReview, according to the following Pydantic definitions:
===== =====
{%- if require_can_be_split_review %}
class SubPR(BaseModel):
relevant_files: List[str] = Field(description="The relevant files of the sub-PR")
title: str = Field(description="Short and concise title for an independent and meaningful sub-PR, composed only from the relevant files")
{%- endif %}
class Review(BaseModel): class Review(BaseModel):
{%- if require_estimate_effort_to_review %} {%- if require_estimate_effort_to_review %}
estimated_effort_to_review_[1-5]: str = Field(description="Estimate, on a scale of 1-5 (inclusive), the time and effort required to review this PR by an experienced and knowledgeable developer. 1 means short and easy review , 5 means long and hard review. Take into account the size, complexity, quality, and the needed changes of the PR code diff. Explain your answer in a short and concise manner.") estimated_effort_to_review_[1-5]: str = Field(description="Estimate, on a scale of 1-5 (inclusive), the time and effort required to review this PR by an experienced and knowledgeable developer. 1 means short and easy review , 5 means long and hard review. Take into account the size, complexity, quality, and the needed changes of the PR code diff. Explain your answer in a short and concise manner.")
@ -61,12 +67,12 @@ class Review(BaseModel):
{%- endif %} {%- endif %}
{%- if question_str %} {%- if question_str %}
insights_from_user_answers: str = Field(description="shortly summarize the insights you gained from the user's answers to the questions") insights_from_user_answers: str = Field(description="shortly summarize the insights you gained from the user's answers to the questions")
{%- endif %}
{%- if require_focused %}
focused_pr: str = Field(description="Is this a focused PR, in the sense that all the PR code diff changes are united under a single focused theme ? If the theme is too broad, or the PR code diff changes are too scattered, then the PR is not focused. Explain your answer shortly.")
{%- endif %} {%- endif %}
possible_issues: str = Field(description="Does this PR code introduce clear issues, bugs, or major performance concerns? If there are no apparent issues, respond with 'No'. If there are any issues, describe them briefly. Use bullet points if more than one issue. Be specific, and provide examples if possible. Start each bullet point with a short specific header, such as: "- Possible Bug: ...", etc.") possible_issues: str = Field(description="Does this PR code introduce clear issues, bugs, or major performance concerns? If there are no apparent issues, respond with 'No'. If there are any issues, describe them briefly. Use bullet points if more than one issue. Be specific, and provide examples if possible. Start each bullet point with a short specific header, such as: "- Possible Bug: ...", etc.")
security_concerns: str = Field(description="does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible") security_concerns: str = Field(description="does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible")
{%- if require_can_be_split_review %}
can_be_split: List[SubPR] = Field(min_items=0, max_items=3, description="Can this PR, which contains {{ num_pr_files }} changed files in total, be divided into smaller sub-PRs with distinct tasks that can be reviewed and merged independently, regardless of the order ? Make sure that the sub-PRs are indeed independent, with no code dependencies between them, and that each sub-PR represent a meaningfull independent task. Output an empty list if the PR code does not needd to be split.")
{%- endif %}
{%- if num_code_suggestions > 0 %} {%- if num_code_suggestions > 0 %}
class CodeSuggestion(BaseModel): class CodeSuggestion(BaseModel):
@ -100,14 +106,18 @@ review:
{%- endif %} {%- endif %}
relevant_tests: | relevant_tests: |
No No
{%- if require_focused %}
focused_pr: |
no, because ...
{%- endif %}
possible_issues: | possible_issues: |
No No
security_concerns: | security_concerns: |
No No
{%- if require_can_be_split_review %}
can_be_split: |
- relevant_files:
- ...
- ...
title: ...
- ...
{%- endif %}
{%- if num_code_suggestions > 0 %} {%- if num_code_suggestions > 0 %}
code_feedback code_feedback
- relevant_file: | - relevant_file: |

View File

@ -58,10 +58,11 @@ class PRReviewer:
"description": self.git_provider.get_pr_description(), "description": self.git_provider.get_pr_description(),
"language": self.main_language, "language": self.main_language,
"diff": "", # empty diff for initial calculation "diff": "", # empty diff for initial calculation
"num_pr_files": self.git_provider.get_num_of_files(),
"require_score": get_settings().pr_reviewer.require_score_review, "require_score": get_settings().pr_reviewer.require_score_review,
"require_tests": get_settings().pr_reviewer.require_tests_review, "require_tests": get_settings().pr_reviewer.require_tests_review,
"require_focused": get_settings().pr_reviewer.require_focused_review,
"require_estimate_effort_to_review": get_settings().pr_reviewer.require_estimate_effort_to_review, "require_estimate_effort_to_review": get_settings().pr_reviewer.require_estimate_effort_to_review,
'require_can_be_split_review': get_settings().pr_reviewer.require_can_be_split_review,
'num_code_suggestions': get_settings().pr_reviewer.num_code_suggestions, 'num_code_suggestions': get_settings().pr_reviewer.num_code_suggestions,
'question_str': question_str, 'question_str': question_str,
'answer_str': answer_str, 'answer_str': answer_str,