diff --git a/README.md b/README.md
index cdbc5f72..d6ab33b3 100644
--- a/README.md
+++ b/README.md
@@ -43,6 +43,12 @@ CodiumAI PR-Agent aims to help efficiently review and handle pull requests, by p
## News and Updates
+### October 10, 2024
+New ability for the `review` tool - **ticket compliance feedback**. If the PR description links to a GitHub issue (ticket), PR-Agent will check whether the PR code actually complies with the ticket requirements.
+
+
+
+
### September 21, 2024
Need help with PR-Agent? New feature - simply comment `/help "your question"` in a pull request, and PR-Agent will provide you with the [relevant documentation](https://github.com/Codium-ai/pr-agent/pull/1241#issuecomment-2365259334).
@@ -93,7 +99,6 @@ Supported commands per platform:
|-------|---------------------------------------------------------------------------------------------------------|:--------------------:|:--------------------:|:--------------------:|:------------:|
| TOOLS | Review | ✅ | ✅ | ✅ | ✅ |
| | ⮑ Incremental | ✅ | | | |
-| | ⮑ [SOC2 Compliance](https://pr-agent-docs.codium.ai/tools/review/#soc2-ticket-compliance) 💎 | ✅ | ✅ | ✅ | |
| | Describe | ✅ | ✅ | ✅ | ✅ |
| | ⮑ [Inline File Summary](https://pr-agent-docs.codium.ai/tools/describe#inline-file-summary) 💎 | ✅ | | | |
| | Improve | ✅ | ✅ | ✅ | ✅ |
diff --git a/docs/docs/index.md b/docs/docs/index.md
index 3bc1ed4b..e3f7518f 100644
--- a/docs/docs/index.md
+++ b/docs/docs/index.md
@@ -29,7 +29,6 @@ Qodo Merge offers extensive pull request functionalities across various git prov
|-------|-----------------------------------------------------------------------------------------------------------------------|:------:|:------:|:---------:|:------------:|
| TOOLS | Review | ✅ | ✅ | ✅ | ✅ |
| | ⮑ Incremental | ✅ | | | |
-| | ⮑ [SOC2 Compliance](https://qodo-merge-docs.qodo.ai/tools/review/#soc2-ticket-compliance){:target="_blank"} 💎 | ✅ | ✅ | ✅ | |
| | Ask | ✅ | ✅ | ✅ | ✅ |
| | Describe | ✅ | ✅ | ✅ | ✅ |
| | ⮑ [Inline file summary](https://qodo-merge-docs.qodo.ai/tools/describe/#inline-file-summary){:target="_blank"} 💎 | ✅ | ✅ | | |
diff --git a/docs/docs/overview/index.md b/docs/docs/overview/index.md
index d4330a95..f56783bc 100644
--- a/docs/docs/overview/index.md
+++ b/docs/docs/overview/index.md
@@ -29,7 +29,6 @@ Qodo Merge offers extensive pull request functionalities across various git prov
|-------|-----------------------------------------------------------------------------------------------------------------------|:------:|:------:|:---------:|:------------:|
| TOOLS | Review | ✅ | ✅ | ✅ | ✅ |
| | ⮑ Incremental | ✅ | | | |
-| | ⮑ [SOC2 Compliance](https://qodo-merge-docs.qodo.ai/tools/review/#soc2-ticket-compliance){:target="_blank"} 💎 | ✅ | ✅ | ✅ | ✅ |
| | Ask | ✅ | ✅ | ✅ | ✅ |
| | Describe | ✅ | ✅ | ✅ | ✅ |
| | ⮑ [Inline file summary](https://qodo-merge-docs.qodo.ai/tools/describe/#inline-file-summary){:target="_blank"} 💎 | ✅ | ✅ | | ✅ |
diff --git a/docs/docs/overview/pr_agent_pro.md b/docs/docs/overview/pr_agent_pro.md
index e6e859cc..731b0573 100644
--- a/docs/docs/overview/pr_agent_pro.md
+++ b/docs/docs/overview/pr_agent_pro.md
@@ -27,7 +27,6 @@ Here are some of the additional features and capabilities that Qodo Merge Pro of
| [**Advanced usage statistics**](https://www.codium.ai/contact/#/) | Qodo Merge Pro offers detailed statistics at user, repository, and company levels, including metrics about Qodo Merge usage, and also general statistics and insights |
| [**Incorporating companies' best practices**](https://qodo-merge-docs.qodo.ai/tools/improve/#best-practices) | Use the companies' best practices as reference to increase the effectiveness and the relevance of the code suggestions |
| [**Interactive triggering**](https://qodo-merge-docs.qodo.ai/tools/analyze/#example-usage) | Interactively apply different tools via the `analyze` command |
-| [**SOC2 compliance check**](https://qodo-merge-docs.qodo.ai/tools/review/#configuration-options) | Ensures the PR contains a ticket to a project management system (e.g., Jira, Asana, Trello, etc.)
| [**Custom labels**](https://qodo-merge-docs.qodo.ai/tools/describe/#handle-custom-labels-from-the-repos-labels-page) | Define custom labels for Qodo Merge to assign to the PR |
### Additional tools
diff --git a/docs/docs/tools/review.md b/docs/docs/tools/review.md
index 16765629..2fd774f6 100644
--- a/docs/docs/tools/review.md
+++ b/docs/docs/tools/review.md
@@ -138,20 +138,9 @@ num_code_suggestions = ...
require_soc2_ticket | -If set to true, the SOC2 ticket checker sub-tool will be enabled. Default is false. | -||
soc2_ticket_prompt | -The prompt for the SOC2 ticket review. Default is: `Does the PR description include a link to ticket in a project management system (e.g., Jira, Asana, Trello, etc.) ?`. Edit this field if your compliance requirements are different. | +require_ticket_analysis_review | +If set to true, and the PR contains a GitHub ticket number, the tool will add a section that checks if the PR in fact fulfilled the ticket requirements. Default is true. |
...
' in file_data:
+ pass # PR with many files. some did not get analyzed
+ else:
+ get_logger().error(f"Failed to parse description", artifact={'description': file_data})
except Exception as e:
get_logger().exception(f"Failed to process description: {e}", artifact={'description': file_data})
diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
index add06bb6..d4ecf379 100644
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@@ -51,9 +51,7 @@ require_tests_review=true
require_estimate_effort_to_review=true
require_can_be_split_review=false
require_security_review=true
-# soc2
-require_soc2_ticket=false
-soc2_ticket_prompt="Does the PR description include a link to ticket in a project management system (e.g., Jira, Asana, Trello, etc.) ?"
+require_ticket_analysis_review=true
# general options
num_code_suggestions=0
inline_code_comments = false
diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml
index de7c3d54..364dd9af 100644
--- a/pr_agent/settings/pr_description_prompts.toml
+++ b/pr_agent/settings/pr_description_prompts.toml
@@ -78,9 +78,9 @@ pr_files:
...
...
{%- endif %}
-description: |-
+description: |
...
-title: |-
+title: |
...
{%- if enable_custom_labels %}
labels:
@@ -94,7 +94,26 @@ labels:
Answer should be a valid YAML, and nothing else. Each YAML output MUST be after a newline, with proper indent, and block scalar indicator ('|')
"""
-user="""PR Info:
+user="""
+{%- if related_tickets %}
+Related Ticket Info:
+{% for ticket in related_tickets %}
+=====
+Ticket Title: '{{ ticket.title }}'
+{%- if ticket.labels %}
+Ticket Labels: {{ ticket.labels }}
+{%- endif %}
+{%- if ticket.body %}
+Ticket Description:
+#####
+{{ ticket.body }}
+#####
+{%- endif %}
+=====
+{% endfor %}
+{%- endif %}
+
+PR Info:
Previous title: '{{title}}'
diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml
index e3b4bfe4..0b61e8ea 100644
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@@ -85,7 +85,20 @@ class KeyIssuesComponentLink(BaseModel):
start_line: int = Field(description="The start line that corresponds to this issue in the relevant file")
end_line: int = Field(description="The end line that corresponds to this issue in the relevant file")
+{%- if related_tickets %}
+
+class TicketCompliance(BaseModel):
+ ticket_url: str = Field(description="Ticket URL or ID")
+ ticket_requirements: str = Field(description="Repeat, in your own words, all ticket requirements, in bullet points")
+ fully_compliant_requirements: str = Field(description="A list, in bullet points, of which requirements are met by the PR code. Don't explain how the requirements are met, just list them shortly. Can be empty")
+ not_compliant_requirements: str = Field(description="A list, in bullet points, of which requirements are not met by the PR code. Don't explain how the requirements are not met, just list them shortly. Can be empty")
+ overall_compliance_level: str = Field(description="Overall give this PR one of these three values in relation to the ticket: 'Fully compliant', 'Partially compliant', or 'Not compliant'")
+{%- endif %}
+
class Review(BaseModel):
+{%- if related_tickets %}
+ ticket_compliance_check: List[TicketCompliance] = Field(description="A list of compliance checks for the related tickets")
+{%- endif %}
{%- if require_estimate_effort_to_review %}
estimated_effort_to_review_[1-5]: int = Field(description="Estimate, on a scale of 1-5 (inclusive), the time and effort required to review this PR by an experienced and knowledgeable developer. 1 means short and easy review , 5 means long and hard review. Take into account the size, complexity, quality, and the needed changes of the PR code diff.")
{%- endif %}
@@ -130,6 +143,19 @@ class PRReview(BaseModel):
Example output:
```yaml
review:
+{%- if related_tickets %}
+ ticket_compliance_check:
+ - ticket_url: |
+ ...
+ ticket_requirements: |
+ ...
+ fully_compliant_requirements: |
+ ...
+ not_compliant_requirements: |
+ ...
+ overall_compliance_level: |
+ ...
+{%- endif %}
{%- if require_estimate_effort_to_review %}
estimated_effort_to_review_[1-5]: |
3
@@ -176,7 +202,33 @@ code_feedback:
Answer should be a valid YAML, and nothing else. Each YAML output MUST be after a newline, with proper indent, and block scalar indicator ('|')
"""
-user="""--PR Info--
+user="""
+{%- if related_tickets %}
+--PR Ticket Info--
+{%- for ticket in related_tickets %}
+=====
+Ticket URL: '{{ ticket.ticket_url }}'
+
+Ticket Title: '{{ ticket.title }}'
+
+{%- if ticket.labels %}
+
+Ticket Labels: {{ ticket.labels }}
+
+{%- endif %}
+{%- if ticket.body %}
+
+Ticket Description:
+#####
+{{ ticket.body }}
+#####
+{%- endif %}
+=====
+{% endfor %}
+{%- endif %}
+
+
+--PR Info--
Title: '{{title}}'
diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py
index 9f7d79d3..c965d84e 100644
--- a/pr_agent/tools/pr_description.py
+++ b/pr_agent/tools/pr_description.py
@@ -20,6 +20,8 @@ from pr_agent.git_providers import get_git_provider, GithubProvider, get_git_pro
from pr_agent.git_providers.git_provider import get_main_pr_language
from pr_agent.log import get_logger
from pr_agent.servers.help import HelpMessage
+from pr_agent.tools.ticket_pr_compliance_check import extract_ticket_links_from_pr_description, extract_tickets, \
+ extract_and_cache_pr_tickets
class PRDescription:
@@ -38,6 +40,7 @@ class PRDescription:
self.git_provider.get_languages(), self.git_provider.get_files()
)
self.pr_id = self.git_provider.get_pr_id()
+ self.keys_fix = ["filename:", "language:", "changes_summary:", "changes_title:", "description:", "title:"]
if get_settings().pr_description.enable_semantic_files_types and not self.git_provider.is_supported(
"gfm_markdown"):
@@ -60,6 +63,7 @@ class PRDescription:
"enable_custom_labels": get_settings().config.enable_custom_labels,
"custom_labels_class": "", # will be filled if necessary in 'set_custom_labels' function
"enable_semantic_files_types": get_settings().pr_description.enable_semantic_files_types,
+ "related_tickets": "",
}
self.user_description = self.git_provider.get_user_description()
@@ -87,6 +91,9 @@ class PRDescription:
if get_settings().config.publish_output and not get_settings().config.get('is_auto_command', False):
self.git_provider.publish_comment("Preparing PR description...", is_temporary=True)
+ # ticket extraction if exists
+ await extract_and_cache_pr_tickets(self.git_provider, self.vars)
+
await retry_with_fallback_models(self._prepare_prediction, ModelType.TURBO)
if self.prediction:
@@ -226,7 +233,7 @@ class PRDescription:
file_description_str_list = []
for i, result in enumerate(results):
prediction_files = result.strip().removeprefix('```yaml').strip('`').strip()
- if load_yaml(prediction_files) and prediction_files.startswith('pr_files'):
+ if load_yaml(prediction_files, keys_fix_yaml=self.keys_fix) and prediction_files.startswith('pr_files'):
prediction_files = prediction_files.removeprefix('pr_files:').strip()
file_description_str_list.append(prediction_files)
else:
@@ -304,16 +311,16 @@ extra_file_yaml =
# final processing
self.prediction = prediction_headers + "\n" + "pr_files:\n" + files_walkthrough
- if not load_yaml(self.prediction):
+ if not load_yaml(self.prediction, keys_fix_yaml=self.keys_fix):
get_logger().error(f"Error getting valid YAML in large PR handling for describe {self.pr_id}")
- if load_yaml(prediction_headers):
+ if load_yaml(prediction_headers, keys_fix_yaml=self.keys_fix):
get_logger().debug(f"Using only headers for describe {self.pr_id}")
self.prediction = prediction_headers
async def extend_additional_files(self, remaining_files_list) -> str:
prediction = self.prediction
try:
- original_prediction_dict = load_yaml(self.prediction)
+ original_prediction_dict = load_yaml(self.prediction, keys_fix_yaml=self.keys_fix)
prediction_extra = "pr_files:"
for file in remaining_files_list:
extra_file_yaml = f"""\
@@ -327,12 +334,12 @@ extra_file_yaml =
additional files (token-limit)
"""
prediction_extra = prediction_extra + "\n" + extra_file_yaml.strip()
- prediction_extra_dict = load_yaml(prediction_extra)
+ prediction_extra_dict = load_yaml(prediction_extra, keys_fix_yaml=self.keys_fix)
# merge the two dictionaries
if isinstance(original_prediction_dict, dict) and isinstance(prediction_extra_dict, dict):
original_prediction_dict["pr_files"].extend(prediction_extra_dict["pr_files"])
new_yaml = yaml.dump(original_prediction_dict)
- if load_yaml(new_yaml):
+ if load_yaml(new_yaml, keys_fix_yaml=self.keys_fix):
prediction = new_yaml
return prediction
except Exception as e:
@@ -361,7 +368,7 @@ extra_file_yaml =
def _prepare_data(self):
# Load the AI prediction data into a dictionary
- self.data = load_yaml(self.prediction.strip())
+ self.data = load_yaml(self.prediction.strip(), keys_fix_yaml=self.keys_fix)
if get_settings().pr_description.add_original_user_description and self.user_description:
self.data["User Description"] = self.user_description
diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py
index 88799d98..f5f82e06 100644
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@@ -1,5 +1,6 @@
import copy
import datetime
+import traceback
from collections import OrderedDict
from functools import partial
from typing import List, Tuple
@@ -15,6 +16,7 @@ from pr_agent.git_providers import get_git_provider, get_git_provider_with_conte
from pr_agent.git_providers.git_provider import IncrementalPR, get_main_pr_language
from pr_agent.log import get_logger
from pr_agent.servers.help import HelpMessage
+from pr_agent.tools.ticket_pr_compliance_check import extract_tickets, extract_and_cache_pr_tickets
class PRReviewer:
@@ -84,6 +86,7 @@ class PRReviewer:
"custom_labels": "",
"enable_custom_labels": get_settings().config.enable_custom_labels,
"is_ai_metadata": get_settings().get("config.enable_ai_metadata", False),
+ "related_tickets": get_settings().get('related_tickets', []),
}
self.token_handler = TokenHandler(
@@ -121,6 +124,9 @@ class PRReviewer:
'config': dict(get_settings().config)}
get_logger().debug("Relevant configs", artifacts=relevant_configs)
+ # ticket extraction if exists
+ await extract_and_cache_pr_tickets(self.git_provider, self.vars)
+
if self.incremental.is_incremental and hasattr(self.git_provider, "unreviewed_files_set") and not self.git_provider.unreviewed_files_set:
get_logger().info(f"Incremental review is enabled for {self.pr_url} but there are no new files")
previous_review_url = ""
@@ -207,7 +213,7 @@ class PRReviewer:
first_key = 'review'
last_key = 'security_concerns'
data = load_yaml(self.prediction.strip(),
- keys_fix_yaml=["estimated_effort_to_review_[1-5]:", "security_concerns:", "key_issues_to_review:",
+ keys_fix_yaml=["ticket_compliance_check", "estimated_effort_to_review_[1-5]:", "security_concerns:", "key_issues_to_review:",
"relevant_file:", "relevant_line:", "suggestion:"],
first_key=first_key, last_key=last_key)
github_action_output(data, 'review')
@@ -282,7 +288,7 @@ class PRReviewer:
first_key = 'review'
last_key = 'security_concerns'
data = load_yaml(self.prediction.strip(),
- keys_fix_yaml=["estimated_effort_to_review_[1-5]:", "security_concerns:", "key_issues_to_review:",
+ keys_fix_yaml=["ticket_compliance_check", "estimated_effort_to_review_[1-5]:", "security_concerns:", "key_issues_to_review:",
"relevant_file:", "relevant_line:", "suggestion:"],
first_key=first_key, last_key=last_key)
comments: List[str] = []
@@ -401,7 +407,16 @@ class PRReviewer:
review_labels = []
if get_settings().pr_reviewer.enable_review_labels_effort:
estimated_effort = data['review']['estimated_effort_to_review_[1-5]']
- estimated_effort_number = int(estimated_effort.split(',')[0])
+ estimated_effort_number = 0
+ if isinstance(estimated_effort, str):
+ try:
+ estimated_effort_number = int(estimated_effort.split(',')[0])
+ except ValueError:
+ get_logger().warning(f"Invalid estimated_effort value: {estimated_effort}")
+ elif isinstance(estimated_effort, int):
+ estimated_effort_number = estimated_effort
+ else:
+ get_logger().warning(f"Unexpected type for estimated_effort: {type(estimated_effort)}")
if 1 <= estimated_effort_number <= 5: # 1, because ...
review_labels.append(f'Review effort [1-5]: {estimated_effort_number}')
if get_settings().pr_reviewer.enable_review_labels_security and get_settings().pr_reviewer.require_security_review:
diff --git a/pr_agent/tools/ticket_pr_compliance_check.py b/pr_agent/tools/ticket_pr_compliance_check.py
new file mode 100644
index 00000000..03fdc88b
--- /dev/null
+++ b/pr_agent/tools/ticket_pr_compliance_check.py
@@ -0,0 +1,113 @@
+import re
+import traceback
+
+from pr_agent.config_loader import get_settings
+from pr_agent.git_providers import GithubProvider
+from pr_agent.log import get_logger
+
+
+def find_jira_tickets(text):
+    """Return the distinct JIRA-style ticket keys (e.g. ``PROJ-123``) found in ``text``.
+
+    Two patterns are tried: a word-bounded bare key, and a key optionally preceded by an
+    ``http(s)://<host>/browse/`` prefix (only the key itself is captured).
+    The result is a list built from a set, so its order is not defined.
+    """
+    key_patterns = (
+        r'\b[A-Z]{2,10}-\d{1,7}\b',  # Standard JIRA ticket format (e.g., PROJ-123)
+        r'(?:https?://[^\s/]+/browse/)?([A-Z]{2,10}-\d{1,7})\b',  # JIRA URL or just the ticket
+    )
+
+    found = set()
+    for key_pattern in key_patterns:
+        for hit in re.findall(key_pattern, text):
+            # re.findall yields tuples only for multi-group patterns; in that case keep the
+            # last non-empty group
+            if isinstance(hit, tuple):
+                hit = next((group for group in reversed(hit) if group), None)
+            if hit:
+                found.add(hit)
+
+    return list(found)
+
+
+def extract_ticket_links_from_pr_description(pr_description, repo_path):
+    """
+    Extract all GitHub issue links from a PR description.
+
+    Args:
+        pr_description: Text of the PR description. ``None`` or an empty string yields no links.
+        repo_path: Currently unused; kept so existing callers keep working.
+
+    Returns:
+        A list of the unique issue URLs found, in order of first appearance.
+    """
+    if not pr_description:
+        return []
+
+    # example link to search for: https://github.com/Codium-ai/pr-agent-pro/issues/525
+    # 'github[^/]+' should support also github server
+    # (for example 'https://github.company.ai/Codium-ai/pr-agent-pro/issues/525')
+    pattern = r'https://github[^/]+/[^/]+/[^/]+/issues/\d+'
+
+    # Bare '#123' references are intentionally not expanded here: when the actual comment is
+    # pulled, they already appear as full links.
+    # dict.fromkeys drops repeated links while keeping first-seen order, so a ticket that is
+    # mentioned several times is only reported once.
+    return list(dict.fromkeys(re.findall(pattern, pr_description)))
+
+
+async def extract_tickets(git_provider):
+    """
+    Fetch the GitHub issues that are linked from the PR description.
+
+    Only ``GithubProvider`` is handled; for any other provider nothing is extracted.
+
+    Args:
+        git_provider: The git provider of the PR being processed.
+
+    Returns:
+        For a ``GithubProvider``: a list with one dict per linked issue that could be fetched,
+        holding ``ticket_id``, ``ticket_url``, ``title``, ``body`` (clipped to
+        ``MAX_TICKET_CHARACTERS`` characters) and ``labels`` (comma-separated label names).
+        ``None`` for other providers, or when the extraction as a whole fails.
+    """
+    MAX_TICKET_CHARACTERS = 10000
+    try:
+        if isinstance(git_provider, GithubProvider):
+            user_description = git_provider.get_user_description()
+            tickets = extract_ticket_links_from_pr_description(user_description, git_provider.repo)
+            tickets_content = []
+            for ticket in tickets:
+                try:
+                    # extract ticket number and repo name
+                    # NOTE(review): repo_name is not used below - the issue is always looked up in
+                    # git_provider.repo_obj, so a link that points at a different repository
+                    # presumably resolves to the wrong issue. Confirm and handle if needed.
+                    repo_name, original_issue_number = git_provider._parse_issue_url(ticket)
+
+                    # get the ticket object
+                    issue_main = git_provider.repo_obj.get_issue(original_issue_number)
+                except Exception as e:
+                    # a single ticket that cannot be fetched must not discard the other tickets
+                    get_logger().error(f"Error getting ticket {ticket} error= {e}",
+                                       artifact={"traceback": traceback.format_exc()})
+                    continue
+
+                # the body may be None (e.g. an issue without a description); treat it as empty
+                # so that clipping below does not raise
+                issue_body = issue_main.body or ""
+                if len(issue_body) > MAX_TICKET_CHARACTERS:
+                    issue_body = issue_body[:MAX_TICKET_CHARACTERS] + "..."
+
+                # extract labels (best effort: a failure here keeps whatever was collected so far)
+                labels = []
+                try:
+                    for label in issue_main.labels:
+                        labels.append(label if isinstance(label, str) else label.name)
+                except Exception as e:
+                    get_logger().error(f"Error extracting labels error= {e}",
+                                       artifact={"traceback": traceback.format_exc()})
+
+                tickets_content.append(
+                    {'ticket_id': issue_main.number,
+                     'ticket_url': ticket, 'title': issue_main.title, 'body': issue_body,
+                     'labels': ", ".join(labels)})
+            return tickets_content
+
+    except Exception as e:
+        get_logger().error(f"Error extracting tickets error= {e}",
+                           artifact={"traceback": traceback.format_exc()})
+    return None
+
+
+async def extract_and_cache_pr_tickets(git_provider, vars):
+    """
+    Fill ``vars['related_tickets']`` with the tickets linked from the PR, caching them in settings.
+
+    Does nothing when ``pr_reviewer.require_ticket_analysis_review`` is disabled. Otherwise,
+    tickets already stored under the ``related_tickets`` setting are reused; if there are none,
+    they are extracted with ``extract_tickets`` and, when any are found, stored back in the
+    settings so that later calls do not fetch them again.
+
+    Args:
+        git_provider: The git provider of the PR being processed.
+        vars: Dict supplied by the caller; its ``related_tickets`` entry is updated in place.
+    """
+    # The option is declared next to the other review flags (pr_reviewer section) in
+    # configuration.toml, and the feature must be skipped when it is turned OFF, not when it is on.
+    if not get_settings().get('pr_reviewer.require_ticket_analysis_review', False):
+        return
+
+    related_tickets = get_settings().get('related_tickets', [])
+    if related_tickets:  # tickets are already cached
+        get_logger().info("Using cached tickets", artifact={"tickets": related_tickets})
+        vars['related_tickets'] = related_tickets
+        return
+
+    tickets_content = await extract_tickets(git_provider)
+    if tickets_content:
+        get_logger().info("Extracted tickets from PR description", artifact={"tickets": tickets_content})
+        vars['related_tickets'] = tickets_content
+        get_settings().set('related_tickets', tickets_content)
+
+
+def check_tickets_relevancy():
+    """Placeholder: always returns ``True``."""
+    return True