diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index bd0781ae..98c69d00 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -1,18 +1,22 @@ from __future__ import annotations -import html2text -import html import copy import difflib +import hashlib +import html import json import os import re import textwrap import time +import traceback from datetime import datetime from enum import Enum from typing import Any, List, Tuple + +import html2text +import requests import yaml from pydantic import BaseModel from starlette_context import context @@ -110,6 +114,7 @@ def convert_to_markdown_v2(output_data: dict, "Insights from user's answers": "📝", "Code feedback": "🤖", "Estimated effort to review [1-5]": "⏱️", + "Ticket compliance check": "🎫", } markdown_text = "" if not incremental_review: @@ -165,6 +170,8 @@ def convert_to_markdown_v2(output_data: dict, markdown_text += f'### {emoji} No relevant tests\n\n' else: markdown_text += f"### PR contains tests\n\n" + elif 'ticket compliance check' in key_nice.lower(): + markdown_text = ticket_markdown_logic(emoji, markdown_text, value, gfm_supported) elif 'security concerns' in key_nice.lower(): if gfm_supported: markdown_text += f"
...
' in file_data:
+ pass # PR with many files. some did not get analyzed
+ else:
+ get_logger().error(f"Failed to parse description", artifact={'description': file_data})
except Exception as e:
get_logger().exception(f"Failed to process description: {e}", artifact={'description': file_data})
diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
index add06bb6..d4ecf379 100644
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@@ -51,9 +51,7 @@ require_tests_review=true
require_estimate_effort_to_review=true
require_can_be_split_review=false
require_security_review=true
-# soc2
-require_soc2_ticket=false
-soc2_ticket_prompt="Does the PR description include a link to ticket in a project management system (e.g., Jira, Asana, Trello, etc.) ?"
+require_ticket_analysis_review=true
# general options
num_code_suggestions=0
inline_code_comments = false
diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml
index de7c3d54..364dd9af 100644
--- a/pr_agent/settings/pr_description_prompts.toml
+++ b/pr_agent/settings/pr_description_prompts.toml
@@ -78,9 +78,9 @@ pr_files:
...
...
{%- endif %}
-description: |-
+description: |
...
-title: |-
+title: |
...
{%- if enable_custom_labels %}
labels:
@@ -94,7 +94,26 @@ labels:
Answer should be a valid YAML, and nothing else. Each YAML output MUST be after a newline, with proper indent, and block scalar indicator ('|')
"""
-user="""PR Info:
+user="""
+{%- if related_tickets %}
+Related Ticket Info:
+{% for ticket in related_tickets %}
+=====
+Ticket Title: '{{ ticket.title }}'
+{%- if ticket.labels %}
+Ticket Labels: {{ ticket.labels }}
+{%- endif %}
+{%- if ticket.body %}
+Ticket Description:
+#####
+{{ ticket.body }}
+#####
+{%- endif %}
+=====
+{% endfor %}
+{%- endif %}
+
+PR Info:
Previous title: '{{title}}'
diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml
index e3b4bfe4..0b61e8ea 100644
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@@ -85,7 +85,20 @@ class KeyIssuesComponentLink(BaseModel):
start_line: int = Field(description="The start line that corresponds to this issue in the relevant file")
end_line: int = Field(description="The end line that corresponds to this issue in the relevant file")
+{%- if related_tickets %}
+
+class TicketCompliance(BaseModel):
+ ticket_url: str = Field(description="Ticket URL or ID")
+ ticket_requirements: str = Field(description="Repeat, in your own words, all ticket requirements, in bullet points")
+ fully_compliant_requirements: str = Field(description="A list, in bullet points, of which requirements are met by the PR code. Don't explain how the requirements are met, just list them shortly. Can be empty")
+ not_compliant_requirements: str = Field(description="A list, in bullet points, of which requirements are not met by the PR code. Don't explain how the requirements are not met, just list them shortly. Can be empty")
+ overall_compliance_level: str = Field(description="Overall give this PR one of these three values in relation to the ticket: 'Fully compliant', 'Partially compliant', or 'Not compliant'")
+{%- endif %}
+
class Review(BaseModel):
+{%- if related_tickets %}
+ ticket_compliance_check: List[TicketCompliance] = Field(description="A list of compliance checks for the related tickets")
+{%- endif %}
{%- if require_estimate_effort_to_review %}
estimated_effort_to_review_[1-5]: int = Field(description="Estimate, on a scale of 1-5 (inclusive), the time and effort required to review this PR by an experienced and knowledgeable developer. 1 means short and easy review , 5 means long and hard review. Take into account the size, complexity, quality, and the needed changes of the PR code diff.")
{%- endif %}
@@ -130,6 +143,19 @@ class PRReview(BaseModel):
Example output:
```yaml
review:
+{%- if related_tickets %}
+ ticket_compliance_check:
+ - ticket_url: |
+ ...
+ ticket_requirements: |
+ ...
+ fully_compliant_requirements: |
+ ...
+ not_compliant_requirements: |
+ ...
+ overall_compliance_level: |
+ ...
+{%- endif %}
{%- if require_estimate_effort_to_review %}
estimated_effort_to_review_[1-5]: |
3
@@ -176,7 +202,33 @@ code_feedback:
Answer should be a valid YAML, and nothing else. Each YAML output MUST be after a newline, with proper indent, and block scalar indicator ('|')
"""
-user="""--PR Info--
+user="""
+{%- if related_tickets %}
+--PR Ticket Info--
+{%- for ticket in related_tickets %}
+=====
+Ticket URL: '{{ ticket.ticket_url }}'
+
+Ticket Title: '{{ ticket.title }}'
+
+{%- if ticket.labels %}
+
+Ticket Labels: {{ ticket.labels }}
+
+{%- endif %}
+{%- if ticket.body %}
+
+Ticket Description:
+#####
+{{ ticket.body }}
+#####
+{%- endif %}
+=====
+{% endfor %}
+{%- endif %}
+
+
+--PR Info--
Title: '{{title}}'
diff --git a/pr_agent/tools/ticket_pr_compliance_check.py b/pr_agent/tools/ticket_pr_compliance_check.py
new file mode 100644
index 00000000..03fdc88b
--- /dev/null
+++ b/pr_agent/tools/ticket_pr_compliance_check.py
@@ -0,0 +1,113 @@
+import re
+import traceback
+
+from pr_agent.config_loader import get_settings
+from pr_agent.git_providers import GithubProvider
+from pr_agent.log import get_logger
+
+
+def find_jira_tickets(text):
+    """
+    Find JIRA-style ticket keys (e.g. ``PROJ-123``) mentioned in free text.
+
+    A key is recognised when written bare, or when it directly follows
+    ``/browse/`` in an http(s) URL.
+
+    Args:
+        text: The text to scan. ``None`` or an empty string yields no tickets.
+
+    Returns:
+        A sorted list of the unique ticket keys found in ``text``.
+    """
+    if not text:
+        return []
+
+    # Regular expression patterns for JIRA tickets.
+    # The first pattern has no capture group and the second has exactly one, so
+    # re.findall() returns plain strings (never tuples) for both of them.
+    patterns = [
+        r'\b[A-Z]{2,10}-\d{1,7}\b',  # Standard JIRA ticket format (e.g., PROJ-123)
+        r'(?:https?://[^\s/]+/browse/)?([A-Z]{2,10}-\d{1,7})\b'  # JIRA URL or just the ticket
+    ]
+
+    tickets = set()
+    for pattern in patterns:
+        tickets.update(re.findall(pattern, text))
+
+    # Sort so the result does not depend on set iteration order.
+    return sorted(tickets)
+
+
+def extract_ticket_links_from_pr_description(pr_description, repo_path):
+    """
+    Extract all GitHub issue links from a PR description.
+
+    Args:
+        pr_description: Text of the PR description. ``None`` or an empty string
+            yields an empty list.
+        repo_path: Not used by the current implementation; kept so existing
+            callers keep working.
+
+    Returns:
+        The issue URLs found, in order of first appearance, without duplicates.
+    """
+    if not pr_description:
+        return []
+
+    # example link to search for: https://github.com/Codium-ai/pr-agent-pro/issues/525
+    # 'github[^/]+' should support also github server
+    # (for example 'https://github.company.ai/Codium-ai/pr-agent-pro/issues/525')
+    pattern = r'https://github[^/]+/[^/]+/[^/]+/issues/\d+'
+
+    # Short references like '#123' are not expanded into full URLs here: per the
+    # original author's note this is unneeded, since when you pull the actual
+    # comment, it appears as a full link.
+
+    # dict.fromkeys() drops repeated links while keeping first-seen order, so a
+    # ticket mentioned twice in the description is reported only once.
+    return list(dict.fromkeys(re.findall(pattern, pr_description)))
+
+
+async def extract_tickets(git_provider):
+    """
+    Collect the GitHub issues linked from the PR description.
+
+    Args:
+        git_provider: The git provider of the PR. Only ``GithubProvider``
+            instances are handled.
+
+    Returns:
+        A list with one dict per ticket, holding the keys ``ticket_id``,
+        ``ticket_url``, ``title``, ``body`` (clipped to MAX_TICKET_CHARACTERS)
+        and ``labels`` (comma-separated string). ``None`` is returned for other
+        providers, or when an unexpected error occurs (the error is logged).
+    """
+    MAX_TICKET_CHARACTERS = 10000
+    try:
+        if isinstance(git_provider, GithubProvider):
+            user_description = git_provider.get_user_description()
+            tickets = extract_ticket_links_from_pr_description(user_description, git_provider.repo)
+            tickets_content = []
+            for ticket in tickets:
+                try:
+                    # extract ticket number (the repo name part of the URL is not used)
+                    _, original_issue_number = git_provider._parse_issue_url(ticket)
+
+                    # get the ticket object
+                    # NOTE(review): the issue is looked up in the PR's own repo object, even
+                    # if the link points to another repository - confirm this is intended
+                    issue_main = git_provider.repo_obj.get_issue(original_issue_number)
+                except Exception as e:
+                    # one unreachable ticket should not discard the tickets that can be fetched
+                    get_logger().error(f"Error getting issue for ticket {ticket} error= {e}",
+                                       artifact={"traceback": traceback.format_exc()})
+                    continue
+
+                # clip issue_main.body max length (an issue without a description has no body)
+                issue_body = issue_main.body or ""
+                if len(issue_body) > MAX_TICKET_CHARACTERS:
+                    issue_body = issue_body[:MAX_TICKET_CHARACTERS] + "..."
+
+                # extract labels
+                labels = []
+                try:
+                    for label in issue_main.labels:
+                        if isinstance(label, str):
+                            labels.append(label)
+                        else:
+                            labels.append(label.name)
+                except Exception as e:
+                    get_logger().error(f"Error extracting labels error= {e}",
+                                       artifact={"traceback": traceback.format_exc()})
+                tickets_content.append(
+                    {'ticket_id': issue_main.number,
+                     'ticket_url': ticket, 'title': issue_main.title, 'body': issue_body,
+                     'labels': ", ".join(labels)})
+            return tickets_content
+
+    except Exception as e:
+        get_logger().error(f"Error extracting tickets error= {e}",
+                           artifact={"traceback": traceback.format_exc()})
+
+
+async def extract_and_cache_pr_tickets(git_provider, vars):
+    """
+    Store the tickets related to the PR in ``vars['related_tickets']``.
+
+    Tickets are extracted with extract_tickets() and saved under the
+    ``related_tickets`` setting; when that setting is already populated, its
+    value is reused instead of extracting again. Nothing is done when ticket
+    analysis is disabled in the settings.
+
+    Args:
+        git_provider: The git provider of the PR, passed on to extract_tickets().
+        vars: Mapping that receives the ``related_tickets`` entry; updated in
+            place (presumably the prompt variables - verify against the caller).
+    """
+    settings = get_settings()
+
+    # Run only when ticket analysis is enabled. The original guard returned when
+    # the flag was truthy, i.e. exactly when the feature was switched on.
+    # NOTE(review): the flag is declared next to the reviewer 'require_*' options,
+    # so the 'pr_reviewer' section is checked first and 'config' is kept as a
+    # fallback - confirm which section actually owns the flag.
+    ticket_analysis_enabled = settings.get(
+        'pr_reviewer.require_ticket_analysis_review',
+        settings.get('config.require_ticket_analysis_review', False))
+    if not ticket_analysis_enabled:
+        return
+
+    related_tickets = settings.get('related_tickets', [])
+    if not related_tickets:
+        tickets_content = await extract_tickets(git_provider)
+        if tickets_content:
+            get_logger().info("Extracted tickets from PR description", artifact={"tickets": tickets_content})
+            vars['related_tickets'] = tickets_content
+            settings.set('related_tickets', tickets_content)
+    else:  # if tickets are already cached
+        get_logger().info("Using cached tickets", artifact={"tickets": related_tickets})
+        vars['related_tickets'] = related_tickets
+
+
+def check_tickets_relevancy():
+    """Report whether the tickets are relevant; this implementation always answers ``True``."""
+    is_relevant = True
+    return is_relevant