From 213ced7e18b0da472048a47488cbc500c045dec0 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Tue, 4 Jun 2024 16:37:35 +0300 Subject: [PATCH 1/5] Add PR evaluation prompt and link to fine-tuning benchmark documentation --- pr_agent/servers/gitlab_webhook.py | 46 +++++++++++++++++++++++++++- pr_agent/settings/configuration.toml | 5 +++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/pr_agent/servers/gitlab_webhook.py b/pr_agent/servers/gitlab_webhook.py index aad3d19e..51e9c938 100644 --- a/pr_agent/servers/gitlab_webhook.py +++ b/pr_agent/servers/gitlab_webhook.py @@ -24,6 +24,25 @@ router = APIRouter() secret_provider = get_secret_provider() if get_settings().get("CONFIG.SECRET_PROVIDER") else None +async def get_mr_url_from_commit_sha(commit_sha, gitlab_token, project_id): + try: + import requests + # Replace 'your_access_token' with your GitLab personal access token + headers = { + 'Private-Token': f'{gitlab_token}' + } + # API endpoint to find MRs containing the commit + response = requests.get( + f'https://gitlab.com/api/v4/projects/{project_id}/repository/commits/{commit_sha}/merge_requests', + headers=headers + ) + merge_requests = response.json() + pr_url = merge_requests[0]['web_url'] + return pr_url + except Exception as e: + get_logger().error(f"Failed to get MR url from commit sha: {e}") + return None + async def handle_request(api_url: str, body: str, log_context: dict, sender_id: str): log_context["action"] = body log_context["event"] = "pull_request" if body == "/review" else "comment" @@ -51,6 +70,7 @@ async def _perform_commands_gitlab(commands_conf: str, agent: PRAgent, api_url: get_logger().error(f"Failed to perform command {command}: {e}") + @router.post("/webhook") async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request): start_time = datetime.now() @@ -87,12 +107,19 @@ async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request): get_logger().info("GitLab data", artifact=data) sender = data.get("user", {}).get("username", "unknown") sender_id = data.get("user", {}).get("id", "unknown") + + # logic to ignore bot users (unlike Github, no direct flag for bot users in gitlab) + sender_name = data.get("user", {}).get("name", "unknown").lower() + if 'codium' in sender_name or 'bot_' in sender_name or 'bot-' in sender_name or '_bot' in sender_name or '-bot' in sender_name: + get_logger().info(f"Skipping bot user: {sender_name}") + return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"})) + log_context["sender"] = sender if data.get('object_kind') == 'merge_request' and data['object_attributes'].get('action') in ['open', 'reopen']: url = data['object_attributes'].get('url') get_logger().info(f"New merge request: {url}") await _perform_commands_gitlab("pr_commands", PRAgent(), url, log_context) - elif data.get('object_kind') == 'note' and data['event_type'] == 'note': # comment on MR + elif data.get('object_kind') == 'note' and data.get('event_type') == 'note': # comment on MR if 'merge_request' in data: mr = data['merge_request'] url = mr.get('url') @@ -102,6 +129,23 @@ async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request): body = handle_ask_line(body, data) await handle_request(url, body, log_context, sender_id) + elif data.get('object_kind') == 'push' and data.get('event_name') == 'push': + commands_on_push = get_settings().get(f"gitlab.push_commands", {}) + handle_push_trigger = get_settings().get(f"gitlab.handle_push_trigger", False) + if not commands_on_push or not handle_push_trigger: + get_logger().info("Push event, but no push commands found or push trigger is disabled") + return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"})) + try: + project_id = data['project_id'] + commit_sha = data['checkout_sha'] + url = await get_mr_url_from_commit_sha(commit_sha, gitlab_token, project_id) + if not url: + get_logger().info(f"No MR found for commit: {commit_sha}") + return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"})) + get_logger().debug(f'A push event has been received: {url}') + await _perform_commands_gitlab("push_commands", PRAgent(), url, log_context) + except Exception as e: + get_logger().error(f"Failed to handle push event: {e}") background_tasks.add_task(inner, request_json) end_time = datetime.now() diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 98d14414..b7ca3f78 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -178,6 +178,11 @@ pr_commands = [ "/review --pr_reviewer.num_code_suggestions=0", "/improve", ] +handle_push_trigger = false +commands = [ + "/describe", + "/review --pr_reviewer.num_code_suggestions=0", +] [bitbucket_app] pr_commands = [ From 8e12787fc853d7657de345a78e6ef8c27e88dfc7 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Tue, 4 Jun 2024 16:42:54 +0300 Subject: [PATCH 2/5] Clear secret provider configuration in configuration.toml --- pr_agent/settings/configuration.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index b7ca3f78..c682cf6d 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -15,7 +15,7 @@ max_description_tokens = 500 max_commits_tokens = 500 max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities. patch_extra_lines = 1 -secret_provider="google_cloud_storage" +secret_provider="" cli_mode=false ai_disclaimer_title="" # Pro feature, title for a collapsible disclaimer to AI outputs ai_disclaimer="" # Pro feature, full text for the AI disclaimer From 0b7dcf03a57db832422218bae22108647afa62e5 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Tue, 4 Jun 2024 16:47:20 +0300 Subject: [PATCH 3/5] Improve error handling in get_mr_url_from_commit_sha function in gitlab_webhook.py --- pr_agent/servers/gitlab_webhook.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pr_agent/servers/gitlab_webhook.py b/pr_agent/servers/gitlab_webhook.py index 51e9c938..c2b8b921 100644 --- a/pr_agent/servers/gitlab_webhook.py +++ b/pr_agent/servers/gitlab_webhook.py @@ -27,7 +27,6 @@ secret_provider = get_secret_provider() if get_settings().get("CONFIG.SECRET_PRO async def get_mr_url_from_commit_sha(commit_sha, gitlab_token, project_id): try: import requests - # Replace 'your_access_token' with your GitLab personal access token headers = { 'Private-Token': f'{gitlab_token}' } @@ -37,8 +36,12 @@ async def get_mr_url_from_commit_sha(commit_sha, gitlab_token, project_id): headers=headers ) merge_requests = response.json() - pr_url = merge_requests[0]['web_url'] - return pr_url + if merge_requests and response.status_code == 200: + pr_url = merge_requests[0]['web_url'] + return pr_url + else: + get_logger().info(f"No merge requests found for commit: {commit_sha}") + return None except Exception as e: get_logger().error(f"Failed to get MR url from commit sha: {e}") return None From faf4576f03138f14e9fdba730dd823ed52c794f5 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Tue, 4 Jun 2024 17:08:24 +0300 Subject: [PATCH 4/5] enterprise --- pr_agent/servers/gitlab_webhook.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pr_agent/servers/gitlab_webhook.py b/pr_agent/servers/gitlab_webhook.py index c2b8b921..57e00f88 100644 --- a/pr_agent/servers/gitlab_webhook.py +++ b/pr_agent/servers/gitlab_webhook.py @@ -31,8 +31,9 @@ async def get_mr_url_from_commit_sha(commit_sha, gitlab_token, project_id): 'Private-Token': f'{gitlab_token}' } # API endpoint to find MRs containing the commit + gitlab_url = get_settings().get("GITLAB.URL", 'https://gitlab.com') response = requests.get( - f'https://gitlab.com/api/v4/projects/{project_id}/repository/commits/{commit_sha}/merge_requests', + f'{gitlab_url}/api/v4/projects/{project_id}/repository/commits/{commit_sha}/merge_requests', headers=headers ) merge_requests = response.json() From 4d96d11ba57b2c0cecad9f36243c5fa8edb8e044 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Tue, 4 Jun 2024 20:15:22 +0300 Subject: [PATCH 5/5] enterprise --- docs/docs/tools/review.md | 4 ++++ pr_agent/settings/configuration.toml | 1 + pr_agent/settings/pr_reviewer_prompts.toml | 2 ++ pr_agent/tools/pr_reviewer.py | 3 ++- 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/docs/tools/review.md b/docs/docs/tools/review.md index 97ea5e9f..3f69186e 100644 --- a/docs/docs/tools/review.md +++ b/docs/docs/tools/review.md @@ -112,6 +112,10 @@ The tool will first ask the author questions about the PR, and will guide the re require_can_be_split_review If set to true, the tool will add a section that checks if the PR contains several themes, and can be split into smaller PRs. Default is false. + + require_security_review + If set to true, the tool will add a section that checks if the PR contains a possible security or vulnerability issue. Default is true. + !!! example "SOC2 ticket compliance 💎" diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index c682cf6d..86ab7a00 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -28,6 +28,7 @@ require_score_review=false require_tests_review=true require_estimate_effort_to_review=true require_can_be_split_review=false +require_security_review=true # soc2 require_soc2_ticket=false soc2_ticket_prompt="Does the PR description include a link to ticket in a project management system (e.g., Jira, Asana, Trello, etc.) ?" diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index aeeef5f8..d7d06f41 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -69,7 +69,9 @@ class Review(BaseModel): insights_from_user_answers: str = Field(description="shortly summarize the insights you gained from the user's answers to the questions") {%- endif %} possible_issues: str = Field(description="Does this PR code introduce clear issues, bugs, or major performance concerns? If there are no apparent issues, respond with 'No'. If there are any issues, describe them briefly. Use bullet points if more than one issue. Be specific, and provide examples if possible. Start each bullet point with a short specific header, such as: "- Possible Bug: ...", etc.") +{%- if require_security_review %} security_concerns: str = Field(description="does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible") +{%- endif %} {%- if require_can_be_split_review %} can_be_split: List[SubPR] = Field(min_items=0, max_items=3, description="Can this PR, which contains {{ num_pr_files }} changed files in total, be divided into smaller sub-PRs with distinct tasks that can be reviewed and merged independently, regardless of the order ? Make sure that the sub-PRs are indeed independent, with no code dependencies between them, and that each sub-PR represent a meaningfull independent task. Output an empty list if the PR code does not needd to be split.") {%- endif %} diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index 074023d2..baa0ccc4 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -65,6 +65,7 @@ class PRReviewer: "require_tests": get_settings().pr_reviewer.require_tests_review, "require_estimate_effort_to_review": get_settings().pr_reviewer.require_estimate_effort_to_review, 'require_can_be_split_review': get_settings().pr_reviewer.require_can_be_split_review, + 'require_security_review': get_settings().pr_reviewer.require_security_review, 'num_code_suggestions': get_settings().pr_reviewer.num_code_suggestions, 'question_str': question_str, 'answer_str': answer_str, @@ -375,7 +376,7 @@ class PRReviewer: estimated_effort_number = int(estimated_effort.split(',')[0]) if 1 <= estimated_effort_number <= 5: # 1, because ... review_labels.append(f'Review effort [1-5]: {estimated_effort_number}') - if get_settings().pr_reviewer.enable_review_labels_security: + if get_settings().pr_reviewer.enable_review_labels_security and get_settings().pr_reviewer.require_security_review: security_concerns = data['review']['security_concerns'] # yes, because ... security_concerns_bool = 'yes' in security_concerns.lower() or 'true' in security_concerns.lower() if security_concerns_bool: