From 213ced7e18b0da472048a47488cbc500c045dec0 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Tue, 4 Jun 2024 16:37:35 +0300 Subject: [PATCH 1/8] Add PR evaluation prompt and link to fine-tuning benchmark documentation --- pr_agent/servers/gitlab_webhook.py | 46 +++++++++++++++++++++++++++- pr_agent/settings/configuration.toml | 5 +++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/pr_agent/servers/gitlab_webhook.py b/pr_agent/servers/gitlab_webhook.py index aad3d19e..51e9c938 100644 --- a/pr_agent/servers/gitlab_webhook.py +++ b/pr_agent/servers/gitlab_webhook.py @@ -24,6 +24,25 @@ router = APIRouter() secret_provider = get_secret_provider() if get_settings().get("CONFIG.SECRET_PROVIDER") else None +async def get_mr_url_from_commit_sha(commit_sha, gitlab_token, project_id): + try: + import requests + # Replace 'your_access_token' with your GitLab personal access token + headers = { + 'Private-Token': f'{gitlab_token}' + } + # API endpoint to find MRs containing the commit + response = requests.get( + f'https://gitlab.com/api/v4/projects/{project_id}/repository/commits/{commit_sha}/merge_requests', + headers=headers + ) + merge_requests = response.json() + pr_url = merge_requests[0]['web_url'] + return pr_url + except Exception as e: + get_logger().error(f"Failed to get MR url from commit sha: {e}") + return None + async def handle_request(api_url: str, body: str, log_context: dict, sender_id: str): log_context["action"] = body log_context["event"] = "pull_request" if body == "/review" else "comment" @@ -51,6 +70,7 @@ async def _perform_commands_gitlab(commands_conf: str, agent: PRAgent, api_url: get_logger().error(f"Failed to perform command {command}: {e}") + @router.post("/webhook") async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request): start_time = datetime.now() @@ -87,12 +107,19 @@ async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request): get_logger().info("GitLab data", artifact=data) sender = data.get("user", {}).get("username", "unknown") sender_id = data.get("user", {}).get("id", "unknown") + + # logic to ignore bot users (unlike Github, no direct flag for bot users in gitlab) + sender_name = data.get("user", {}).get("name", "unknown").lower() + if 'codium' in sender_name or 'bot_' in sender_name or 'bot-' in sender_name or '_bot' in sender_name or '-bot' in sender_name: + get_logger().info(f"Skipping bot user: {sender_name}") + return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"})) + log_context["sender"] = sender if data.get('object_kind') == 'merge_request' and data['object_attributes'].get('action') in ['open', 'reopen']: url = data['object_attributes'].get('url') get_logger().info(f"New merge request: {url}") await _perform_commands_gitlab("pr_commands", PRAgent(), url, log_context) - elif data.get('object_kind') == 'note' and data['event_type'] == 'note': # comment on MR + elif data.get('object_kind') == 'note' and data.get('event_type') == 'note': # comment on MR if 'merge_request' in data: mr = data['merge_request'] url = mr.get('url') @@ -102,6 +129,23 @@ async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request): body = handle_ask_line(body, data) await handle_request(url, body, log_context, sender_id) + elif data.get('object_kind') == 'push' and data.get('event_name') == 'push': + commands_on_push = get_settings().get(f"gitlab.push_commands", {}) + handle_push_trigger = get_settings().get(f"gitlab.handle_push_trigger", False) + if not commands_on_push or not handle_push_trigger: + get_logger().info("Push event, but no push commands found or push trigger is disabled") + return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"})) + try: + project_id = data['project_id'] + commit_sha = data['checkout_sha'] + url = await get_mr_url_from_commit_sha(commit_sha, gitlab_token, project_id) + if not url: + get_logger().info(f"No MR found for commit: {commit_sha}") + return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"})) + get_logger().debug(f'A push event has been received: {url}') + await _perform_commands_gitlab("push_commands", PRAgent(), url, log_context) + except Exception as e: + get_logger().error(f"Failed to handle push event: {e}") background_tasks.add_task(inner, request_json) end_time = datetime.now() diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 98d14414..b7ca3f78 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -178,6 +178,11 @@ pr_commands = [ "/review --pr_reviewer.num_code_suggestions=0", "/improve", ] +handle_push_trigger = false +commands = [ + "/describe", + "/review --pr_reviewer.num_code_suggestions=0", +] [bitbucket_app] pr_commands = [ From 8e12787fc853d7657de345a78e6ef8c27e88dfc7 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Tue, 4 Jun 2024 16:42:54 +0300 Subject: [PATCH 2/8] Clear secret provider configuration in configuration.toml --- pr_agent/settings/configuration.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index b7ca3f78..c682cf6d 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -15,7 +15,7 @@ max_description_tokens = 500 max_commits_tokens = 500 max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities. patch_extra_lines = 1 -secret_provider="google_cloud_storage" +secret_provider="" cli_mode=false ai_disclaimer_title="" # Pro feature, title for a collapsible disclaimer to AI outputs ai_disclaimer="" # Pro feature, full text for the AI disclaimer From 0b7dcf03a57db832422218bae22108647afa62e5 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Tue, 4 Jun 2024 16:47:20 +0300 Subject: [PATCH 3/8] Improve error handling in get_mr_url_from_commit_sha function in gitlab_webhook.py --- pr_agent/servers/gitlab_webhook.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pr_agent/servers/gitlab_webhook.py b/pr_agent/servers/gitlab_webhook.py index 51e9c938..c2b8b921 100644 --- a/pr_agent/servers/gitlab_webhook.py +++ b/pr_agent/servers/gitlab_webhook.py @@ -27,7 +27,6 @@ secret_provider = get_secret_provider() if get_settings().get("CONFIG.SECRET_PRO async def get_mr_url_from_commit_sha(commit_sha, gitlab_token, project_id): try: import requests - # Replace 'your_access_token' with your GitLab personal access token headers = { 'Private-Token': f'{gitlab_token}' } @@ -37,8 +36,12 @@ async def get_mr_url_from_commit_sha(commit_sha, gitlab_token, project_id): headers=headers ) merge_requests = response.json() - pr_url = merge_requests[0]['web_url'] - return pr_url + if merge_requests and response.status_code == 200: + pr_url = merge_requests[0]['web_url'] + return pr_url + else: + get_logger().info(f"No merge requests found for commit: {commit_sha}") + return None except Exception as e: get_logger().error(f"Failed to get MR url from commit sha: {e}") return None From faf4576f03138f14e9fdba730dd823ed52c794f5 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Tue, 4 Jun 2024 17:08:24 +0300 Subject: [PATCH 4/8] enterprise --- pr_agent/servers/gitlab_webhook.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pr_agent/servers/gitlab_webhook.py b/pr_agent/servers/gitlab_webhook.py index c2b8b921..57e00f88 100644 --- a/pr_agent/servers/gitlab_webhook.py +++ b/pr_agent/servers/gitlab_webhook.py @@ -31,8 +31,9 @@ async def get_mr_url_from_commit_sha(commit_sha, gitlab_token, project_id): 'Private-Token': f'{gitlab_token}' } # API endpoint to find MRs containing the commit + gitlab_url = get_settings().get("GITLAB.URL", 'https://gitlab.com') response = requests.get( - f'https://gitlab.com/api/v4/projects/{project_id}/repository/commits/{commit_sha}/merge_requests', + f'{gitlab_url}/api/v4/projects/{project_id}/repository/commits/{commit_sha}/merge_requests', headers=headers ) merge_requests = response.json() From 4d96d11ba57b2c0cecad9f36243c5fa8edb8e044 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Tue, 4 Jun 2024 20:15:22 +0300 Subject: [PATCH 5/8] enterprise --- docs/docs/tools/review.md | 4 ++++ pr_agent/settings/configuration.toml | 1 + pr_agent/settings/pr_reviewer_prompts.toml | 2 ++ pr_agent/tools/pr_reviewer.py | 3 ++- 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/docs/tools/review.md b/docs/docs/tools/review.md index 97ea5e9f..3f69186e 100644 --- a/docs/docs/tools/review.md +++ b/docs/docs/tools/review.md @@ -112,6 +112,10 @@ The tool will first ask the author questions about the PR, and will guide the re require_can_be_split_review If set to true, the tool will add a section that checks if the PR contains several themes, and can be split into smaller PRs. Default is false. + + require_security_review + If set to true, the tool will add a section that checks if the PR contains a possible security or vulnerability issue. Default is true. + !!! example "SOC2 ticket compliance 💎" diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index c682cf6d..86ab7a00 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -28,6 +28,7 @@ require_score_review=false require_tests_review=true require_estimate_effort_to_review=true require_can_be_split_review=false +require_security_review=true # soc2 require_soc2_ticket=false soc2_ticket_prompt="Does the PR description include a link to ticket in a project management system (e.g., Jira, Asana, Trello, etc.) ?" diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index aeeef5f8..d7d06f41 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -69,7 +69,9 @@ class Review(BaseModel): insights_from_user_answers: str = Field(description="shortly summarize the insights you gained from the user's answers to the questions") {%- endif %} possible_issues: str = Field(description="Does this PR code introduce clear issues, bugs, or major performance concerns? If there are no apparent issues, respond with 'No'. If there are any issues, describe them briefly. Use bullet points if more than one issue. Be specific, and provide examples if possible. Start each bullet point with a short specific header, such as: "- Possible Bug: ...", etc.") +{%- if require_security_review %} security_concerns: str = Field(description="does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible") +{%- endif %} {%- if require_can_be_split_review %} can_be_split: List[SubPR] = Field(min_items=0, max_items=3, description="Can this PR, which contains {{ num_pr_files }} changed files in total, be divided into smaller sub-PRs with distinct tasks that can be reviewed and merged independently, regardless of the order ? Make sure that the sub-PRs are indeed independent, with no code dependencies between them, and that each sub-PR represent a meaningfull independent task. Output an empty list if the PR code does not needd to be split.") {%- endif %} diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index 074023d2..baa0ccc4 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -65,6 +65,7 @@ class PRReviewer: "require_tests": get_settings().pr_reviewer.require_tests_review, "require_estimate_effort_to_review": get_settings().pr_reviewer.require_estimate_effort_to_review, 'require_can_be_split_review': get_settings().pr_reviewer.require_can_be_split_review, + 'require_security_review': get_settings().pr_reviewer.require_security_review, 'num_code_suggestions': get_settings().pr_reviewer.num_code_suggestions, 'question_str': question_str, 'answer_str': answer_str, @@ -375,7 +376,7 @@ class PRReviewer: estimated_effort_number = int(estimated_effort.split(',')[0]) if 1 <= estimated_effort_number <= 5: # 1, because ... review_labels.append(f'Review effort [1-5]: {estimated_effort_number}') - if get_settings().pr_reviewer.enable_review_labels_security: + if get_settings().pr_reviewer.enable_review_labels_security and get_settings().pr_reviewer.require_security_review: security_concerns = data['review']['security_concerns'] # yes, because ... security_concerns_bool = 'yes' in security_concerns.lower() or 'true' in security_concerns.lower() if security_concerns_bool: From d046c2a939fb8ac80f3e990d559a30fd5df47467 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Wed, 5 Jun 2024 11:25:10 +0300 Subject: [PATCH 6/8] gitlab push_commands will work --- pr_agent/servers/gitlab_webhook.py | 18 ++++++++++++------ pr_agent/settings/configuration.toml | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/pr_agent/servers/gitlab_webhook.py b/pr_agent/servers/gitlab_webhook.py index 57e00f88..e81044b8 100644 --- a/pr_agent/servers/gitlab_webhook.py +++ b/pr_agent/servers/gitlab_webhook.py @@ -134,18 +134,24 @@ async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request): await handle_request(url, body, log_context, sender_id) elif data.get('object_kind') == 'push' and data.get('event_name') == 'push': - commands_on_push = get_settings().get(f"gitlab.push_commands", {}) - handle_push_trigger = get_settings().get(f"gitlab.handle_push_trigger", False) - if not commands_on_push or not handle_push_trigger: - get_logger().info("Push event, but no push commands found or push trigger is disabled") - return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"})) try: project_id = data['project_id'] commit_sha = data['checkout_sha'] url = await get_mr_url_from_commit_sha(commit_sha, gitlab_token, project_id) if not url: get_logger().info(f"No MR found for commit: {commit_sha}") - return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"})) + return JSONResponse(status_code=status.HTTP_200_OK, + content=jsonable_encoder({"message": "success"})) + + # we need first to apply_repo_settings + apply_repo_settings(url) + commands_on_push = get_settings().get(f"gitlab.push_commands", {}) + handle_push_trigger = get_settings().get(f"gitlab.handle_push_trigger", False) + if not commands_on_push or not handle_push_trigger: + get_logger().info("Push event, but no push commands found or push trigger is disabled") + return JSONResponse(status_code=status.HTTP_200_OK, + content=jsonable_encoder({"message": "success"})) + get_logger().debug(f'A push event has been received: {url}') await _perform_commands_gitlab("push_commands", PRAgent(), url, log_context) except Exception as e: diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 86ab7a00..a3d6c6f9 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -180,7 +180,7 @@ pr_commands = [ "/improve", ] handle_push_trigger = false -commands = [ +push_commands = [ "/describe", "/review --pr_reviewer.num_code_suggestions=0", ] From 75a27d64b4cca2315480ae8f03f9977d4ee3993e Mon Sep 17 00:00:00 2001 From: mrT23 Date: Wed, 5 Jun 2024 20:26:45 +0300 Subject: [PATCH 7/8] gitlab push_commands --- docs/docs/usage-guide/automations_and_usage.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/docs/docs/usage-guide/automations_and_usage.md b/docs/docs/usage-guide/automations_and_usage.md index 35c95099..9c6185b2 100644 --- a/docs/docs/usage-guide/automations_and_usage.md +++ b/docs/docs/usage-guide/automations_and_usage.md @@ -136,7 +136,7 @@ publish_labels = false to prevent PR-Agent from publishing labels when running the `describe` tool. ## GitLab Webhook -After setting up a GitLab webhook, to control which commands will run automatically when a new PR is opened, you can set the `pr_commands` parameter in the configuration file, similar to the GitHub App: +After setting up a GitLab webhook, to control which commands will run automatically when a new MR is opened, you can set the `pr_commands` parameter in the configuration file, similar to the GitHub App: ``` [gitlab] pr_commands = [ @@ -146,6 +146,20 @@ pr_commands = [ ] ``` +the GitLab webhook can also respond to new code that is pushed to an open MR. +The configuration toggle `handle_push_trigger` can be used to enable this feature. +The configuration parameter `push_commands` defines the list of tools that will be **run automatically** when new code is pushed to the MR. +``` +[gitlab] +handle_push_trigger = true +push_commands = [ + "/describe", + "/review --pr_reviewer.num_code_suggestions=0 --pr_reviewer.final_update_message=false", +] +``` + +Note that to use the 'handle_push_trigger' feature, you need give the gitlab webhook also the "Push events" scope. + ## BitBucket App Similar to GitHub app, when running PR-Agent from BitBucket App, the default [configuration file](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml) from a pre-built docker will be initially loaded. From 9c6a363a5168c90a4da409844a76cdbf0a124efa Mon Sep 17 00:00:00 2001 From: mrT23 Date: Wed, 5 Jun 2024 20:31:35 +0300 Subject: [PATCH 8/8] gitlab push_commands --- docs/docs/usage-guide/automations_and_usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/usage-guide/automations_and_usage.md b/docs/docs/usage-guide/automations_and_usage.md index 9c6185b2..f72afe9e 100644 --- a/docs/docs/usage-guide/automations_and_usage.md +++ b/docs/docs/usage-guide/automations_and_usage.md @@ -158,7 +158,7 @@ push_commands = [ ] ``` -Note that to use the 'handle_push_trigger' feature, you need give the gitlab webhook also the "Push events" scope. +Note that to use the 'handle_push_trigger' feature, you need to give the gitlab webhook also the "Push events" scope. ## BitBucket App Similar to GitHub app, when running PR-Agent from BitBucket App, the default [configuration file](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml) from a pre-built docker will be initially loaded.