Merge pull request #944 from Codium-ai/tr/gitlab_commits

handle_push_trigger for gitlab
2025-07-21 04:50:39 +08:00 · 2024-06-04 20:17:27 +03:00
parent 6d6fb67306 4d96d11ba5
commit aed4ed41cc
5 changed files with 64 additions and 3 deletions
--- a/docs/docs/tools/review.md
+++ b/docs/docs/tools/review.md
@ -112,6 +112,10 @@ The tool will first ask the author questions about the PR, and will guide the re
    <td><b>require_can_be_split_review</b></td>
    <td>If set to true, the tool will add a section that checks if the PR contains several themes, and can be split into smaller PRs. Default is false.</td>
  </tr>
+  <tr>
+    <td><b>require_security_review</b></td>
+    <td>If set to true, the tool will add a section that checks if the PR contains a possible security or vulnerability issue. Default is true.</td>
+  </tr>
 </table>

 !!! example "SOC2 ticket compliance 💎"
--- a/pr_agent/servers/gitlab_webhook.py
+++ b/pr_agent/servers/gitlab_webhook.py
@ -24,6 +24,29 @@ router = APIRouter()
 secret_provider = get_secret_provider() if get_settings().get("CONFIG.SECRET_PROVIDER") else None


+async def get_mr_url_from_commit_sha(commit_sha, gitlab_token, project_id):
+    try:
+        import requests
+        headers = {
+            'Private-Token': f'{gitlab_token}'
+        }
+        # API endpoint to find MRs containing the commit
+        gitlab_url = get_settings().get("GITLAB.URL", 'https://gitlab.com')
+        response = requests.get(
+            f'{gitlab_url}/api/v4/projects/{project_id}/repository/commits/{commit_sha}/merge_requests',
+            headers=headers
+        )
+        merge_requests = response.json()
+        if merge_requests and response.status_code == 200:
+            pr_url = merge_requests[0]['web_url']
+            return pr_url
+        else:
+            get_logger().info(f"No merge requests found for commit: {commit_sha}")
+            return None
+    except Exception as e:
+        get_logger().error(f"Failed to get MR url from commit sha: {e}")
+        return None
+
 async def handle_request(api_url: str, body: str, log_context: dict, sender_id: str):
    log_context["action"] = body
    log_context["event"] = "pull_request" if body == "/review" else "comment"
@ -51,6 +74,7 @@ async def _perform_commands_gitlab(commands_conf: str, agent: PRAgent, api_url:
            get_logger().error(f"Failed to perform command {command}: {e}")


+
@router.post("/webhook")
 async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request):
    start_time = datetime.now()
@ -87,12 +111,19 @@ async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request):
        get_logger().info("GitLab data", artifact=data)
        sender = data.get("user", {}).get("username", "unknown")
        sender_id = data.get("user", {}).get("id", "unknown")
+
+        # logic to ignore bot users (unlike Github, no direct flag for bot users in gitlab)
+        sender_name = data.get("user", {}).get("name", "unknown").lower()
+        if 'codium' in sender_name or 'bot_' in sender_name or 'bot-' in sender_name or '_bot' in sender_name or '-bot' in sender_name:
+            get_logger().info(f"Skipping bot user: {sender_name}")
+            return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"}))
+
        log_context["sender"] = sender
        if data.get('object_kind') == 'merge_request' and data['object_attributes'].get('action') in ['open', 'reopen']:
            url = data['object_attributes'].get('url')
            get_logger().info(f"New merge request: {url}")
            await _perform_commands_gitlab("pr_commands", PRAgent(), url, log_context)
-        elif data.get('object_kind') == 'note' and data['event_type'] == 'note': # comment on MR
+        elif data.get('object_kind') == 'note' and data.get('event_type') == 'note': # comment on MR
            if 'merge_request' in data:
                mr = data['merge_request']
                url = mr.get('url')
@ -102,6 +133,23 @@ async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request):
                    body = handle_ask_line(body, data)

                await handle_request(url, body, log_context, sender_id)
+        elif data.get('object_kind') == 'push' and data.get('event_name') == 'push':
+            commands_on_push = get_settings().get(f"gitlab.push_commands", {})
+            handle_push_trigger = get_settings().get(f"gitlab.handle_push_trigger", False)
+            if not commands_on_push or not handle_push_trigger:
+                get_logger().info("Push event, but no push commands found or push trigger is disabled")
+                return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"}))
+            try:
+                project_id = data['project_id']
+                commit_sha = data['checkout_sha']
+                url = await get_mr_url_from_commit_sha(commit_sha, gitlab_token, project_id)
+                if not url:
+                    get_logger().info(f"No MR found for commit: {commit_sha}")
+                    return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"}))
+                get_logger().debug(f'A push event has been received: {url}')
+                await _perform_commands_gitlab("push_commands", PRAgent(), url, log_context)
+            except Exception as e:
+                get_logger().error(f"Failed to handle push event: {e}")

    background_tasks.add_task(inner, request_json)
    end_time = datetime.now()
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@ -15,7 +15,7 @@ max_description_tokens = 500
 max_commits_tokens = 500
 max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities.
 patch_extra_lines = 1
-secret_provider="google_cloud_storage"
+secret_provider=""
 cli_mode=false
 ai_disclaimer_title=""  # Pro feature, title for a collapsible disclaimer to AI outputs
 ai_disclaimer=""  # Pro feature, full text for the AI disclaimer
@ -28,6 +28,7 @@ require_score_review=false
 require_tests_review=true
 require_estimate_effort_to_review=true
 require_can_be_split_review=false
+require_security_review=true
 # soc2
 require_soc2_ticket=false
 soc2_ticket_prompt="Does the PR description include a link to ticket in a project management system (e.g., Jira, Asana, Trello, etc.) ?"
@ -178,6 +179,11 @@ pr_commands = [
    "/review --pr_reviewer.num_code_suggestions=0",
    "/improve",
 ]
+handle_push_trigger = false
+commands = [
+    "/describe",
+    "/review --pr_reviewer.num_code_suggestions=0",
+]

 [bitbucket_app]
 pr_commands = [
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@ -69,7 +69,9 @@ class Review(BaseModel):
    insights_from_user_answers: str = Field(description="shortly summarize the insights you gained from the user's answers to the questions")
 {%- endif %}
    possible_issues: str = Field(description="Does this PR code introduce clear issues, bugs, or major performance concerns? If there are no apparent issues, respond with 'No'. If there are any issues, describe them briefly. Use bullet points if more than one issue. Be specific, and provide examples if possible. Start each bullet point with a short specific header, such as: "- Possible Bug: ...", etc.")
+{%- if require_security_review %}
    security_concerns: str = Field(description="does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible")
+{%- endif %}
 {%- if require_can_be_split_review %}
    can_be_split: List[SubPR] = Field(min_items=0, max_items=3, description="Can this PR, which contains {{ num_pr_files }} changed files in total, be divided into smaller sub-PRs with distinct tasks that can be reviewed and merged independently, regardless of the order ? Make sure that the sub-PRs are indeed independent, with no code dependencies between them, and that each sub-PR represent a meaningfull independent task. Output an empty list if the PR code does not needd to be split.")
 {%- endif %}
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@ -65,6 +65,7 @@ class PRReviewer:
            "require_tests": get_settings().pr_reviewer.require_tests_review,
            "require_estimate_effort_to_review": get_settings().pr_reviewer.require_estimate_effort_to_review,
            'require_can_be_split_review': get_settings().pr_reviewer.require_can_be_split_review,
+            'require_security_review': get_settings().pr_reviewer.require_security_review,
            'num_code_suggestions': get_settings().pr_reviewer.num_code_suggestions,
            'question_str': question_str,
            'answer_str': answer_str,
@ -375,7 +376,7 @@ class PRReviewer:
                    estimated_effort_number = int(estimated_effort.split(',')[0])
                    if 1 <= estimated_effort_number <= 5:  # 1, because ...
                        review_labels.append(f'Review effort [1-5]: {estimated_effort_number}')
-                if get_settings().pr_reviewer.enable_review_labels_security:
+                if get_settings().pr_reviewer.enable_review_labels_security and get_settings().pr_reviewer.require_security_review:
                    security_concerns = data['review']['security_concerns']  # yes, because ...
                    security_concerns_bool = 'yes' in security_concerns.lower() or 'true' in security_concerns.lower()
                    if security_concerns_bool: