Merge remote-tracking branch 'origin/main'

2025-07-21 04:50:39 +08:00 · 2023-10-19 17:10:30 +03:00
parent 5daaaf2c1d e8f207691e
commit abbce60f18
14 changed files with 103 additions and 35 deletions
--- a/docs/DESCRIBE.md
+++ b/docs/DESCRIBE.md
@ -27,18 +27,14 @@ Under the section 'pr_description', the [configuration file](./../pr_agent/setti

 - `extra_instructions`: Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ...".

-#### Markers template
+### Markers template

 markers enable to easily integrate user's content and auto-generated content, with a template-like mechanism.

- `use_description_markers`: if set to true, the tool will use markers template. It replaces every marker of the form `pr_agent:marker_name` with the relevant content. Default is false.
-
 For example, if the PR original description was:
 ```
 User content...

-## PR Type:
-pr_agent:pr_type

 ## PR Description:
 pr_agent:summary
@ -46,6 +42,21 @@ pr_agent:summary
 ## PR Walkthrough:
 pr_agent:walkthrough
 ```
-The marker `pr_agent:pr_type` will be replaced with the PR type, `pr_agent:summary` will be replaced with the PR summary, and `pr_agent:walkthrough` will be replaced with the PR walkthrough.
+The marker `pr_agent:summary` will be replaced with the PR summary, and `pr_agent:walkthrough` will be replaced with the PR walkthrough.

+##### Example:
+```
+        env:
+          pr_description.use_description_markers: 'true'
+```          
+
+<kbd><img src=./../pics/describe_markers_before.png width="768"></kbd>
+
+==>
+
+<kbd><img src=./../pics/describe_markers_after.png width="768"></kbd>
+
+##### Configuration params:
+
+- `use_description_markers`: if set to true, the tool will use markers template. It replaces every marker of the form `pr_agent:marker_name` with the relevant content. Default is false.
 - `include_generated_by_header`: if set to true, the tool will add a dedicated header: 'Generated by PR Agent at ...' to any automatic content. Default is true.
--- a/docs/IMPROVE.md
+++ b/docs/IMPROVE.md
@ -31,4 +31,15 @@ Under the section 'pr_code_suggestions', the [configuration file](./../pr_agent/
 - `num_code_suggestions_per_chunk`: number of code suggestions provided by the 'improve' tool, per chunk. Default is 8.
 - `rank_extended_suggestions`: if set to true, the tool will rank the suggestions, based on importance. Default is true.
 - `max_number_of_calls`: maximum number of chunks. Default is 5.
- `final_clip_factor`: factor to remove suggestions with low confidence. Default is 0.9.
+- `final_clip_factor`: factor to remove suggestions with low confidence. Default is 0.9.
+
+
+#### A note on code suggestions quality
+
+- With current level of AI for code (GPT-4), mistakes can happen. Not all the suggestions will be perfect, and a user should not accept all of them automatically.
+
+- Suggestions are not meant to be [simplistic](./../pr_agent/settings/pr_code_suggestions_prompts.toml#L34). Instead, they aim to give deep feedback and raise questions, ideas and thoughts to the user, who can then use his judgment, experience, and understanding of the code base.
+
+- Recommended to use the 'extra_instructions' field to guide the model to suggestions that are more relevant to the specific needs of the project.
+
+- Best quality will be obtained by using 'improve --extended' mode.
--- a/docs/REVIEW.md
+++ b/docs/REVIEW.md
@ -43,4 +43,15 @@ The tool will first ask the author questions about the PR, and will guide the re

 <kbd><img src=./../pics/reflection_questions.png width="768"></kbd>
 <kbd><img src=./../pics/reflection_answers.png width="768"></kbd>
-<kbd><img src=./../pics/reflection_insights.png width="768"></kbd>
+<kbd><img src=./../pics/reflection_insights.png width="768"></kbd>
+
+
+#### A note on code suggestions quality
+
+- With current level of AI for code (GPT-4), mistakes can happen. Not all the suggestions will be perfect, and a user should not accept all of them automatically.
+
+- Suggestions are not meant to be [simplistic](./../pr_agent/settings/pr_reviewer_prompts.toml#L29). Instead, they aim to give deep feedback and raise questions, ideas and thoughts to the user, who can then use his judgment, experience, and understanding of the code base.
+
+- Recommended to use the 'extra_instructions' field to guide the model to suggestions that are more relevant to the specific needs of the project.
+
+- Unlike the 'review' feature, which does a lot of things, the ['improve --extended'](./IMPROVE.md) feature is dedicated only to suggestions, and usually gives better results.
--- a/pics/describe_markers_after.png
+++ b/pics/describe_markers_after.png
--- a/pics/describe_markers_before.png
+++ b/pics/describe_markers_before.png
--- a/pr_agent/secret_providers/google_cloud_storage_secret_provider.py
+++ b/pr_agent/secret_providers/google_cloud_storage_secret_provider.py
@ -1,9 +1,8 @@
 import ujson
-
 from google.cloud import storage

 from pr_agent.config_loader import get_settings
-from pr_agent.git_providers.gitlab_provider import logger
+from pr_agent.log import get_logger
 from pr_agent.secret_providers.secret_provider import SecretProvider


@ -15,7 +14,7 @@ class GoogleCloudStorageSecretProvider(SecretProvider):
            self.bucket_name = get_settings().google_cloud_storage.bucket_name
            self.bucket = self.client.bucket(self.bucket_name)
        except Exception as e:
-            logger.error(f"Failed to initialize Google Cloud Storage Secret Provider: {e}")
+            get_logger().error(f"Failed to initialize Google Cloud Storage Secret Provider: {e}")
            raise e

    def get_secret(self, secret_name: str) -> str:
@ -23,7 +22,7 @@ class GoogleCloudStorageSecretProvider(SecretProvider):
            blob = self.bucket.blob(secret_name)
            return blob.download_as_string()
        except Exception as e:
-            logger.error(f"Failed to get secret {secret_name} from Google Cloud Storage: {e}")
+            get_logger().error(f"Failed to get secret {secret_name} from Google Cloud Storage: {e}")
            return ""

    def store_secret(self, secret_name: str, secret_value: str):
@ -31,5 +30,5 @@ class GoogleCloudStorageSecretProvider(SecretProvider):
            blob = self.bucket.blob(secret_name)
            blob.upload_from_string(secret_value)
        except Exception as e:
-            logger.error(f"Failed to store secret {secret_name} in Google Cloud Storage: {e}")
+            get_logger().error(f"Failed to store secret {secret_name} in Google Cloud Storage: {e}")
            raise e
--- a/pr_agent/servers/bitbucket_app.py
+++ b/pr_agent/servers/bitbucket_app.py
@ -65,15 +65,17 @@ async def handle_manifest(request: Request, response: Response):

@router.post("/webhook")
 async def handle_github_webhooks(background_tasks: BackgroundTasks, request: Request):
-    print(request.headers)
+    log_context = {"server_type": "bitbucket_app"}
+    get_logger().debug(request.headers)
    jwt_header = request.headers.get("authorization", None)
    if jwt_header:
        input_jwt = jwt_header.split(" ")[1]
    data = await request.json()
-    print(data)
+    get_logger().debug(data)
    async def inner():
        try:
            owner = data["data"]["repository"]["owner"]["username"]
+            log_context["sender"] = owner
            secrets = json.loads(secret_provider.get_secret(owner))
            shared_secret = secrets["shared_secret"]
            client_key = secrets["client_key"]
@ -85,11 +87,17 @@ async def handle_github_webhooks(background_tasks: BackgroundTasks, request: Req
            agent = PRAgent()
            if event == "pullrequest:created":
                pr_url = data["data"]["pullrequest"]["links"]["html"]["href"]
-                await agent.handle_request(pr_url, "review")
+                log_context["api_url"] = pr_url
+                log_context["event"] = "pull_request"
+                with get_logger().contextualize(**log_context):
+                    await agent.handle_request(pr_url, "review")
            elif event == "pullrequest:comment_created":
                pr_url = data["data"]["pullrequest"]["links"]["html"]["href"]
+                log_context["api_url"] = pr_url
+                log_context["event"] = "comment"
                comment_body = data["data"]["comment"]["content"]["raw"]
-                await agent.handle_request(pr_url, comment_body)
+                with get_logger().contextualize(**log_context):
+                    await agent.handle_request(pr_url, comment_body)
        except Exception as e:
            get_logger().error(f"Failed to handle webhook: {e}")
    background_tasks.add_task(inner)
@ -102,9 +110,10 @@ async def handle_github_webhooks(request: Request, response: Response):
@router.post("/installed")
 async def handle_installed_webhooks(request: Request, response: Response):
    try:
-        print(request.headers)
+        get_logger().info("handle_installed_webhooks")
+        get_logger().info(request.headers)
        data = await request.json()
-        print(data)
+        get_logger().info(data)
        shared_secret = data["sharedSecret"]
        client_key = data["clientKey"]
        username = data["principal"]["username"]
@ -119,8 +128,10 @@ async def handle_installed_webhooks(request: Request, response: Response):

@router.post("/uninstalled")
 async def handle_uninstalled_webhooks(request: Request, response: Response):
+    get_logger().info("handle_uninstalled_webhooks")
+
    data = await request.json()
-    print(data)
+    get_logger().info(data)


 def start():
--- a/pr_agent/servers/gitlab_webhook.py
+++ b/pr_agent/servers/gitlab_webhook.py
@ -12,23 +12,33 @@ from starlette_context.middleware import RawContextMiddleware

 from pr_agent.agent.pr_agent import PRAgent
 from pr_agent.config_loader import get_settings, global_settings
-from pr_agent.log import get_logger, setup_logger
+from pr_agent.log import LoggingFormat, get_logger, setup_logger
 from pr_agent.secret_providers import get_secret_provider

-setup_logger()
+setup_logger(fmt=LoggingFormat.JSON)
 router = APIRouter()

 secret_provider = get_secret_provider() if get_settings().get("CONFIG.SECRET_PROVIDER") else None


+def handle_request(background_tasks: BackgroundTasks, url: str, body: str, log_context: dict):
+    log_context["action"] = body
+    log_context["event"] = "pull_request" if body == "/review" else "comment"
+    log_context["api_url"] = url
+    with get_logger().contextualize(**log_context):
+        background_tasks.add_task(PRAgent().handle_request, url, body)
+
+
@router.post("/webhook")
 async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request):
+    log_context = {"server_type": "gitlab_app"}
    if request.headers.get("X-Gitlab-Token") and secret_provider:
        request_token = request.headers.get("X-Gitlab-Token")
        secret = secret_provider.get_secret(request_token)
        try:
            secret_dict = json.loads(secret)
            gitlab_token = secret_dict["gitlab_token"]
+            log_context["sender"] = secret_dict["id"]
            context["settings"] = copy.deepcopy(global_settings)
            context["settings"].gitlab.personal_access_token = gitlab_token
        except Exception as e:
@ -48,13 +58,13 @@ async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request):
    if data.get('object_kind') == 'merge_request' and data['object_attributes'].get('action') in ['open', 'reopen']:
        get_logger().info(f"A merge request has been opened: {data['object_attributes'].get('title')}")
        url = data['object_attributes'].get('url')
-        background_tasks.add_task(PRAgent().handle_request, url, "/review")
+        handle_request(background_tasks, url, "/review")
    elif data.get('object_kind') == 'note' and data['event_type'] == 'note':
        if 'merge_request' in data:
            mr = data['merge_request']
            url = mr.get('url')
            body = data.get('object_attributes', {}).get('note')
-            background_tasks.add_task(PRAgent().handle_request, url, body)
+            handle_request(background_tasks, url, body)
    return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"}))


--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@ -31,6 +31,7 @@ publish_labels=true
 publish_description_as_comment=false
 add_original_user_description=false
 keep_original_user_title=false
+use_bullet_points=true
 extra_instructions = ""
 # markers
 use_description_markers=false
--- a/pr_agent/settings/language_extensions.toml
+++ b/pr_agent/settings/language_extensions.toml
@ -433,3 +433,6 @@ reStructuredText = [".rst", ".rest", ".rest.txt", ".rst.txt", ]
 wisp = [".wisp", ]
 xBase = [".prg", ".prw", ]

+[docs_blacklist_extensions]
+# Disable docs for these extensions of text files and scripts that are not programming languages of function, classes and methods
+docs_blacklist = ['sql', 'txt', 'yaml', 'json', 'xml', 'md', 'rst', 'rest', 'rest.txt', 'rst.txt', 'mdpolicy', 'mdown', 'markdown', 'mdwn', 'mkd', 'mkdn', 'mkdown', 'sh']
--- a/pr_agent/settings/pr_description_prompts.toml
+++ b/pr_agent/settings/pr_description_prompts.toml
@ -31,7 +31,8 @@ PR Type:
      - Other
 PR Description:
  type: string
-  description: an informative and concise description of the PR
+  description: an informative and concise description of the PR.
+  {%- if use_bullet_points %} Use bullet points. {% endif %}
 PR Main Files Walkthrough:
  type: array
  maxItems: 10
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@ -25,7 +25,7 @@ code line that already existed in the file....
 The review should focus on new code added in the PR (lines starting with '+'), and not on code that already existed in the file (lines starting with '-', or without prefix).

 {%- if num_code_suggestions > 0 %}
- Provide up to {{ num_code_suggestions }} code suggestions.
+- Provide up to {{ num_code_suggestions }} code suggestions. Try to provide diverse and insightful suggestions.
 - Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices.
 - Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the PR code.
 - Don't suggest to add docstring, type hints, or comments.
@ -99,10 +99,10 @@ PR Feedback:
  General suggestions:
    type: string
    description: |-
-      General suggestions and feedback for the contributors and maintainers of
-      this PR. May include important suggestions for the overall structure,
-      primary purpose, best practices, critical bugs, and other aspects of the
-      PR. Don't address PR title and description, or lack of tests. Explain your suggestions.
+      General suggestions and feedback for the contributors and maintainers of this PR.
+      May include important suggestions for the overall structure,
+      primary purpose, best practices, critical bugs, and other aspects of the PR.
+      Don't address PR title and description, or lack of tests. Explain your suggestions.
 {%- if num_code_suggestions > 0 %}
  Code feedback:
    type: array
@ -115,11 +115,10 @@ PR Feedback:
      suggestion:
        type: string
        description: |-
-          a concrete suggestion for meaningfully improving the new PR code. Also
-          describe how, specifically, the suggestion can be applied to new PR
-          code. Add tags with importance measure that matches each suggestion
-          ('important' or 'medium'). Do not make suggestions for updating or
-          adding docstrings, renaming PR title and description, or linter like.
+          a concrete suggestion for meaningfully improving the new PR code.
+          Also describe how, specifically, the suggestion can be applied to new PR code.
+          Add tags with importance measure that matches each suggestion ('important' or 'medium').
+          Do not make suggestions for updating or adding docstrings, renaming PR title and description, or linter like.
      relevant line:
        type: string
        description: |-
--- a/pr_agent/tools/pr_add_docs.py
+++ b/pr_agent/tools/pr_add_docs.py
@ -65,6 +65,11 @@ class PRAddDocs:

    async def _prepare_prediction(self, model: str):
        get_logger().info('Getting PR diff...')
+
+        # Disable adding docs to scripts and other non-relevant text files
+        from pr_agent.algo.language_handler import bad_extensions
+        bad_extensions += get_settings().docs_blacklist_extensions.docs_blacklist
+
        self.patches_diff = get_pr_diff(self.git_provider,
                                        self.token_handler,
                                        model,
--- a/pr_agent/tools/pr_description.py
+++ b/pr_agent/tools/pr_description.py
@ -40,6 +40,7 @@ class PRDescription:
            "description": self.git_provider.get_pr_description(full=False),
            "language": self.main_pr_language,
            "diff": "",  # empty diff for initial calculation
+            "use_bullet_points": get_settings().pr_description.use_bullet_points,
            "extra_instructions": get_settings().pr_description.extra_instructions,
            "commit_messages_str": self.git_provider.get_commit_messages()
        }
@ -185,6 +186,11 @@ class PRDescription:
        else:
            ai_header = ""

+        ai_type = self.data.get('PR Type')
+        if ai_type and not re.search(r'<!--\s*pr_agent:type\s*-->', body):
+            pr_type = f"{ai_header}{ai_type}"
+            body = body.replace('pr_agent:type', pr_type)
+
        ai_summary = self.data.get('PR Description')
        if ai_summary and not re.search(r'<!--\s*pr_agent:summary\s*-->', body):
            summary = f"{ai_header}{ai_summary}"