diff --git a/docs/DESCRIBE.md b/docs/DESCRIBE.md index 05d1608a..3db21a13 100644 --- a/docs/DESCRIBE.md +++ b/docs/DESCRIBE.md @@ -27,18 +27,14 @@ Under the section 'pr_description', the [configuration file](./../pr_agent/setti - `extra_instructions`: Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ...". -#### Markers template +### Markers template markers enable to easily integrate user's content and auto-generated content, with a template-like mechanism. -- `use_description_markers`: if set to true, the tool will use markers template. It replaces every marker of the form `pr_agent:marker_name` with the relevant content. Default is false. - For example, if the PR original description was: ``` User content... -## PR Type: -pr_agent:pr_type ## PR Description: pr_agent:summary @@ -46,6 +42,21 @@ pr_agent:summary ## PR Walkthrough: pr_agent:walkthrough ``` -The marker `pr_agent:pr_type` will be replaced with the PR type, `pr_agent:summary` will be replaced with the PR summary, and `pr_agent:walkthrough` will be replaced with the PR walkthrough. +The marker `pr_agent:summary` will be replaced with the PR summary, and `pr_agent:walkthrough` will be replaced with the PR walkthrough. +##### Example: +``` + env: + pr_description.use_description_markers: 'true' +``` + + + +==> + + + +##### Configuration params: + +- `use_description_markers`: if set to true, the tool will use markers template. It replaces every marker of the form `pr_agent:marker_name` with the relevant content. Default is false. - `include_generated_by_header`: if set to true, the tool will add a dedicated header: 'Generated by PR Agent at ...' to any automatic content. Default is true. diff --git a/docs/IMPROVE.md b/docs/IMPROVE.md index 5c55e26f..f60cec53 100644 --- a/docs/IMPROVE.md +++ b/docs/IMPROVE.md @@ -31,4 +31,15 @@ Under the section 'pr_code_suggestions', the [configuration file](./../pr_agent/ - `num_code_suggestions_per_chunk`: number of code suggestions provided by the 'improve' tool, per chunk. Default is 8. - `rank_extended_suggestions`: if set to true, the tool will rank the suggestions, based on importance. Default is true. - `max_number_of_calls`: maximum number of chunks. Default is 5. -- `final_clip_factor`: factor to remove suggestions with low confidence. Default is 0.9. \ No newline at end of file +- `final_clip_factor`: factor to remove suggestions with low confidence. Default is 0.9. + + +#### A note on code suggestions quality + +- With current level of AI for code (GPT-4), mistakes can happen. Not all the suggestions will be perfect, and a user should not accept all of them automatically. + +- Suggestions are not meant to be [simplistic](./../pr_agent/settings/pr_code_suggestions_prompts.toml#L34). Instead, they aim to give deep feedback and raise questions, ideas and thoughts to the user, who can then use his judgment, experience, and understanding of the code base. + +- Recommended to use the 'extra_instructions' field to guide the model to suggestions that are more relevant to the specific needs of the project. + +- Best quality will be obtained by using 'improve --extended' mode. \ No newline at end of file diff --git a/docs/REVIEW.md b/docs/REVIEW.md index 71e36707..5445ccdd 100644 --- a/docs/REVIEW.md +++ b/docs/REVIEW.md @@ -43,4 +43,15 @@ The tool will first ask the author questions about the PR, and will guide the re - \ No newline at end of file + + + +#### A note on code suggestions quality + +- With current level of AI for code (GPT-4), mistakes can happen. Not all the suggestions will be perfect, and a user should not accept all of them automatically. + +- Suggestions are not meant to be [simplistic](./../pr_agent/settings/pr_reviewer_prompts.toml#L29). Instead, they aim to give deep feedback and raise questions, ideas and thoughts to the user, who can then use his judgment, experience, and understanding of the code base. + +- Recommended to use the 'extra_instructions' field to guide the model to suggestions that are more relevant to the specific needs of the project. + +- Unlike the 'review' feature, which does a lot of things, the ['improve --extended'](./IMPROVE.md) feature is dedicated only to suggestions, and usually gives better results. \ No newline at end of file diff --git a/pics/describe_markers_after.png b/pics/describe_markers_after.png new file mode 100644 index 00000000..933cf924 Binary files /dev/null and b/pics/describe_markers_after.png differ diff --git a/pics/describe_markers_before.png b/pics/describe_markers_before.png new file mode 100644 index 00000000..2c86653a Binary files /dev/null and b/pics/describe_markers_before.png differ diff --git a/pr_agent/secret_providers/google_cloud_storage_secret_provider.py b/pr_agent/secret_providers/google_cloud_storage_secret_provider.py index 18db5c4b..2dfb71f6 100644 --- a/pr_agent/secret_providers/google_cloud_storage_secret_provider.py +++ b/pr_agent/secret_providers/google_cloud_storage_secret_provider.py @@ -1,9 +1,8 @@ import ujson - from google.cloud import storage from pr_agent.config_loader import get_settings -from pr_agent.git_providers.gitlab_provider import logger +from pr_agent.log import get_logger from pr_agent.secret_providers.secret_provider import SecretProvider @@ -15,7 +14,7 @@ class GoogleCloudStorageSecretProvider(SecretProvider): self.bucket_name = get_settings().google_cloud_storage.bucket_name self.bucket = self.client.bucket(self.bucket_name) except Exception as e: - logger.error(f"Failed to initialize Google Cloud Storage Secret Provider: {e}") + get_logger().error(f"Failed to initialize Google Cloud Storage Secret Provider: {e}") raise e def get_secret(self, secret_name: str) -> str: @@ -23,7 +22,7 @@ class GoogleCloudStorageSecretProvider(SecretProvider): blob = self.bucket.blob(secret_name) return blob.download_as_string() except Exception as e: - logger.error(f"Failed to get secret {secret_name} from Google Cloud Storage: {e}") + get_logger().error(f"Failed to get secret {secret_name} from Google Cloud Storage: {e}") return "" def store_secret(self, secret_name: str, secret_value: str): @@ -31,5 +30,5 @@ class GoogleCloudStorageSecretProvider(SecretProvider): blob = self.bucket.blob(secret_name) blob.upload_from_string(secret_value) except Exception as e: - logger.error(f"Failed to store secret {secret_name} in Google Cloud Storage: {e}") + get_logger().error(f"Failed to store secret {secret_name} in Google Cloud Storage: {e}") raise e diff --git a/pr_agent/servers/bitbucket_app.py b/pr_agent/servers/bitbucket_app.py index 00f83cfe..e147fbdd 100644 --- a/pr_agent/servers/bitbucket_app.py +++ b/pr_agent/servers/bitbucket_app.py @@ -65,15 +65,17 @@ async def handle_manifest(request: Request, response: Response): @router.post("/webhook") async def handle_github_webhooks(background_tasks: BackgroundTasks, request: Request): - print(request.headers) + log_context = {"server_type": "bitbucket_app"} + get_logger().debug(request.headers) jwt_header = request.headers.get("authorization", None) if jwt_header: input_jwt = jwt_header.split(" ")[1] data = await request.json() - print(data) + get_logger().debug(data) async def inner(): try: owner = data["data"]["repository"]["owner"]["username"] + log_context["sender"] = owner secrets = json.loads(secret_provider.get_secret(owner)) shared_secret = secrets["shared_secret"] client_key = secrets["client_key"] @@ -85,11 +87,17 @@ async def handle_github_webhooks(background_tasks: BackgroundTasks, request: Req agent = PRAgent() if event == "pullrequest:created": pr_url = data["data"]["pullrequest"]["links"]["html"]["href"] - await agent.handle_request(pr_url, "review") + log_context["api_url"] = pr_url + log_context["event"] = "pull_request" + with get_logger().contextualize(**log_context): + await agent.handle_request(pr_url, "review") elif event == "pullrequest:comment_created": pr_url = data["data"]["pullrequest"]["links"]["html"]["href"] + log_context["api_url"] = pr_url + log_context["event"] = "comment" comment_body = data["data"]["comment"]["content"]["raw"] - await agent.handle_request(pr_url, comment_body) + with get_logger().contextualize(**log_context): + await agent.handle_request(pr_url, comment_body) except Exception as e: get_logger().error(f"Failed to handle webhook: {e}") background_tasks.add_task(inner) @@ -102,9 +110,10 @@ async def handle_github_webhooks(request: Request, response: Response): @router.post("/installed") async def handle_installed_webhooks(request: Request, response: Response): try: - print(request.headers) + get_logger().info("handle_installed_webhooks") + get_logger().info(request.headers) data = await request.json() - print(data) + get_logger().info(data) shared_secret = data["sharedSecret"] client_key = data["clientKey"] username = data["principal"]["username"] @@ -119,8 +128,10 @@ async def handle_installed_webhooks(request: Request, response: Response): @router.post("/uninstalled") async def handle_uninstalled_webhooks(request: Request, response: Response): + get_logger().info("handle_uninstalled_webhooks") + data = await request.json() - print(data) + get_logger().info(data) def start(): diff --git a/pr_agent/servers/gitlab_webhook.py b/pr_agent/servers/gitlab_webhook.py index 9cddc27e..63bf99ce 100644 --- a/pr_agent/servers/gitlab_webhook.py +++ b/pr_agent/servers/gitlab_webhook.py @@ -12,23 +12,33 @@ from starlette_context.middleware import RawContextMiddleware from pr_agent.agent.pr_agent import PRAgent from pr_agent.config_loader import get_settings, global_settings -from pr_agent.log import get_logger, setup_logger +from pr_agent.log import LoggingFormat, get_logger, setup_logger from pr_agent.secret_providers import get_secret_provider -setup_logger() +setup_logger(fmt=LoggingFormat.JSON) router = APIRouter() secret_provider = get_secret_provider() if get_settings().get("CONFIG.SECRET_PROVIDER") else None +def handle_request(background_tasks: BackgroundTasks, url: str, body: str, log_context: dict): + log_context["action"] = body + log_context["event"] = "pull_request" if body == "/review" else "comment" + log_context["api_url"] = url + with get_logger().contextualize(**log_context): + background_tasks.add_task(PRAgent().handle_request, url, body) + + @router.post("/webhook") async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request): + log_context = {"server_type": "gitlab_app"} if request.headers.get("X-Gitlab-Token") and secret_provider: request_token = request.headers.get("X-Gitlab-Token") secret = secret_provider.get_secret(request_token) try: secret_dict = json.loads(secret) gitlab_token = secret_dict["gitlab_token"] + log_context["sender"] = secret_dict["id"] context["settings"] = copy.deepcopy(global_settings) context["settings"].gitlab.personal_access_token = gitlab_token except Exception as e: @@ -48,13 +58,13 @@ async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request): if data.get('object_kind') == 'merge_request' and data['object_attributes'].get('action') in ['open', 'reopen']: get_logger().info(f"A merge request has been opened: {data['object_attributes'].get('title')}") url = data['object_attributes'].get('url') - background_tasks.add_task(PRAgent().handle_request, url, "/review") + handle_request(background_tasks, url, "/review") elif data.get('object_kind') == 'note' and data['event_type'] == 'note': if 'merge_request' in data: mr = data['merge_request'] url = mr.get('url') body = data.get('object_attributes', {}).get('note') - background_tasks.add_task(PRAgent().handle_request, url, body) + handle_request(background_tasks, url, body) return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"})) diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 9a03055f..6f44bc53 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -31,6 +31,7 @@ publish_labels=true publish_description_as_comment=false add_original_user_description=false keep_original_user_title=false +use_bullet_points=true extra_instructions = "" # markers use_description_markers=false diff --git a/pr_agent/settings/language_extensions.toml b/pr_agent/settings/language_extensions.toml index 0cc31f0e..eadb80c8 100644 --- a/pr_agent/settings/language_extensions.toml +++ b/pr_agent/settings/language_extensions.toml @@ -433,3 +433,6 @@ reStructuredText = [".rst", ".rest", ".rest.txt", ".rst.txt", ] wisp = [".wisp", ] xBase = [".prg", ".prw", ] +[docs_blacklist_extensions] +# Disable docs for these extensions of text files and scripts that are not programming languages of function, classes and methods +docs_blacklist = ['sql', 'txt', 'yaml', 'json', 'xml', 'md', 'rst', 'rest', 'rest.txt', 'rst.txt', 'mdpolicy', 'mdown', 'markdown', 'mdwn', 'mkd', 'mkdn', 'mkdown', 'sh'] \ No newline at end of file diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index c2c8e654..7d1621b0 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -31,7 +31,8 @@ PR Type: - Other PR Description: type: string - description: an informative and concise description of the PR + description: an informative and concise description of the PR. + {%- if use_bullet_points %} Use bullet points. {% endif %} PR Main Files Walkthrough: type: array maxItems: 10 diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index 657027af..cb49b5d0 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -25,7 +25,7 @@ code line that already existed in the file.... The review should focus on new code added in the PR (lines starting with '+'), and not on code that already existed in the file (lines starting with '-', or without prefix). {%- if num_code_suggestions > 0 %} -- Provide up to {{ num_code_suggestions }} code suggestions. +- Provide up to {{ num_code_suggestions }} code suggestions. Try to provide diverse and insightful suggestions. - Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices. - Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the PR code. - Don't suggest to add docstring, type hints, or comments. @@ -99,10 +99,10 @@ PR Feedback: General suggestions: type: string description: |- - General suggestions and feedback for the contributors and maintainers of - this PR. May include important suggestions for the overall structure, - primary purpose, best practices, critical bugs, and other aspects of the - PR. Don't address PR title and description, or lack of tests. Explain your suggestions. + General suggestions and feedback for the contributors and maintainers of this PR. + May include important suggestions for the overall structure, + primary purpose, best practices, critical bugs, and other aspects of the PR. + Don't address PR title and description, or lack of tests. Explain your suggestions. {%- if num_code_suggestions > 0 %} Code feedback: type: array @@ -115,11 +115,10 @@ PR Feedback: suggestion: type: string description: |- - a concrete suggestion for meaningfully improving the new PR code. Also - describe how, specifically, the suggestion can be applied to new PR - code. Add tags with importance measure that matches each suggestion - ('important' or 'medium'). Do not make suggestions for updating or - adding docstrings, renaming PR title and description, or linter like. + a concrete suggestion for meaningfully improving the new PR code. + Also describe how, specifically, the suggestion can be applied to new PR code. + Add tags with importance measure that matches each suggestion ('important' or 'medium'). + Do not make suggestions for updating or adding docstrings, renaming PR title and description, or linter like. relevant line: type: string description: |- diff --git a/pr_agent/tools/pr_add_docs.py b/pr_agent/tools/pr_add_docs.py index c6499bb5..eec75b9c 100644 --- a/pr_agent/tools/pr_add_docs.py +++ b/pr_agent/tools/pr_add_docs.py @@ -65,6 +65,11 @@ class PRAddDocs: async def _prepare_prediction(self, model: str): get_logger().info('Getting PR diff...') + + # Disable adding docs to scripts and other non-relevant text files + from pr_agent.algo.language_handler import bad_extensions + bad_extensions += get_settings().docs_blacklist_extensions.docs_blacklist + self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model, diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py index d74ee665..c1bd03fd 100644 --- a/pr_agent/tools/pr_description.py +++ b/pr_agent/tools/pr_description.py @@ -40,6 +40,7 @@ class PRDescription: "description": self.git_provider.get_pr_description(full=False), "language": self.main_pr_language, "diff": "", # empty diff for initial calculation + "use_bullet_points": get_settings().pr_description.use_bullet_points, "extra_instructions": get_settings().pr_description.extra_instructions, "commit_messages_str": self.git_provider.get_commit_messages() } @@ -185,6 +186,11 @@ class PRDescription: else: ai_header = "" + ai_type = self.data.get('PR Type') + if ai_type and not re.search(r'', body): + pr_type = f"{ai_header}{ai_type}" + body = body.replace('pr_agent:type', pr_type) + ai_summary = self.data.get('PR Description') if ai_summary and not re.search(r'', body): summary = f"{ai_header}{ai_summary}"