diff --git a/.pr_agent.toml b/.pr_agent.toml index 6937b547..7fc8ea8a 100644 --- a/.pr_agent.toml +++ b/.pr_agent.toml @@ -1,2 +1,6 @@ [pr_reviewer] -enable_review_labels_effort = true \ No newline at end of file +enable_review_labels_effort = true + + +[pr_code_suggestions] +summarize=true diff --git a/INSTALL.md b/INSTALL.md index 0e13bcb7..b766bdc8 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -25,6 +25,7 @@ There are several ways to use PR-Agent: **BitBucket specific methods** - [Run as a Bitbucket Pipeline](INSTALL.md#run-as-a-bitbucket-pipeline) - [Run on a hosted app](INSTALL.md#run-on-a-hosted-bitbucket-app) +- [Bitbucket server and data center](INSTALL.md#bitbucket-server-and-data-center) --- ### Use Docker image (no installation required) @@ -418,4 +419,41 @@ Note that comments on a PR are not supported in Bitbucket Pipeline. Please contact or visit [CodiumAI pricing page](https://www.codium.ai/pricing/) if you're interested in a hosted BitBucket app solution that provides full functionality including PR reviews and comment handling. It's based on the [bitbucket_app.py](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/git_providers/bitbucket_provider.py) implementation. +### Bitbucket Server and Data Center + +Log in to your on-prem instance of Bitbucket with your service account username and password. +Navigate to `Manage account`, `HTTP Access tokens`, `Create Token`. +Generate the token and add it to `.secret.toml` under the `bitbucket_server` section: + +```toml +[bitbucket_server] +bearer_token = "" +``` + +#### Run it as CLI + +Modify `configuration.toml`: + +```toml +git_provider="bitbucket_server" +``` + +and pass the pull request URL: +```shell +python cli.py --pr_url https://git.onpreminstanceofbitbucket.com/projects/PROJECT/repos/REPO/pull-requests/1 review +``` + +#### Run it as service + +To run pr-agent as a webhook, build the docker image: +``` +docker build . 
-t codiumai/pr-agent:bitbucket_server_webhook --target bitbucket_server_webhook -f docker/Dockerfile +docker push codiumai/pr-agent:bitbucket_server_webhook # Push to your Docker repository +``` + +Navigate to `Projects` or `Repositories`, `Settings`, `Webhooks`, `Create Webhook`. +Fill in the name and URL, set Authentication to None, and select the Pull Request Opened checkbox to receive that event as a webhook. + +The URL should end with `/webhook`, for example: https://domain.com/webhook + ======= diff --git a/Usage.md b/Usage.md index d4a7b230..95707773 100644 --- a/Usage.md +++ b/Usage.md @@ -117,7 +117,7 @@ Any configuration value in [configuration file](pr_agent/settings/configuration. ### Working with GitHub App When running PR-Agent from GitHub App, the default [configuration file](pr_agent/settings/configuration.toml) from a pre-built docker will be initially loaded. -By uploading a local `.pr_agent.toml` file to the root of the repo's main branch, you can edit and customize any configuration parameter. +By uploading a local `.pr_agent.toml` file to the root of the repo's main branch, you can edit and customize any configuration parameter. Note that you need to upload `.pr_agent.toml` prior to creating a PR, in order for the configuration to take effect. 
For example, if you set in `.pr_agent.toml`: diff --git a/docker/Dockerfile b/docker/Dockerfile index 951f846c..0f669e89 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -14,6 +14,10 @@ FROM base as bitbucket_app ADD pr_agent pr_agent CMD ["python", "pr_agent/servers/bitbucket_app.py"] +FROM base as bitbucket_server_webhook +ADD pr_agent pr_agent +CMD ["python", "pr_agent/servers/bitbucket_server_webhook.py"] + FROM base as github_polling ADD pr_agent pr_agent CMD ["python", "pr_agent/servers/github_polling.py"] diff --git a/docs/IMPROVE.md b/docs/IMPROVE.md index f60cec53..81be15e5 100644 --- a/docs/IMPROVE.md +++ b/docs/IMPROVE.md @@ -1,6 +1,6 @@ # Improve Tool -The `improve` tool scans the PR code changes, and automatically generate committable suggestions for improving the PR code. +The `improve` tool scans the PR code changes, and automatically generates committable suggestions for improving the PR code. It can be invoked manually by commenting on any PR: ``` /improve @@ -17,7 +17,7 @@ An extended mode, which does not involve PR Compression and provides more compre /improve --extended ``` Note that the extended mode divides the PR code changes into chunks, up to the token limits, where each chunk is handled separately (multiple calls to GPT-4). -Hence, the total number of suggestions is proportional to the number of chunks, i.e. the size of the PR. +Hence, the total number of suggestions is proportional to the number of chunks, i.e., the size of the PR. ### Configuration options @@ -33,13 +33,23 @@ Under the section 'pr_code_suggestions', the [configuration file](./../pr_agent/ - `max_number_of_calls`: maximum number of chunks. Default is 5. - `final_clip_factor`: factor to remove suggestions with low confidence. Default is 0.9. +#### summarize mode +- `summarize`: if set to true, the tool will present the code suggestions in a compact way. Default is false. 
+ +In this mode, instead of presenting committable suggestions, the different suggestions will be combined into a single compact comment, with a significantly smaller PR footprint. + +For example: + +`/improve --pr_code_suggestions.summarize=true` + + #### A note on code suggestions quality -- With current level of AI for code (GPT-4), mistakes can happen. Not all the suggestions will be perfect, and a user should not accept all of them automatically. +- With the current level of AI for code (GPT-4), mistakes can happen. Not all the suggestions will be perfect, and a user should not accept all of them automatically. - Suggestions are not meant to be [simplistic](./../pr_agent/settings/pr_code_suggestions_prompts.toml#L34). Instead, they aim to give deep feedback and raise questions, ideas and thoughts to the user, who can then use his judgment, experience, and understanding of the code base. - Recommended to use the 'extra_instructions' field to guide the model to suggestions that are more relevant to the specific needs of the project. -- Best quality will be obtained by using 'improve --extended' mode. \ No newline at end of file +- Best quality will be obtained by using 'improve --extended' mode. 
diff --git a/pics/improved_summerize_closed.png b/pics/improved_summerize_closed.png new file mode 100644 index 00000000..f33d87d3 Binary files /dev/null and b/pics/improved_summerize_closed.png differ diff --git a/pics/improved_summerize_open.png b/pics/improved_summerize_open.png new file mode 100644 index 00000000..a2bd35c2 Binary files /dev/null and b/pics/improved_summerize_open.png differ diff --git a/pr_agent/algo/language_handler.py b/pr_agent/algo/language_handler.py index 66e85025..b4c02bee 100644 --- a/pr_agent/algo/language_handler.py +++ b/pr_agent/algo/language_handler.py @@ -3,8 +3,7 @@ from typing import Dict from pr_agent.config_loader import get_settings -language_extension_map_org = get_settings().language_extension_map_org -language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()} + # Bad Extensions, source: https://github.com/EleutherAI/github-downloader/blob/345e7c4cbb9e0dc8a0615fd995a08bf9d73b3fe6/download_repo_text.py # noqa: E501 bad_extensions = get_settings().bad_extensions.default @@ -29,6 +28,8 @@ def sort_files_by_main_languages(languages: Dict, files: list): # languages_sorted = sorted(languages, key=lambda x: x[1], reverse=True) # get all extensions for the languages main_extensions = [] + language_extension_map_org = get_settings().language_extension_map_org + language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()} for language in languages_sorted_list: if language.lower() in language_extension_map: main_extensions.append(language_extension_map[language.lower()]) diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 6063dece..4c1352f0 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -10,7 +10,7 @@ from github import RateLimitExceededException from pr_agent.algo.git_patch_processing import convert_to_hunks_with_lines_numbers, extend_patch, handle_patch_deletions from pr_agent.algo.language_handler import 
sort_files_by_main_languages from pr_agent.algo.file_filter import filter_ignored -from pr_agent.algo.token_handler import TokenHandler, get_token_encoder +from pr_agent.algo.token_handler import TokenHandler from pr_agent.algo.utils import get_max_tokens from pr_agent.config_loader import get_settings from pr_agent.git_providers.git_provider import FilePatchInfo, GitProvider, EDIT_TYPE @@ -326,35 +326,6 @@ def find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo], return position, absolute_position -def clip_tokens(text: str, max_tokens: int) -> str: - """ - Clip the number of tokens in a string to a maximum number of tokens. - - Args: - text (str): The string to clip. - max_tokens (int): The maximum number of tokens allowed in the string. - - Returns: - str: The clipped string. - """ - if not text: - return text - - try: - encoder = get_token_encoder() - num_input_tokens = len(encoder.encode(text)) - if num_input_tokens <= max_tokens: - return text - num_chars = len(text) - chars_per_token = num_chars / num_input_tokens - num_output_chars = int(chars_per_token * max_tokens) - clipped_text = text[:num_output_chars] - return clipped_text - except Exception as e: - get_logger().warning(f"Failed to clip tokens: {e}") - return text - - def get_pr_multi_diffs(git_provider: GitProvider, token_handler: TokenHandler, model: str, diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index d3377dee..7a6e666c 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -11,6 +11,7 @@ import yaml from starlette_context import context from pr_agent.algo import MAX_TOKENS +from pr_agent.algo.token_handler import get_token_encoder from pr_agent.config_loader import get_settings, global_settings from pr_agent.log import get_logger @@ -57,7 +58,8 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool=True) -> str: emoji = emojis.get(key, "") if key.lower() == 'code feedback': if gfm_supported: - markdown_text += f"\n\n- **
{ emoji } Code feedback:**\n\n" + markdown_text += f"\n\n- " + markdown_text += f"
{ emoji } Code feedback:\n\n" else: markdown_text += f"\n\n- **{emoji} Code feedback:**\n\n" else: @@ -98,9 +100,9 @@ def parse_code_suggestion(code_suggestions: dict, gfm_supported: bool=True) -> s markdown_text += f" - **{code_key}:**\n{code_str_indented}\n" else: if "relevant file" in sub_key.lower(): - markdown_text += f"\n - **{sub_key}:** {sub_value}\n" + markdown_text += f"\n - **{sub_key}:** {sub_value} \n" else: - markdown_text += f" **{sub_key}:** {sub_value}\n" + markdown_text += f" **{sub_key}:** {sub_value} \n" if not gfm_supported: if "relevant line" not in sub_key.lower(): # nicer presentation # markdown_text = markdown_text.rstrip('\n') + "\\\n" # works for gitlab @@ -338,12 +340,15 @@ def set_custom_labels(variables): labels_list = f" - {labels_list}" if labels_list else "" variables["custom_labels"] = labels_list return - final_labels = "" + #final_labels = "" + #for k, v in labels.items(): + # final_labels += f" - {k} ({v['description']})\n" + #variables["custom_labels"] = final_labels + #variables["custom_labels_examples"] = f" - {list(labels.keys())[0]}" + variables["custom_labels_class"] = "class Label(str, Enum):" for k, v in labels.items(): - final_labels += f" - {k} ({v['description']})\n" - variables["custom_labels"] = final_labels - variables["custom_labels_examples"] = f" - {list(labels.keys())[0]}" - + description = v['description'].strip('\n').replace('\n', '\\n') + variables["custom_labels_class"] += f"\n {k.lower().replace(' ', '_')} = '{k}' # {description}" def get_user_labels(current_labels: List[str] = None): """ @@ -375,3 +380,34 @@ def get_max_tokens(model): max_tokens_model = min(settings.config.max_model_tokens, max_tokens_model) # get_logger().debug(f"limiting max tokens to {max_tokens_model}") return max_tokens_model + + +def clip_tokens(text: str, max_tokens: int, add_three_dots=True) -> str: + """ + Clip the number of tokens in a string to a maximum number of tokens. + + Args: + text (str): The string to clip. 
+ max_tokens (int): The maximum number of tokens allowed in the string. + add_three_dots (bool, optional): Whether to append the '...(truncated)' marker to the end of the clipped text. Defaults to True. + Returns: + str: The clipped string. + """ + if not text: + return text + + try: + encoder = get_token_encoder() + num_input_tokens = len(encoder.encode(text)) + if num_input_tokens <= max_tokens: + return text + num_chars = len(text) + chars_per_token = num_chars / num_input_tokens + num_output_chars = int(chars_per_token * max_tokens) + clipped_text = text[:num_output_chars] + if add_three_dots: + clipped_text += "...(truncated)" + return clipped_text + except Exception as e: + get_logger().warning(f"Failed to clip tokens: {e}") + return text diff --git a/pr_agent/cli.py b/pr_agent/cli.py index 91d4889c..5a6a6640 100644 --- a/pr_agent/cli.py +++ b/pr_agent/cli.py @@ -23,18 +23,22 @@ For example: - cli.py --issue_url=... similar_issue Supported commands: --review / review_pr - Add a review that includes a summary of the PR and specific suggestions for improvement. +- review / review_pr - Add a review that includes a summary of the PR and specific suggestions for improvement. --ask / ask_question [question] - Ask a question about the PR. +- ask / ask_question [question] - Ask a question about the PR. --describe / describe_pr - Modify the PR title and description based on the PR's contents. +- describe / describe_pr - Modify the PR title and description based on the PR's contents. --improve / improve_code - Suggest improvements to the code in the PR as pull request comments ready to commit. +- improve / improve_code - Suggest improvements to the code in the PR as pull request comments ready to commit. Extended mode ('improve --extended') employs several calls, and provides a more thorough feedback --reflect - Ask the PR author questions about the PR. +- reflect - Ask the PR author questions about the PR. --update_changelog - Update the changelog based on the PR's contents. 
+- update_changelog - Update the changelog based on the PR's contents. + +- add_docs + +- generate_labels Configuration: diff --git a/pr_agent/git_providers/__init__.py b/pr_agent/git_providers/__init__.py index 968f0dfc..14103a95 100644 --- a/pr_agent/git_providers/__init__.py +++ b/pr_agent/git_providers/__init__.py @@ -1,5 +1,6 @@ from pr_agent.config_loader import get_settings from pr_agent.git_providers.bitbucket_provider import BitbucketProvider +from pr_agent.git_providers.bitbucket_server_provider import BitbucketServerProvider from pr_agent.git_providers.codecommit_provider import CodeCommitProvider from pr_agent.git_providers.github_provider import GithubProvider from pr_agent.git_providers.gitlab_provider import GitLabProvider @@ -12,6 +13,7 @@ _GIT_PROVIDERS = { 'github': GithubProvider, 'gitlab': GitLabProvider, 'bitbucket': BitbucketProvider, + 'bitbucket_server': BitbucketServerProvider, 'azure': AzureDevopsProvider, 'codecommit': CodeCommitProvider, 'local' : LocalGitProvider, diff --git a/pr_agent/git_providers/azuredevops_provider.py b/pr_agent/git_providers/azuredevops_provider.py index 6a404532..ca11b9d8 100644 --- a/pr_agent/git_providers/azuredevops_provider.py +++ b/pr_agent/git_providers/azuredevops_provider.py @@ -14,9 +14,8 @@ try: except ImportError: AZURE_DEVOPS_AVAILABLE = False -from ..algo.pr_processing import clip_tokens from ..config_loader import get_settings -from ..algo.utils import load_large_diff +from ..algo.utils import load_large_diff, clip_tokens from ..algo.language_handler import is_valid_file from .git_provider import EDIT_TYPE, FilePatchInfo diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py index e2431645..d13a708c 100644 --- a/pr_agent/git_providers/bitbucket_provider.py +++ b/pr_agent/git_providers/bitbucket_provider.py @@ -228,6 +228,10 @@ class BitbucketProvider(GitProvider): ) return response + def get_line_link(self, relevant_file: str, relevant_line_start: 
int, relevant_line_end: int = None) -> str: + link = f"{self.pr_url}/#L{relevant_file}T{relevant_line_start}" + return link + def generate_link_to_relevant_line_number(self, suggestion) -> str: try: relevant_file = suggestion['relevant file'].strip('`').strip("'") diff --git a/pr_agent/git_providers/bitbucket_server_provider.py b/pr_agent/git_providers/bitbucket_server_provider.py new file mode 100644 index 00000000..44347850 --- /dev/null +++ b/pr_agent/git_providers/bitbucket_server_provider.py @@ -0,0 +1,351 @@ +import json +from typing import Optional, Tuple +from urllib.parse import urlparse + +import requests +from atlassian.bitbucket import Bitbucket +from starlette_context import context + +from .git_provider import FilePatchInfo, GitProvider, EDIT_TYPE +from ..algo.pr_processing import find_line_number_of_relevant_line_in_file +from ..algo.utils import load_large_diff +from ..config_loader import get_settings +from ..log import get_logger + + +class BitbucketServerProvider(GitProvider): + def __init__( + self, pr_url: Optional[str] = None, incremental: Optional[bool] = False + ): + s = requests.Session() + try: + bearer = context.get("bitbucket_bearer_token", None) + s.headers["Authorization"] = f"Bearer {bearer}" + except Exception: + s.headers[ + "Authorization" + ] = f'Bearer {get_settings().get("BITBUCKET_SERVER.BEARER_TOKEN", None)}' + + s.headers["Content-Type"] = "application/json" + self.headers = s.headers + self.bitbucket_server_url = None + self.workspace_slug = None + self.repo_slug = None + self.repo = None + self.pr_num = None + self.pr = None + self.pr_url = pr_url + self.temp_comments = [] + self.incremental = incremental + self.diff_files = None + self.bitbucket_pull_request_api_url = pr_url + + self.bitbucket_server_url = self._parse_bitbucket_server(url=pr_url) + self.bitbucket_client = Bitbucket(url=self.bitbucket_server_url, + token=get_settings().get("BITBUCKET_SERVER.BEARER_TOKEN", None)) + + if pr_url: + self.set_pr(pr_url) + + def 
get_repo_settings(self): + try: + url = (f"{self.bitbucket_server_url}/projects/{self.workspace_slug}/repos/{self.repo_slug}/src/" + f"{self.pr.destination_branch}/.pr_agent.toml") + response = requests.request("GET", url, headers=self.headers) + if response.status_code == 404: # not found + return "" + contents = response.text.encode('utf-8') + return contents + except Exception: + return "" + + def publish_code_suggestions(self, code_suggestions: list) -> bool: + """ + Publishes code suggestions as comments on the PR. + """ + post_parameters_list = [] + for suggestion in code_suggestions: + body = suggestion["body"] + relevant_file = suggestion["relevant_file"] + relevant_lines_start = suggestion["relevant_lines_start"] + relevant_lines_end = suggestion["relevant_lines_end"] + + if not relevant_lines_start or relevant_lines_start == -1: + if get_settings().config.verbosity_level >= 2: + get_logger().exception( + f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}" + ) + continue + + if relevant_lines_end < relevant_lines_start: + if get_settings().config.verbosity_level >= 2: + get_logger().exception( + f"Failed to publish code suggestion, " + f"relevant_lines_end is {relevant_lines_end} and " + f"relevant_lines_start is {relevant_lines_start}" + ) + continue + + if relevant_lines_end > relevant_lines_start: + post_parameters = { + "body": body, + "path": relevant_file, + "line": relevant_lines_end, + "start_line": relevant_lines_start, + "start_side": "RIGHT", + } + else: # API is different for single line comments + post_parameters = { + "body": body, + "path": relevant_file, + "line": relevant_lines_start, + "side": "RIGHT", + } + post_parameters_list.append(post_parameters) + + try: + self.publish_inline_comments(post_parameters_list) + return True + except Exception as e: + if get_settings().config.verbosity_level >= 2: + get_logger().error(f"Failed to publish code suggestion, error: {e}") + return False + + def 
is_supported(self, capability: str) -> bool: + if capability in ['get_issue_comments', 'get_labels', 'gfm_markdown']: + return False + return True + + def set_pr(self, pr_url: str): + self.workspace_slug, self.repo_slug, self.pr_num = self._parse_pr_url(pr_url) + self.pr = self._get_pr() + + def get_file(self, path: str, commit_id: str): + file_content = "" + try: + file_content = self.bitbucket_client.get_content_of_file(self.workspace_slug, + self.repo_slug, + path, + commit_id) + except requests.HTTPError as e: + get_logger().debug(f"File {path} not found at commit id: {commit_id}") + return file_content + + def get_files(self): + changes = self.bitbucket_client.get_pull_requests_changes(self.workspace_slug, self.repo_slug, self.pr_num) + diffstat = [change["path"]['toString'] for change in changes] + return diffstat + + def get_diff_files(self) -> list[FilePatchInfo]: + if self.diff_files: + return self.diff_files + + commits_in_pr = self.bitbucket_client.get_pull_requests_commits( + self.workspace_slug, + self.repo_slug, + self.pr_num + ) + + commit_list = list(commits_in_pr) + base_sha, head_sha = commit_list[0]['parents'][0]['id'], commit_list[-1]['id'] + + diff_files = [] + original_file_content_str = "" + new_file_content_str = "" + + changes = self.bitbucket_client.get_pull_requests_changes(self.workspace_slug, self.repo_slug, self.pr_num) + for change in changes: + file_path = change['path']['toString'] + match change['type']: + case 'ADD': + edit_type = EDIT_TYPE.ADDED + new_file_content_str = self.get_file(file_path, head_sha) + if isinstance(new_file_content_str, (bytes, bytearray)): + new_file_content_str = new_file_content_str.decode("utf-8") + original_file_content_str = "" + case 'DELETE': + edit_type = EDIT_TYPE.DELETED + new_file_content_str = "" + original_file_content_str = self.get_file(file_path, base_sha) + if isinstance(original_file_content_str, (bytes, bytearray)): + original_file_content_str = original_file_content_str.decode("utf-8") + 
case 'RENAME': + edit_type = EDIT_TYPE.RENAMED + case _: + edit_type = EDIT_TYPE.MODIFIED + original_file_content_str = self.get_file(file_path, base_sha) + if isinstance(original_file_content_str, (bytes, bytearray)): + original_file_content_str = original_file_content_str.decode("utf-8") + new_file_content_str = self.get_file(file_path, head_sha) + if isinstance(new_file_content_str, (bytes, bytearray)): + new_file_content_str = new_file_content_str.decode("utf-8") + + patch = load_large_diff(file_path, new_file_content_str, original_file_content_str) + + diff_files.append( + FilePatchInfo( + original_file_content_str, + new_file_content_str, + patch, + file_path, + edit_type=edit_type, + ) + ) + + self.diff_files = diff_files + return diff_files + + def publish_comment(self, pr_comment: str, is_temporary: bool = False): + if not is_temporary: + self.bitbucket_client.add_pull_request_comment(self.workspace_slug, self.repo_slug, self.pr_num, pr_comment) + + def remove_initial_comment(self): + try: + for comment in self.temp_comments: + self.remove_comment(comment) + except ValueError as e: + get_logger().exception(f"Failed to remove temp comments, error: {e}") + + def remove_comment(self, comment): + pass + + # funtion to create_inline_comment + def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str): + position, absolute_position = find_line_number_of_relevant_line_in_file( + self.get_diff_files(), + relevant_file.strip('`'), + relevant_line_in_file + ) + if position == -1: + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}") + subject_type = "FILE" + else: + subject_type = "LINE" + path = relevant_file.strip() + return dict(body=body, path=path, position=absolute_position) if subject_type == "LINE" else {} + + def publish_inline_comment(self, comment: str, from_line: int, file: str): + payload = { + "text": comment, + "severity": "NORMAL", + 
"anchor": { + "diffType": "EFFECTIVE", + "path": file, + "lineType": "ADDED", + "line": from_line, + "fileType": "TO" + } + } + + response = requests.post(url=self._get_pr_comments_url(), json=payload, headers=self.headers) + return response + + def generate_link_to_relevant_line_number(self, suggestion) -> str: + try: + relevant_file = suggestion['relevant file'].strip('`').strip("'") + relevant_line_str = suggestion['relevant line'] + if not relevant_line_str: + return "" + + diff_files = self.get_diff_files() + position, absolute_position = find_line_number_of_relevant_line_in_file \ + (diff_files, relevant_file, relevant_line_str) + + if absolute_position != -1 and self.pr_url: + link = f"{self.pr_url}/#L{relevant_file}T{absolute_position}" + return link + except Exception as e: + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"Failed adding line link, error: {e}") + + return "" + + def publish_inline_comments(self, comments: list[dict]): + for comment in comments: + self.publish_inline_comment(comment['body'], comment['position'], comment['path']) + + def get_title(self): + return self.pr.title + + def get_languages(self): + return {"yaml": 0} # devops LOL + + def get_pr_branch(self): + return self.pr.fromRef['displayId'] + + def get_pr_description_full(self): + return self.pr.description + + def get_user_id(self): + return 0 + + def get_issue_comments(self): + raise NotImplementedError( + "Bitbucket provider does not support issue comments yet" + ) + + def add_eyes_reaction(self, issue_comment_id: int) -> Optional[int]: + return True + + def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool: + return True + + @staticmethod + def _parse_bitbucket_server(url: str) -> str: + parsed_url = urlparse(url) + return f"{parsed_url.scheme}://{parsed_url.netloc}" + + @staticmethod + def _parse_pr_url(pr_url: str) -> Tuple[str, str, int]: + parsed_url = urlparse(pr_url) + path_parts = parsed_url.path.strip("/").split("/") + if 
len(path_parts) < 6 or path_parts[4] != "pull-requests": + raise ValueError( + "The provided URL does not appear to be a Bitbucket PR URL" + ) + + workspace_slug = path_parts[1] + repo_slug = path_parts[3] + try: + pr_number = int(path_parts[5]) + except ValueError as e: + raise ValueError("Unable to convert PR number to integer") from e + + return workspace_slug, repo_slug, pr_number + + def _get_repo(self): + if self.repo is None: + self.repo = self.bitbucket_client.get_repo(self.workspace_slug, self.repo_slug) + return self.repo + + def _get_pr(self): + pr = self.bitbucket_client.get_pull_request(self.workspace_slug, self.repo_slug, pull_request_id=self.pr_num) + return type('new_dict', (object,), pr) + + def _get_pr_file_content(self, remote_link: str): + return "" + + def get_commit_messages(self): + def get_commit_messages(self): + raise NotImplementedError("Get commit messages function not implemented yet.") + # bitbucket does not support labels + def publish_description(self, pr_title: str, description: str): + payload = json.dumps({ + "description": description, + "title": pr_title + }) + + response = requests.put(url=self.bitbucket_pull_request_api_url, headers=self.headers, data=payload) + return response + + # bitbucket does not support labels + def publish_labels(self, pr_types: list): + pass + + # bitbucket does not support labels + def get_labels(self): + pass + + def _get_pr_comments_url(self): + return f"{self.bitbucket_server_url}/rest/api/latest/projects/{self.workspace_slug}/repos/{self.repo_slug}/pull-requests/{self.pr_num}/comments" diff --git a/pr_agent/git_providers/codecommit_provider.py b/pr_agent/git_providers/codecommit_provider.py index a4836849..399f0a94 100644 --- a/pr_agent/git_providers/codecommit_provider.py +++ b/pr_agent/git_providers/codecommit_provider.py @@ -6,9 +6,9 @@ from urllib.parse import urlparse from pr_agent.git_providers.codecommit_client import CodeCommitClient -from ..algo.language_handler import is_valid_file, 
language_extension_map from ..algo.utils import load_large_diff from .git_provider import EDIT_TYPE, FilePatchInfo, GitProvider +from ..config_loader import get_settings from ..log import get_logger @@ -269,6 +269,8 @@ class CodeCommitProvider(GitProvider): # where each dictionary item is a language name. # We build that language->extension dictionary here in main_extensions_flat. main_extensions_flat = {} + language_extension_map_org = get_settings().language_extension_map_org + language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()} for language, extensions in language_extension_map.items(): for ext in extensions: main_extensions_flat[ext] = language diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py index 05122f9c..3ad1b2e0 100644 --- a/pr_agent/git_providers/git_provider.py +++ b/pr_agent/git_providers/git_provider.py @@ -5,6 +5,7 @@ from dataclasses import dataclass from enum import Enum from typing import Optional +from pr_agent.config_loader import get_settings from pr_agent.log import get_logger @@ -62,7 +63,7 @@ class GitProvider(ABC): def get_pr_description(self, *, full: bool = True) -> str: from pr_agent.config_loader import get_settings - from pr_agent.algo.pr_processing import clip_tokens + from pr_agent.algo.utils import clip_tokens max_tokens_description = get_settings().get("CONFIG.MAX_DESCRIPTION_TOKENS", None) description = self.get_pr_description_full() if full else self.get_user_description() if max_tokens_description: @@ -88,6 +89,9 @@ class GitProvider(ABC): def get_pr_id(self): return "" + def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str: + return "" + #### comments operations #### @abstractmethod def publish_comment(self, pr_comment: str, is_temporary: bool = False): @@ -173,26 +177,42 @@ def get_main_pr_language(languages, files) -> str: extension_list.append(file.filename.rsplit('.')[-1]) # get the most 
common extension - most_common_extension = max(set(extension_list), key=extension_list.count) + most_common_extension = '.' + max(set(extension_list), key=extension_list.count) + try: + language_extension_map_org = get_settings().language_extension_map_org + language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()} - # look for a match. TBD: add more languages, do this systematically - if most_common_extension == 'py' and top_language == 'python' or \ - most_common_extension == 'js' and top_language == 'javascript' or \ - most_common_extension == 'ts' and top_language == 'typescript' or \ - most_common_extension == 'go' and top_language == 'go' or \ - most_common_extension == 'java' and top_language == 'java' or \ - most_common_extension == 'c' and top_language == 'c' or \ - most_common_extension == 'cpp' and top_language == 'c++' or \ - most_common_extension == 'cs' and top_language == 'c#' or \ - most_common_extension == 'swift' and top_language == 'swift' or \ - most_common_extension == 'php' and top_language == 'php' or \ - most_common_extension == 'rb' and top_language == 'ruby' or \ - most_common_extension == 'rs' and top_language == 'rust' or \ - most_common_extension == 'scala' and top_language == 'scala' or \ - most_common_extension == 'kt' and top_language == 'kotlin' or \ - most_common_extension == 'pl' and top_language == 'perl' or \ - most_common_extension == top_language: - main_language_str = top_language + if top_language in language_extension_map and most_common_extension in language_extension_map[top_language]: + main_language_str = top_language + else: + for language, extensions in language_extension_map.items(): + if most_common_extension in extensions: + main_language_str = language + break + except Exception as e: + get_logger().exception(f"Failed to get main language: {e}") + pass + + ## old approach: + # most_common_extension = max(set(extension_list), key=extension_list.count) + # if most_common_extension == 
'py' and top_language == 'python' or \ + # most_common_extension == 'js' and top_language == 'javascript' or \ + # most_common_extension == 'ts' and top_language == 'typescript' or \ + # most_common_extension == 'tsx' and top_language == 'typescript' or \ + # most_common_extension == 'go' and top_language == 'go' or \ + # most_common_extension == 'java' and top_language == 'java' or \ + # most_common_extension == 'c' and top_language == 'c' or \ + # most_common_extension == 'cpp' and top_language == 'c++' or \ + # most_common_extension == 'cs' and top_language == 'c#' or \ + # most_common_extension == 'swift' and top_language == 'swift' or \ + # most_common_extension == 'php' and top_language == 'php' or \ + # most_common_extension == 'rb' and top_language == 'ruby' or \ + # most_common_extension == 'rs' and top_language == 'rust' or \ + # most_common_extension == 'scala' and top_language == 'scala' or \ + # most_common_extension == 'kt' and top_language == 'kotlin' or \ + # most_common_extension == 'pl' and top_language == 'perl' or \ + # most_common_extension == top_language: + # main_language_str = top_language except Exception as e: get_logger().exception(e) diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 634b8694..1fb85164 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -8,8 +8,8 @@ from retry import retry from starlette_context import context from ..algo.language_handler import is_valid_file -from ..algo.pr_processing import clip_tokens, find_line_number_of_relevant_line_in_file -from ..algo.utils import load_large_diff +from ..algo.pr_processing import find_line_number_of_relevant_line_in_file +from ..algo.utils import load_large_diff, clip_tokens from ..config_loader import get_settings from ..log import get_logger from ..servers.utils import RateLimitExceeded @@ -501,6 +501,15 @@ class GithubProvider(GitProvider): return "" + def 
get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str: + sha_file = hashlib.sha256(relevant_file.encode('utf-8')).hexdigest() + if relevant_line_end: + link = f"https://github.com/{self.repo}/pull/{self.pr_num}/files#diff-{sha_file}R{relevant_line_start}-R{relevant_line_end}" + else: + link = f"https://github.com/{self.repo}/pull/{self.pr_num}/files#diff-{sha_file}R{relevant_line_start}" + return link + + def get_pr_id(self): try: pr_id = f"{self.repo}/{self.pr_num}" diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index 078ca9dd..5d110359 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -7,8 +7,8 @@ import gitlab from gitlab import GitlabGetError from ..algo.language_handler import is_valid_file -from ..algo.pr_processing import clip_tokens, find_line_number_of_relevant_line_in_file -from ..algo.utils import load_large_diff +from ..algo.pr_processing import find_line_number_of_relevant_line_in_file +from ..algo.utils import load_large_diff, clip_tokens from ..config_loader import get_settings from .git_provider import EDIT_TYPE, FilePatchInfo, GitProvider from ..log import get_logger @@ -43,7 +43,7 @@ class GitLabProvider(GitProvider): self.incremental = incremental def is_supported(self, capability: str) -> bool: - if capability in ['get_issue_comments', 'create_inline_comment', 'publish_inline_comments', 'gfm_markdown']: + if capability in ['get_issue_comments', 'create_inline_comment', 'publish_inline_comments']: # gfm_markdown is supported in gitlab ! 
return False return True @@ -422,6 +422,14 @@ class GitLabProvider(GitProvider): except: return "" + def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str: + if relevant_line_end: + link = f"https://gitlab.com/codiumai/pr-agent/-/blob/{self.mr.source_branch}/{relevant_file}?ref_type=heads#L{relevant_line_start}-L{relevant_line_end}" + else: + link = f"https://gitlab.com/codiumai/pr-agent/-/blob/{self.mr.source_branch}/{relevant_file}?ref_type=heads#L{relevant_line_start}" + return link + + def generate_link_to_relevant_line_number(self, suggestion) -> str: try: relevant_file = suggestion['relevant file'].strip('`').strip("'") diff --git a/pr_agent/servers/bitbucket_server_webhook.py b/pr_agent/servers/bitbucket_server_webhook.py new file mode 100644 index 00000000..c6ce8353 --- /dev/null +++ b/pr_agent/servers/bitbucket_server_webhook.py @@ -0,0 +1,64 @@ +import json + +import uvicorn +from fastapi import APIRouter, FastAPI +from fastapi.encoders import jsonable_encoder +from starlette import status +from starlette.background import BackgroundTasks +from starlette.middleware import Middleware +from starlette.requests import Request +from starlette.responses import JSONResponse +from starlette_context.middleware import RawContextMiddleware + +from pr_agent.agent.pr_agent import PRAgent +from pr_agent.config_loader import get_settings +from pr_agent.log import get_logger + +router = APIRouter() + + +def handle_request(background_tasks: BackgroundTasks, url: str, body: str, log_context: dict): + log_context["action"] = body + log_context["event"] = "pull_request" if body == "review" else "comment" + log_context["api_url"] = url + with get_logger().contextualize(**log_context): + background_tasks.add_task(PRAgent().handle_request, url, body) + + +@router.post("/webhook") +async def handle_webhook(background_tasks: BackgroundTasks, request: Request): + log_context = {"server_type": "bitbucket_server"} + data = await 
request.json() + get_logger().info(json.dumps(data)) + + pr_id = data['pullRequest']['id'] + repository_name = data['pullRequest']['toRef']['repository']['slug'] + project_name = data['pullRequest']['toRef']['repository']['project']['key'] + bitbucket_server = get_settings().get("BITBUCKET_SERVER.URL") + pr_url = f"{bitbucket_server}/projects/{project_name}/repos/{repository_name}/pull-requests/{pr_id}" + + log_context["api_url"] = pr_url + log_context["event"] = "pull_request" + + handle_request(background_tasks, pr_url, "review", log_context) + return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"})) + + +@router.get("/") +async def root(): + return {"status": "ok"} + + +def start(): + bitbucket_server_url = get_settings().get("BITBUCKET_SERVER.URL", None) + if not bitbucket_server_url: + raise ValueError("BITBUCKET_SERVER.URL is not set") + get_settings().config.git_provider = "bitbucket_server" + middleware = [Middleware(RawContextMiddleware)] + app = FastAPI(middleware=middleware) + app.include_router(router) + uvicorn.run(app, host="0.0.0.0", port=3000) + + +if __name__ == '__main__': + start() diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 38e96fd1..51de6693 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -57,6 +57,7 @@ include_generated_by_header=true [pr_code_suggestions] # /improve # num_code_suggestions=4 +summarize = false extra_instructions = "" rank_suggestions = false # params for '/improve --extended' mode diff --git a/pr_agent/settings/custom_labels.toml b/pr_agent/settings/custom_labels.toml index 43e14b0e..9c751d0e 100644 --- a/pr_agent/settings/custom_labels.toml +++ b/pr_agent/settings/custom_labels.toml @@ -3,16 +3,16 @@ enable_custom_labels=false ## template for custom labels #[custom_labels."Bug fix"] -#description = "Fixes a bug in the code" +#description = """Fixes a bug in the code""" 
#[custom_labels."Tests"] -#description = "Adds or modifies tests" +#description = """Adds or modifies tests""" #[custom_labels."Bug fix with tests"] -#description = "Fixes a bug in the code and adds or modifies tests" +#description = """Fixes a bug in the code and adds or modifies tests""" #[custom_labels."Refactoring"] -#description = "Code refactoring without changing functionality" +#description = """Code refactoring without changing functionality""" #[custom_labels."Enhancement"] -#description = "Adds new features or functionality" +#description = """Adds new features or functionality""" #[custom_labels."Documentation"] -#description = "Adds or modifies documentation" +#description = """Adds or modifies documentation""" #[custom_labels."Other"] -#description = "Other changes that do not fit in any of the above categories" \ No newline at end of file +#description = """Other changes that do not fit in any of the above categories""" \ No newline at end of file diff --git a/pr_agent/settings/pr_add_docs.toml b/pr_agent/settings/pr_add_docs.toml index 31b7195c..fbf4b475 100644 --- a/pr_agent/settings/pr_add_docs.toml +++ b/pr_agent/settings/pr_add_docs.toml @@ -1,6 +1,6 @@ [pr_add_docs_prompt] system="""You are a language model called PR-Code-Documentation Agent, that specializes in generating documentation for code. -Your task is to generate meaningfull {{ docs_for_language }} to a PR (the '+' lines). +Your task is to generate meaningfull {{ docs_for_language }} to a PR (lines starting with '+'). 
Example for a PR Diff input: ' @@ -103,7 +103,7 @@ Description: '{{description}}' {%- if language %} -Main language: {{language}} +Main PR language: '{{language}}' {%- endif %} diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml index 42ec7441..4b752272 100644 --- a/pr_agent/settings/pr_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_code_suggestions_prompts.toml @@ -1,6 +1,6 @@ [pr_code_suggestions_prompt] -system="""You are a language model called PR-Code-Reviewer, that specializes in suggesting code improvements for Pull Request (PR). -Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR (the '+' lines in the diff). +system="""You are PR-Reviewer, a language model that specializes in suggesting code improvements for a Pull Request (PR). +Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR diff (lines starting with '+'). Example for a PR Diff input: ' @@ -120,7 +120,7 @@ Description: '{{description}}' {%- if language %} -Main language: {{language}} +Main PR language: '{{ language }}' {%- endif %} diff --git a/pr_agent/settings/pr_custom_labels.toml b/pr_agent/settings/pr_custom_labels.toml index 1dbb6f8d..ddcc8cb0 100644 --- a/pr_agent/settings/pr_custom_labels.toml +++ b/pr_agent/settings/pr_custom_labels.toml @@ -1,8 +1,10 @@ [pr_custom_labels_prompt] -system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests. -Your task is to label the type of the PR content. -- Make sure not to focus the new PR code (the '+' lines). -- If needed, each YAML output should be in block scalar format ('|-') +system="""You are PR-Reviewer, a language model designed to review a git Pull Request (PR). +Your task is to provide labels that describe the PR content. 
+{%- if enable_custom_labels %} +Thoroughly read the labels name and the provided description, and decide whether the label is relevant to the PR. +{%- endif %} + {%- if extra_instructions %} Extra instructions from the user: @@ -11,52 +13,56 @@ Extra instructions from the user: ' {% endif %} -You must use the following YAML schema to format your answer: -```yaml -PR Type: - type: array + +The output must be a YAML object equivalent to type $Labels, according to the following Pydantic definitions: +' {%- if enable_custom_labels %} - description: Labels that are applicable to the Pull Request. Don't output the description in the parentheses. If none of the labels is relevant to the PR, output an empty array. -{%- endif %} - items: - type: string - enum: -{%- if enable_custom_labels %} -{{ custom_labels }} + +{{ custom_labels_class }} + {%- else %} - - Bug fix - - Tests - - Refactoring - - Enhancement - - Documentation - - Other +class Label(str, Enum): + bug_fix = "Bug fix" + tests = "Tests" + refactoring = "Refactoring" + enhancement = "Enhancement" + documentation = "Documentation" + other = "Other" {%- endif %} +class Labels(BaseModel): + labels: List[Label] = Field(min_items=0, description="custom labels that describe the PR. Return the label value, not the name.") +' + + Example output: ```yaml -PR Type: -{%- if enable_custom_labels %} -{{ custom_labels_examples }} -{%- else %} - - Bug fix -{%- endif %} +labels: +- ... +- ... ``` -Make sure to output a valid YAML. Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields. +Answer should be a valid YAML, and nothing else. 
""" user="""PR Info: + Previous title: '{{title}}' -Previous description: '{{description}}' -Branch: '{{branch}}' + +Branch: '{{ branch }}' + +Description: '{{ description }}' + {%- if language %} -Main language: {{language}} +Main PR language: '{{ language }}' {%- endif %} {%- if commit_messages_str %} Commit messages: -{{commit_messages_str}} +' +{{ commit_messages_str }} +' {%- endif %} diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index 761c36c1..9aefe0da 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -1,9 +1,9 @@ [pr_description_prompt] -system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests. -Your task is to provide full description of a Pull Request (PR) content. -- Make sure to focus on the new PR code (the '+' lines). -- Notice that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or not up-to-date. Hence, compare them to the PR diff code, and use them only as a reference. -- Emphasize first the most important changes, and then the less important ones. +system="""You are PR-Reviewer, a language model designed to review a git Pull Request (PR). +Your task is to provide a full description for the PR content. +- Make sure to focus on the new PR code (lines starting with '+'). +- Keep in mind that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or out of date. Hence, compare them to the PR diff code, and use them only as a reference. +- Prioritize the most significant PR changes first, followed by the minor ones. 
- If needed, each YAML output should be in block scalar format ('|-') {%- if extra_instructions %} @@ -13,81 +13,83 @@ Extra instructions from the user: ' {% endif %} -You must use the following YAML schema to format your answer: -```yaml -PR Title: - type: string - description: an informative title for the PR, describing its main theme -PR Type: - type: string - enum: - - Bug fix - - Tests - - Refactoring - - Enhancement - - Documentation - - Other + +The output must be a YAML object equivalent to type $PRDescription, according to the following Pydantic definitions: +' +class PRType(str, Enum): + bug_fix = "Bug fix" + tests = "Tests" + refactoring = "Refactoring" + enhancement = "Enhancement" + documentation = "Documentation" + other = "Other" + {%- if enable_custom_labels %} -PR Labels: - type: array - description: Labels that are applicable to the Pull Request. Don't output the description in the parentheses. If none of the labels is relevant to the PR, output an empty array. - items: - type: string - enum: -{{ custom_labels }} + +{{ custom_labels_class }} + {%- endif %} -PR Description: - type: string - description: an informative and concise description of the PR. - {%- if use_bullet_points %} Use bullet points. {% endif %} -PR Main Files Walkthrough: - type: array - maxItems: 10 - description: |- - a walkthrough of the PR changes. Review main files, and shortly describe the changes in each file (up to 10 most important files). 
- items: - filename: - type: string - description: the relevant file full path - changes in file: - type: string - description: minimal and concise description of the changes in the relevant file -``` + +class FileWalkthrough(BaseModel): + filename: str = Field(description="the relevant file full path") + changes_in_file: str = Field(description="minimal and concise description of the changes in the relevant file") + +Class PRDescription(BaseModel): + title: str = Field(description="an informative title for the PR, describing its main theme") + type: List[PRType] = Field(description="one or more types that describe the PR type. . Return the label value, not the name.") + description: str = Field(description="an informative and concise description of the PR. {%- if use_bullet_points %} Use bullet points. {% endif %}") +{%- if enable_custom_labels %} + labels: List[Label] = Field(min_items=0, description="custom labels that describe the PR. Return the label value, not the name.") +{%- endif %} + main_files_walkthrough: List[FileWalkthrough] = Field(max_items=10) +' Example output: ```yaml -PR Title: |- - ... -PR Type: +title: |- ... +type: +- ... +- ... {%- if enable_custom_labels %} -PR Labels: +labels: - ... - ... {%- endif %} -PR Description: |- +description: |- ... -PR Main Files Walkthrough: - - ... - - ... +main_files_walkthrough: +- ... +- ... ``` -Make sure to output a valid YAML. Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields. +Answer should be a valid YAML, and nothing else. 
Each YAML output MUST be after a newline, with proper indent, and block scalar indicator ('|-') """ user="""PR Info: + Previous title: '{{title}}' -Previous description: '{{description}}' + +{%- if description %} + +Previous description: +' +{{ description }} +' +{%- endif %} + Branch: '{{branch}}' {%- if language %} -Main language: {{language}} +Main PR language: '{{ language }}' {%- endif %} {%- if commit_messages_str %} Commit messages: -{{commit_messages_str}} +' +{{ commit_messages_str }} +' {%- endif %} @@ -95,6 +97,8 @@ The PR Git Diff: ``` {{diff}} ``` + + Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines. Response (should be a valid YAML, and nothing else): diff --git a/pr_agent/settings/pr_information_from_user_prompts.toml b/pr_agent/settings/pr_information_from_user_prompts.toml index 8d628f7a..ca4cbe3c 100644 --- a/pr_agent/settings/pr_information_from_user_prompts.toml +++ b/pr_agent/settings/pr_information_from_user_prompts.toml @@ -1,5 +1,5 @@ [pr_information_from_user_prompt] -system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests. +system="""You are PR-Reviewer, a language model designed to review a git Pull Request (PR). Given the PR Info and the PR Git Diff, generate 3 short questions about the PR code for the PR author. The goal of the questions is to help the language model understand the PR better, so the questions should be insightful, informative, non-trivial, and relevant to the PR. You should prefer asking yes\\no questions, or multiple choice questions. Also add at least one open-ended question, but make sure they are not too difficult, and can be answered in a sentence or two. 
@@ -16,15 +16,21 @@ Questions to better understand the PR: user="""PR Info: Title: '{{title}}' + Branch: '{{branch}}' + Description: '{{description}}' + {%- if language %} -Main language: {{language}} + +Main PR language: '{{ language }}' {%- endif %} {%- if commit_messages_str %} Commit messages: +' {{commit_messages_str}} +' {%- endif %} diff --git a/pr_agent/settings/pr_questions_prompts.toml b/pr_agent/settings/pr_questions_prompts.toml index e306684d..63569197 100644 --- a/pr_agent/settings/pr_questions_prompts.toml +++ b/pr_agent/settings/pr_questions_prompts.toml @@ -1,22 +1,29 @@ [pr_questions_prompt] -system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests. -Your task is to answer questions about the new PR code (the '+' lines), and provide feedback. +system="""You are PR-Reviewer, a language model designed to review a git Pull Request (PR). +Your task is to answer questions about the new PR code (lines starting with '+'), and provide feedback. Be informative, constructive, and give examples. Try to be as specific as possible. Don't avoid answering the questions. You must answer the questions, as best as you can, without adding unrelated content. Make sure not to repeat modifications already implemented in the new PR code (the '+' lines). """ user="""PR Info: + Title: '{{title}}' + Branch: '{{branch}}' + Description: '{{description}}' + {%- if language %} -Main language: {{language}} + +Main PR language: '{{ language }}' {%- endif %} {%- if commit_messages_str %} Commit messages: -{{commit_messages_str}} +' +{{ commit_messages_str }} +' {%- endif %} diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index b75c296a..b3e8f9b4 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -1,6 +1,7 @@ [pr_review_prompt] -system="""You are PR-Reviewer, a language model designed to review git pull requests. 
+system="""You are PR-Reviewer, a language model designed to review a git Pull Request (PR). Your task is to provide constructive and concise feedback for the PR, and also provide meaningful code suggestions. +The review should focus on new code added in the PR diff (lines starting with '+') Example PR Diff input: ' @@ -22,14 +23,14 @@ code line that already existed in the file.... ... ' -The review should focus on new code added in the PR (lines starting with '+'), and not on code that already existed in the file (lines starting with '-', or without prefix). - {%- if num_code_suggestions > 0 %} + +Code suggestions guidelines: - Provide up to {{ num_code_suggestions }} code suggestions. Try to provide diverse and insightful suggestions. - Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices. - Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the PR code. - Don't suggest to add docstring, type hints, or comments. 
-- Suggestions should focus on improving the new code added in the PR (lines starting with '+') +- Suggestions should focus on the new code added in the PR diff (lines starting with '+') {%- endif %} {%- if extra_instructions %} @@ -179,16 +180,29 @@ Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'desc """ user="""PR Info: + Title: '{{title}}' + Branch: '{{branch}}' -Description: '{{description}}' + +{%- if description %} + +Description: +' +{{description}} +' +{%- endif %} + {%- if language %} -Main language: {{language}} + +Main PR language: '{{ language }}' {%- endif %} {%- if commit_messages_str %} Commit messages: +' {{commit_messages_str}} +' {%- endif %} {%- if question_str %} @@ -208,7 +222,7 @@ The PR Git Diff: ``` {{diff}} ``` -Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions. Focus on the '+' lines. + Response (should be a valid YAML, and nothing else): ```yaml diff --git a/pr_agent/settings/pr_sort_code_suggestions_prompts.toml b/pr_agent/settings/pr_sort_code_suggestions_prompts.toml index 16b6e861..f4a3f5bf 100644 --- a/pr_agent/settings/pr_sort_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_sort_code_suggestions_prompts.toml @@ -2,10 +2,10 @@ system=""" """ -user="""You are given a list of code suggestions to improve a PR: - +user="""You are given a list of code suggestions to improve a git Pull Request (PR): +' {{ suggestion_str|trim }} - +' Your task is to sort the code suggestions by their order of importance, and return a list with sorting order. The sorting order is a list of pairs, where each pair contains the index of the suggestion in the original list. 
diff --git a/pr_agent/settings/pr_update_changelog_prompts.toml b/pr_agent/settings/pr_update_changelog_prompts.toml index e9133e34..9d00f251 100644 --- a/pr_agent/settings/pr_update_changelog_prompts.toml +++ b/pr_agent/settings/pr_update_changelog_prompts.toml @@ -15,16 +15,23 @@ Extra instructions from the user: """ user="""PR Info: + Title: '{{title}}' + Branch: '{{branch}}' + Description: '{{description}}' + {%- if language %} -Main language: {{language}} + +Main PR language: '{{ language }}' {%- endif %} {%- if commit_messages_str %} Commit messages: -{{commit_messages_str}} +' +{{ commit_messages_str }} +' {%- endif %} diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py index 9e8d7f15..a65659d3 100644 --- a/pr_agent/tools/pr_code_suggestions.py +++ b/pr_agent/tools/pr_code_suggestions.py @@ -1,7 +1,6 @@ import copy import textwrap from typing import Dict, List - from jinja2 import Environment, StrictUndefined from pr_agent.algo.ai_handler import AiHandler @@ -55,9 +54,9 @@ class PRCodeSuggestions: try: get_logger().info('Generating code suggestions for PR...') if get_settings().config.publish_output: - self.git_provider.publish_comment("Preparing review...", is_temporary=True) + self.git_provider.publish_comment("Preparing suggestions...", is_temporary=True) - get_logger().info('Preparing PR review...') + get_logger().info('Preparing PR code suggestions...') if not self.is_extended: await retry_with_fallback_models(self._prepare_prediction) data = self._prepare_pr_code_suggestions() @@ -73,10 +72,14 @@ class PRCodeSuggestions: data['Code suggestions'] = await self.rank_suggestions(data['Code suggestions']) if get_settings().config.publish_output: - get_logger().info('Pushing PR review...') + get_logger().info('Pushing PR code suggestions...') self.git_provider.remove_initial_comment() - get_logger().info('Pushing inline code suggestions...') - self.push_inline_code_suggestions(data) + if 
get_settings().pr_code_suggestions.summarize: + get_logger().info('Pushing summarize code suggestions...') + self.publish_summarizes_suggestions(data) + else: + get_logger().info('Pushing inline code suggestions...') + self.push_inline_code_suggestions(data) except Exception as e: get_logger().error(f"Failed to generate code suggestions for PR, error: {e}") @@ -244,4 +247,27 @@ class PRCodeSuggestions: return data_sorted + def publish_summarizes_suggestions(self, data: Dict): + try: + data_markdown = "## PR Code Suggestions\n\n" + for s in data['Code suggestions']: + code_snippet_link = self.git_provider.get_line_link(s['relevant file'], s['relevant lines start'], + s['relevant lines end']) + data_markdown += f"\n💡 Suggestion:\n\n**{s['suggestion content']}**\n\n" + if code_snippet_link: + data_markdown += f" File: [{s['relevant file']} ({s['relevant lines start']}-{s['relevant lines end']})]({code_snippet_link})\n\n" + else: + data_markdown += f"File: {s['relevant file']} ({s['relevant lines start']}-{s['relevant lines end']})\n\n" + if self.git_provider.is_supported("gfm_markdown"): + data_markdown += "
<details> <summary> Example code:</summary>\n\n" + data_markdown += f"___\n\n" + data_markdown += f"Existing code:\n```{self.main_language}\n{s['existing code']}\n```\n" + data_markdown += f"Improved code:\n```{self.main_language}\n{s['improved code']}\n```\n" + if self.git_provider.is_supported("gfm_markdown"): + data_markdown += "</details>
\n" + data_markdown += "\n___\n\n" + self.git_provider.publish_comment(data_markdown) + except Exception as e: + get_logger().info(f"Failed to publish summarized code suggestions, error: {e}") + diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py index 47e3f03f..0e7244d3 100644 --- a/pr_agent/tools/pr_description.py +++ b/pr_agent/tools/pr_description.py @@ -44,8 +44,7 @@ class PRDescription: "extra_instructions": get_settings().pr_description.extra_instructions, "commit_messages_str": self.git_provider.get_commit_messages(), "enable_custom_labels": get_settings().config.enable_custom_labels, - "custom_labels": "", - "custom_labels_examples": "", + "custom_labels_class": "", # will be filled if necessary in 'set_custom_labels' function } self.user_description = self.git_provider.get_user_description() @@ -175,16 +174,16 @@ class PRDescription: pr_types = [] # If the 'PR Type' key is present in the dictionary, split its value by comma and assign it to 'pr_types' - if 'PR Labels' in self.data: - if type(self.data['PR Labels']) == list: - pr_types = self.data['PR Labels'] - elif type(self.data['PR Labels']) == str: - pr_types = self.data['PR Labels'].split(',') - elif 'PR Type' in self.data: - if type(self.data['PR Type']) == list: - pr_types = self.data['PR Type'] - elif type(self.data['PR Type']) == str: - pr_types = self.data['PR Type'].split(',') + if 'labels' in self.data: + if type(self.data['labels']) == list: + pr_types = self.data['labels'] + elif type(self.data['labels']) == str: + pr_types = self.data['labels'].split(',') + elif 'type' in self.data: + if type(self.data['type']) == list: + pr_types = self.data['type'] + elif type(self.data['type']) == str: + pr_types = self.data['type'].split(',') return pr_types def _prepare_pr_answer_with_markers(self) -> Tuple[str, str]: @@ -196,12 +195,12 @@ class PRDescription: else: ai_header = "" - ai_type = self.data.get('PR Type') + ai_type = self.data.get('type') if ai_type and not 
re.search(r'', body): pr_type = f"{ai_header}{ai_type}" body = body.replace('pr_agent:type', pr_type) - ai_summary = self.data.get('PR Description') + ai_summary = self.data.get('description') if ai_summary and not re.search(r'', body): summary = f"{ai_header}{ai_summary}" body = body.replace('pr_agent:summary', summary) @@ -231,16 +230,16 @@ class PRDescription: # Iterate over the dictionary items and append the key and value to 'markdown_text' in a markdown format markdown_text = "" # Don't display 'PR Labels' - if 'PR Labels' in self.data and self.git_provider.is_supported("get_labels"): - self.data.pop('PR Labels') + if 'labels' in self.data and self.git_provider.is_supported("get_labels"): + self.data.pop('labels') if not get_settings().pr_description.enable_pr_type: - self.data.pop('PR Type') + self.data.pop('type') for key, value in self.data.items(): markdown_text += f"## {key}\n\n" markdown_text += f"{value}\n\n" # Remove the 'PR Title' key from the dictionary - ai_title = self.data.pop('PR Title', self.vars["title"]) + ai_title = self.data.pop('title', self.vars["title"]) if get_settings().pr_description.keep_original_user_title: # Assign the original PR title to the 'title' variable title = self.vars["title"] @@ -259,7 +258,7 @@ class PRDescription: pr_body += "
<details> <summary>files:</summary>\n\n" for file in value: filename = file['filename'].replace("'", "`") - description = file['changes in file'] + description = file['changes_in_file'] pr_body += f'- `{filename}`: {description}\n' if self.git_provider.is_supported("gfm_markdown"): pr_body +="</details>
\n" diff --git a/pr_agent/tools/pr_generate_labels.py b/pr_agent/tools/pr_generate_labels.py index e413e96f..6ea322a4 100644 --- a/pr_agent/tools/pr_generate_labels.py +++ b/pr_agent/tools/pr_generate_labels.py @@ -43,9 +43,8 @@ class PRGenerateLabels: "use_bullet_points": get_settings().pr_description.use_bullet_points, "extra_instructions": get_settings().pr_description.extra_instructions, "commit_messages_str": self.git_provider.get_commit_messages(), - "custom_labels": "", - "custom_labels_examples": "", "enable_custom_labels": get_settings().config.enable_custom_labels, + "custom_labels_class": "", # will be filled if necessary in 'set_custom_labels' function } # Initialize the token handler @@ -148,6 +147,9 @@ class PRGenerateLabels: user=user_prompt ) + if get_settings().config.verbosity_level >= 2: + get_logger().info(f"\nAI response:\n{response}") + return response def _prepare_data(self): @@ -159,11 +161,11 @@ class PRGenerateLabels: def _prepare_labels(self) -> List[str]: pr_types = [] - # If the 'PR Type' key is present in the dictionary, split its value by comma and assign it to 'pr_types' - if 'PR Type' in self.data: - if type(self.data['PR Type']) == list: - pr_types = self.data['PR Type'] - elif type(self.data['PR Type']) == str: - pr_types = self.data['PR Type'].split(',') + # If the 'labels' key is present in the dictionary, split its value by comma and assign it to 'pr_types' + if 'labels' in self.data: + if type(self.data['labels']) == list: + pr_types = self.data['labels'] + elif type(self.data['labels']) == str: + pr_types = self.data['labels'].split(',') return pr_types diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index c3e35295..8e4be390 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -250,7 +250,7 @@ class PRReviewer: if not get_settings().get("CONFIG.CLI_MODE", False): markdown_text += "\n### How to use\n" if self.git_provider.is_supported("gfm_markdown"): - markdown_text += 
"\n**
Instructions**\n" + markdown_text += "\n
Instructions\n\n" bot_user = "[bot]" if get_settings().github_app.override_deployment_type else get_settings().github_app.bot_user if user and bot_user not in user: markdown_text += bot_help_text(user) diff --git a/tests/unittest/test_bitbucket_provider.py b/tests/unittest/test_bitbucket_provider.py index 3bb64a0c..e17a26ce 100644 --- a/tests/unittest/test_bitbucket_provider.py +++ b/tests/unittest/test_bitbucket_provider.py @@ -1,3 +1,4 @@ +from pr_agent.git_providers import BitbucketServerProvider from pr_agent.git_providers.bitbucket_provider import BitbucketProvider @@ -8,3 +9,10 @@ class TestBitbucketProvider: assert workspace_slug == "WORKSPACE_XYZ" assert repo_slug == "MY_TEST_REPO" assert pr_number == 321 + + def test_bitbucket_server_pr_url(self): + url = "https://git.onpreminstance.com/projects/AAA/repos/my-repo/pull-requests/1" + workspace_slug, repo_slug, pr_number = BitbucketServerProvider._parse_pr_url(url) + assert workspace_slug == "AAA" + assert repo_slug == "my-repo" + assert pr_number == 1 diff --git a/tests/unittest/test_clip_tokens.py b/tests/unittest/test_clip_tokens.py new file mode 100644 index 00000000..cc52ab7e --- /dev/null +++ b/tests/unittest/test_clip_tokens.py @@ -0,0 +1,19 @@ + +# Generated by CodiumAI + +import pytest + +from pr_agent.algo.utils import clip_tokens + + +class TestClipTokens: + def test_clip(self): + text = "line1\nline2\nline3\nline4\nline5\nline6" + max_tokens = 25 + result = clip_tokens(text, max_tokens) + assert result == text + + max_tokens = 10 + result = clip_tokens(text, max_tokens) + expected_results = 'line1\nline2\nline3\nli...(truncated)' + assert result == expected_results diff --git a/tests/unittest/test_convert_to_markdown.py b/tests/unittest/test_convert_to_markdown.py index bb6f2268..b03c4fde 100644 --- a/tests/unittest/test_convert_to_markdown.py +++ b/tests/unittest/test_convert_to_markdown.py @@ -71,7 +71,7 @@ class TestConvertToMarkdown: - ๐Ÿ“Œ **Type of PR:** Test type\n\ - ๐Ÿงช **Relevant tests 
added:** no\n\ - ✨ **Focused PR:** Yes\n\ -- **General PR suggestions:** general suggestion...\n\n\n- **
🤖 Code feedback:**\n\n - **Code example:**\n - **Before:**\n ```\n Code before\n ```\n - **After:**\n ```\n Code after\n ```\n\n - **Code example:**\n - **Before:**\n ```\n Code before 2\n ```\n - **After:**\n ```\n Code after 2\n ```\n\n
\ +- **General PR suggestions:** general suggestion...\n\n\n-
🤖 Code feedback:\n\n - **Code example:**\n - **Before:**\n ```\n Code before\n ```\n - **After:**\n ```\n Code after\n ```\n\n - **Code example:**\n - **Before:**\n ```\n Code before 2\n ```\n - **After:**\n ```\n Code after 2\n ```\n\n
\ """ assert convert_to_markdown(input_data).strip() == expected_output.strip() diff --git a/tests/unittest/test_parse_code_suggestion.py b/tests/unittest/test_parse_code_suggestion.py index aaa03f72..a7a5ecc2 100644 --- a/tests/unittest/test_parse_code_suggestion.py +++ b/tests/unittest/test_parse_code_suggestion.py @@ -61,7 +61,7 @@ class TestParseCodeSuggestion: 'before': 'Before 1', 'after': 'After 1' } - expected_output = " **suggestion:** Suggestion 1\n **description:** Description 1\n **before:** Before 1\n **after:** After 1\n\n" # noqa: E501 + expected_output = ' **suggestion:** Suggestion 1 \n **description:** Description 1 \n **before:** Before 1 \n **after:** After 1 \n\n' # noqa: E501 assert parse_code_suggestion(code_suggestions) == expected_output # Tests that function returns correct output when input dictionary has 'code example' key @@ -74,5 +74,5 @@ class TestParseCodeSuggestion: 'after': 'After 2' } } - expected_output = " **suggestion:** Suggestion 2\n **description:** Description 2\n - **code example:**\n - **before:**\n ```\n Before 2\n ```\n - **after:**\n ```\n After 2\n ```\n\n" # noqa: E501 + expected_output = ' **suggestion:** Suggestion 2 \n **description:** Description 2 \n - **code example:**\n - **before:**\n ```\n Before 2\n ```\n - **after:**\n ```\n After 2\n ```\n\n' # noqa: E501 assert parse_code_suggestion(code_suggestions) == expected_output