diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index ded1b52c..d5e1a3c6 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -343,7 +343,7 @@ def set_custom_labels(variables): labels = get_settings().custom_labels if not labels: # set default labels - labels = ['Bug fix', 'Tests', 'Bug fix with tests', 'Refactoring', 'Enhancement', 'Documentation', 'Other'] + labels = ['Bug fix', 'Tests', 'Bug fix with tests', 'Enhancement', 'Documentation', 'Other'] labels_list = "\n - ".join(labels) if labels else "" labels_list = f" - {labels_list}" if labels_list else "" variables["custom_labels"] = labels_list @@ -367,7 +367,7 @@ def get_user_labels(current_labels: List[str] = None): current_labels = [] user_labels = [] for label in current_labels: - if label.lower() in ['bug fix', 'tests', 'refactoring', 'enhancement', 'documentation', 'other']: + if label.lower() in ['bug fix', 'tests', 'enhancement', 'documentation', 'other']: continue if get_settings().config.enable_custom_labels: if label in get_settings().custom_labels: diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py index 386577a2..ee8ad48f 100644 --- a/pr_agent/git_providers/bitbucket_provider.py +++ b/pr_agent/git_providers/bitbucket_provider.py @@ -229,7 +229,10 @@ class BitbucketProvider(GitProvider): return response def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str: - link = f"{self.pr_url}/#L{relevant_file}T{relevant_line_start}" + if relevant_line_start == -1: + link = f"{self.pr_url}/#L{relevant_file}" + else: + link = f"{self.pr_url}/#L{relevant_file}T{relevant_line_start}" return link def generate_link_to_relevant_line_number(self, suggestion) -> str: diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py index 3ad1b2e0..deb5df3d 100644 --- a/pr_agent/git_providers/git_provider.py +++ b/pr_agent/git_providers/git_provider.py @@ 
-26,6 +26,8 @@ class FilePatchInfo: tokens: int = -1 edit_type: EDIT_TYPE = EDIT_TYPE.UNKNOWN old_filename: str = None + num_plus_lines: int = -1 + num_minus_lines: int = -1 class GitProvider(ABC): diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index c001f81e..3ae97742 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -143,8 +143,15 @@ class GithubProvider(GitProvider): else: get_logger().error(f"Unknown edit type: {file.status}") edit_type = EDIT_TYPE.UNKNOWN + + # count number of lines added and removed + patch_lines = patch.splitlines(keepends=True) + num_plus_lines = len([line for line in patch_lines if line.startswith('+')]) + num_minus_lines = len([line for line in patch_lines if line.startswith('-')]) file_patch_canonical_structure = FilePatchInfo(original_file_content_str, new_file_content_str, patch, - file.filename, edit_type=edit_type) + file.filename, edit_type=edit_type, + num_plus_lines=num_plus_lines, + num_minus_lines=num_minus_lines,) diff_files.append(file_patch_canonical_structure) self.diff_files = diff_files @@ -442,7 +449,7 @@ class GithubProvider(GitProvider): def publish_labels(self, pr_types): try: label_color_map = {"Bug fix": "1d76db", "Tests": "e99695", "Bug fix with tests": "c5def5", - "Refactoring": "bfdadc", "Enhancement": "bfd4f2", "Documentation": "d4c5f9", + "Enhancement": "bfd4f2", "Documentation": "d4c5f9", "Other": "d1bcf9"} post_parameters = [] for p in pr_types: @@ -506,7 +513,9 @@ class GithubProvider(GitProvider): def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str: sha_file = hashlib.sha256(relevant_file.encode('utf-8')).hexdigest() - if relevant_line_end: + if relevant_line_start == -1: + link = f"https://github.com/{self.repo}/pull/{self.pr_num}/files#diff-{sha_file}" + elif relevant_line_end: link = 
f"https://github.com/{self.repo}/pull/{self.pr_num}/files#diff-{sha_file}R{relevant_line_start}-R{relevant_line_end}" else: link = f"https://github.com/{self.repo}/pull/{self.pr_num}/files#diff-{sha_file}R{relevant_line_start}" diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index 3a593439..7b11ef54 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -115,12 +115,20 @@ class GitLabProvider(GitProvider): if not patch: patch = load_large_diff(filename, new_file_content_str, original_file_content_str) + + # count number of lines added and removed + patch_lines = patch.splitlines(keepends=True) + num_plus_lines = len([line for line in patch_lines if line.startswith('+')]) + num_minus_lines = len([line for line in patch_lines if line.startswith('-')]) diff_files.append( FilePatchInfo(original_file_content_str, new_file_content_str, patch=patch, filename=filename, edit_type=edit_type, - old_filename=None if diff['old_path'] == diff['new_path'] else diff['old_path'])) + old_filename=None if diff['old_path'] == diff['new_path'] else diff['old_path'], + num_plus_lines=num_plus_lines, + num_minus_lines=num_minus_lines, )) + self.diff_files = diff_files return diff_files @@ -424,7 +432,9 @@ class GitLabProvider(GitProvider): return "" def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str: - if relevant_line_end: + if relevant_line_start == -1: + link = f"https://gitlab.com/codiumai/pr-agent/-/blob/{self.mr.source_branch}/{relevant_file}?ref_type=heads" + elif relevant_line_end: link = f"https://gitlab.com/codiumai/pr-agent/-/blob/{self.mr.source_branch}/{relevant_file}?ref_type=heads#L{relevant_line_start}-L{relevant_line_end}" else: link = f"https://gitlab.com/codiumai/pr-agent/-/blob/{self.mr.source_branch}/{relevant_file}?ref_type=heads#L{relevant_line_start}" diff --git a/pr_agent/servers/help.py 
b/pr_agent/servers/help.py index c32c5666..7c3bf287 100644 --- a/pr_agent/servers/help.py +++ b/pr_agent/servers/help.py @@ -1,19 +1,19 @@ -commands_text = "> **/review**: Request a review of your Pull Request.\n" \ - "> **/describe**: Update the PR title and description based on the contents of the PR.\n" \ - "> **/improve [--extended]**: Suggest code improvements. Extended mode provides a higher quality feedback.\n" \ - "> **/ask \\**: Ask a question about the PR.\n" \ - "> **/update_changelog**: Update the changelog based on the PR's contents.\n" \ - "> **/add_docs**: Generate docstring for new components introduced in the PR.\n" \ - "> **/generate_labels**: Generate labels for the PR based on the PR's contents.\n" \ +commands_text = "> **/review**: Request a review of your Pull Request. \n" \ + "> **/describe**: Update the PR title and description based on the contents of the PR. \n" \ + "> **/improve [--extended]**: Suggest code improvements. Extended mode provides a higher quality feedback. \n" \ + "> **/ask \\**: Ask a question about the PR. \n" \ + "> **/update_changelog**: Update the changelog based on the PR's contents. \n" \ + "> **/add_docs**: Generate docstring for new components introduced in the PR. \n" \ + "> **/generate_labels**: Generate labels for the PR based on the PR's contents. \n" \ "> see the [tools guide](https://github.com/Codium-ai/pr-agent/blob/main/docs/TOOLS_GUIDE.md) for more details.\n\n" \ - ">To edit any configuration parameter from the [configuration.toml](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml), add --config_path=new_value.\n" \ - ">For example: /review --pr_reviewer.extra_instructions=\"focus on the file: ...\" \n" \ - ">To list the possible configuration parameters, add a **/config** comment.\n" \ + ">To edit any configuration parameter from the [configuration.toml](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml), add --config_path=new_value. 
\n" \ + ">For example: /review --pr_reviewer.extra_instructions=\"focus on the file: ...\" \n" \ + ">To list the possible configuration parameters, add a **/config** comment. \n" \ def bot_help_text(user: str): - return f"> Tag me in a comment '@{user}' and add one of the following commands:\n" + commands_text + return f"> Tag me in a comment '@{user}' and add one of the following commands: \n" + commands_text -actions_help_text = "> To invoke the PR-Agent, add a comment using one of the following commands:\n" + \ +actions_help_text = "> To invoke the PR-Agent, add a comment using one of the following commands: \n" + \ commands_text diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 14f4f6ae..259383d7 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -46,13 +46,16 @@ keep_original_user_title=false use_bullet_points=true extra_instructions = "" enable_pr_type=true +enable_file_walkthrough=false +enable_semantic_files_types=true final_update_message = true + # markers use_description_markers=false include_generated_by_header=true -#custom_labels = ['Bug fix', 'Tests', 'Bug fix with tests', 'Refactoring', 'Enhancement', 'Documentation', 'Other'] +#custom_labels = ['Bug fix', 'Tests', 'Bug fix with tests', 'Enhancement', 'Documentation', 'Other'] [pr_questions] # /ask # diff --git a/pr_agent/settings/custom_labels.toml b/pr_agent/settings/custom_labels.toml index 9c751d0e..ee45fb19 100644 --- a/pr_agent/settings/custom_labels.toml +++ b/pr_agent/settings/custom_labels.toml @@ -8,10 +8,8 @@ enable_custom_labels=false #description = """Adds or modifies tests""" #[custom_labels."Bug fix with tests"] #description = """Fixes a bug in the code and adds or modifies tests""" -#[custom_labels."Refactoring"] -#description = """Code refactoring without changing functionality""" #[custom_labels."Enhancement"] -#description = """Adds new features or functionality""" +#description = """Adds new 
features or modifies existing ones""" #[custom_labels."Documentation"] #description = """Adds or modifies documentation""" #[custom_labels."Other"] diff --git a/pr_agent/settings/pr_custom_labels.toml b/pr_agent/settings/pr_custom_labels.toml index 46ee0684..d9a5e004 100644 --- a/pr_agent/settings/pr_custom_labels.toml +++ b/pr_agent/settings/pr_custom_labels.toml @@ -24,7 +24,6 @@ The output must be a YAML object equivalent to type $Labels, according to the fo class Label(str, Enum): bug_fix = "Bug fix" tests = "Tests" - refactoring = "Refactoring" enhancement = "Enhancement" documentation = "Documentation" other = "Other" diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index 73f03b97..6e0c395e 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -20,7 +20,6 @@ The output must be a YAML object equivalent to type $PRDescription, according to class PRType(str, Enum): bug_fix = "Bug fix" tests = "Tests" - refactoring = "Refactoring" enhancement = "Enhancement" documentation = "Documentation" other = "Other" @@ -31,18 +30,32 @@ class PRType(str, Enum): {%- endif %} +{%- if enable_file_walkthrough %} class FileWalkthrough(BaseModel): filename: str = Field(description="the relevant file full path") - changes_in_file: str = Field(description="minimal and concise description of the changes in the relevant file") + changes_in_file: str = Field(description="minimal and concise summary of the changes in the relevant file") +{%- endif %} + +{%- if enable_semantic_files_types %} +Class FileDescription(BaseModel): + filename: str = Field(description="the relevant file full path") + changes_summary: str = Field(description="minimal and concise summary of the changes in the relevant file") + label: str = Field(description="a single semantic label that represents a type of code changes that occurred in the File. 
Possible values (partial list): 'bug fix', 'tests', 'enhancement', 'documentation', 'error handling', 'configuration changes', 'dependencies', 'formatting', 'miscellaneous', ...") +{%- endif %} Class PRDescription(BaseModel): title: str = Field(description="an informative title for the PR, describing its main theme") - type: List[PRType] = Field(description="one or more types that describe the PR type. . Return the label value, not the name.") + type: List[PRType] = Field(description="one or more types that describe the PR type. Return the label value, not the name.") description: str = Field(description="an informative and concise description of the PR. {%- if use_bullet_points %} Use bullet points.{% endif %}") {%- if enable_custom_labels %} labels: List[Label] = Field(min_items=0, description="custom labels that describe the PR. Return the label value, not the name.") {%- endif %} +{%- if enable_file_walkthrough %} main_files_walkthrough: List[FileWalkthrough] = Field(max_items=10) +{%- endif %} +{%- if enable_semantic_files_types %} + pr_files: List[FileDescription] = Field(max_items=15) +{%- endif %} ===== @@ -61,9 +74,21 @@ labels: {%- endif %} description: |- ... +{%- if enable_file_walkthrough %} main_files_walkthrough: - ... - ... +{%- endif %} +{%- if enable_semantic_files_types %} +pr_files: +- filename: | + ... + changes_summary: | + ... + label: | + ... +... +{%- endif %} ``` Answer should be a valid YAML, and nothing else. 
Each YAML output MUST be after a newline, with proper indent, and block scalar indicator ('|-') diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index d38dc287..ce32bfc9 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -57,7 +57,6 @@ PR Analysis: enum: - Bug fix - Tests - - Refactoring - Enhancement - Documentation - Other diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py index 0268e740..88baaac5 100644 --- a/pr_agent/tools/pr_description.py +++ b/pr_agent/tools/pr_description.py @@ -30,6 +30,11 @@ class PRDescription: ) self.pr_id = self.git_provider.get_pr_id() + if get_settings().pr_description.enable_semantic_files_types and not self.git_provider.is_supported( + "gfm_markdown"): + get_logger().debug(f"Disabling semantic files types for {self.pr_id}") + get_settings().pr_description.enable_semantic_files_types = False + # Initialize the AI handler self.ai_handler = AiHandler() @@ -45,6 +50,8 @@ class PRDescription: "commit_messages_str": self.git_provider.get_commit_messages(), "enable_custom_labels": get_settings().config.enable_custom_labels, "custom_labels_class": "", # will be filled if necessary in 'set_custom_labels' function + "enable_file_walkthrough": get_settings().pr_description.enable_file_walkthrough, + "enable_semantic_files_types": get_settings().pr_description.enable_semantic_files_types, } self.user_description = self.git_provider.get_user_description() @@ -79,6 +86,9 @@ class PRDescription: else: return None + if get_settings().pr_description.enable_semantic_files_types: + self._prepare_file_labels() + pr_labels = [] if get_settings().pr_description.publish_labels: pr_labels = self._prepare_labels() @@ -257,10 +267,13 @@ class PRDescription: # except for the items containing the word 'walkthrough' pr_body = "" for idx, (key, value) in enumerate(self.data.items()): - key_publish = 
key.strip(':').replace('_', ' ').capitalize() + if key == 'pr_files': + value = self.file_label_dict + key_publish = "PR changes summary" + else: + key_publish = key.rstrip(':').replace("_", " ").capitalize() pr_body += f"## {key_publish}\n" if 'walkthrough' in key.lower(): - # for filename, description in value.items(): if self.git_provider.is_supported("gfm_markdown"): pr_body += "
files:\n\n" for file in value: @@ -268,10 +281,12 @@ class PRDescription: description = file['changes_in_file'] pr_body += f'- `{filename}`: {description}\n' if self.git_provider.is_supported("gfm_markdown"): - pr_body +="
\n" + pr_body += "\n" + elif 'pr_files' in key.lower(): + pr_body = self.process_pr_files_prediction(pr_body, value) else: # if the value is a list, join its items by comma - if type(value) == list: + if isinstance(value, list): value = ', '.join(v for v in value) pr_body += f"{value}\n" if idx < len(self.data) - 1: @@ -280,4 +295,95 @@ class PRDescription: if get_settings().config.verbosity_level >= 2: get_logger().info(f"title:\n{title}\n{pr_body}") - return title, pr_body \ No newline at end of file + return title, pr_body + + def _prepare_file_labels(self): + self.file_label_dict = {} + for file in self.data['pr_files']: + try: + filename = file['filename'].replace("'", "`").replace('"', '`') + changes_summary = file['changes_summary'] + label = file['label'] + if label not in self.file_label_dict: + self.file_label_dict[label] = [] + self.file_label_dict[label].append((filename, changes_summary)) + except Exception as e: + get_logger().error(f"Error preparing file label dict {self.pr_id}: {e}") + pass + + def process_pr_files_prediction(self, pr_body, value): + if not self.git_provider.is_supported("gfm_markdown"): + get_logger().info(f"Disabling semantic files types for {self.pr_id} since gfm_markdown is not supported") + return pr_body + + try: + pr_body += """\n| | Relevant Files """ + pr_body += "  " * 70 + pr_body += """|\n|-----------|-------------|\n""" + for semantic_label in value.keys(): + s_label = semantic_label.strip("'").strip('"') + if self.git_provider.is_supported("gfm_markdown"): + # pr_body += f"
{semantic_label['label']}\n\n" + pr_body += f"| **{s_label}** |
files:
    " + + list_tuples = value[semantic_label] + for filename, file_change_description in list_tuples: + filename = filename.replace("'", "`") + filename_publish = filename.split("/")[-1] + filename_publish = f"**{filename_publish}**" + diff_plus_minus = "" + diff_files = self.git_provider.diff_files + for f in diff_files: + if f.filename.lower() == filename.lower(): + num_plus_lines = f.num_plus_lines + num_minus_lines = f.num_minus_lines + diff_plus_minus += f" ( +{num_plus_lines}/-{num_minus_lines} )" + break + + # try to add line numbers link to code suggestions + if hasattr(self.git_provider, 'get_line_link'): + filename = filename.strip() + link = self.git_provider.get_line_link(filename, relevant_line_start=-1) + if link: + diff_plus_minus = f"[{diff_plus_minus}]({link})" + diff_plus_minus = f" {diff_plus_minus}" + + if diff_plus_minus: + filename_publish += diff_plus_minus + if self.git_provider.is_supported("gfm_markdown"): + pr_body += f"
    {filename_publish}" + file_change_description = self._insert_br_after_x_chars(file_change_description) + if diff_plus_minus: + pr_body += f"
      Changes summary:
      **{file_change_description}**
    " + else: + pr_body += f"
      Changes summary:
      **{file_change_description}**
" + if self.git_provider.is_supported("gfm_markdown"): + pr_body += "
|\n" + except Exception as e: + get_logger().error(f"Error processing pr files to markdown {self.pr_id}: {e}") + pass + return pr_body + + def _insert_br_after_x_chars(self, text): + """ + Insert
into a string after a word that increases its length above x characters. + """ + x = 70 + if len(text) < x: + return text + + words = text.split(' ') + new_text = "" + current_length = 0 + + for word in words: + # Check if adding this word exceeds x characters + if current_length + len(word) > x: + new_text += "
" # Insert line break + current_length = 0 # Reset counter + + # Add the word to the new text + new_text += word + " " + current_length += len(word) + 1 # Add 1 for the space + + return new_text.strip() # Remove trailing space