diff --git a/.github/workflows/pr-agent-review.yaml b/.github/workflows/pr-agent-review.yaml index eb811a38..6932b4bd 100644 --- a/.github/workflows/pr-agent-review.yaml +++ b/.github/workflows/pr-agent-review.yaml @@ -21,8 +21,8 @@ jobs: id: pragent uses: Codium-ai/pr-agent@main env: - OPENAI.KEY: ${{ secrets.OPENAI_KEY }} - OPENAI.ORG: ${{ secrets.OPENAI_ORG }} # optional + OPENAI_KEY: ${{ secrets.OPENAI_KEY }} + OPENAI_ORG: ${{ secrets.OPENAI_ORG }} # optional GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} PINECONE.API_KEY: ${{ secrets.PINECONE_API_KEY }} PINECONE.ENVIRONMENT: ${{ secrets.PINECONE_ENVIRONMENT }} diff --git a/README.md b/README.md index 56406ce0..720277a0 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,8 @@ CodiumAI `PR-Agent` is an open-source tool aiming to help developers review pull ‣ **Code Suggestions (`/improve`)**: [Committable code suggestions](https://github.com/Codium-ai/pr-agent/pull/229#discussion_r1306919276) for improving the PR. \ ‣ **Update Changelog (`/update_changelog`)**: Automatically updating the CHANGELOG.md file with the [PR changes](https://github.com/Codium-ai/pr-agent/pull/168#discussion_r1282077645). +\ +‣ **Find similar issue (`/similar_issue`)**: Automatically retrieves and presents [similar issues](https://github.com/Alibaba-MIIL/ASL/issues/107). See the [usage guide](./Usage.md) for instructions how to run the different tools from [CLI](./Usage.md#working-from-a-local-repo-cli), or by [online usage](./Usage.md#online-usage), as well as additional details on optional commands and configurations. @@ -106,6 +108,7 @@ See the [usage guide](./Usage.md) for instructions how to run the different tool | | ⮑ Extended | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | :white_check_mark: | | | Reflect and Review | :white_check_mark: | :white_check_mark: | :white_check_mark: | | :white_check_mark: | :white_check_mark: | | | Update CHANGELOG.md | :white_check_mark: | :white_check_mark: | :white_check_mark: | | | | +| | Find similar issue | :white_check_mark: | | | | | | | | | | | | | | | USAGE | CLI | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | | App / webhook | :white_check_mark: | :white_check_mark: | | | | @@ -183,7 +186,7 @@ Here are some advantages of PR-Agent: - [x] Support additional models, as a replacement for OpenAI (see [here](https://github.com/Codium-ai/pr-agent/pull/172)) - [x] Develop additional logic for handling large PRs (see [here](https://github.com/Codium-ai/pr-agent/pull/229)) - [ ] Add additional context to the prompt. For example, repo (or relevant files) summarization, with tools such a [ctags](https://github.com/universal-ctags/ctags) -- [ ] PR-Agent for issues, and just for pull requests +- [x] PR-Agent for issues - [ ] Adding more tools. 
Possible directions: - [x] PR description - [x] Inline code suggestions @@ -201,3 +204,13 @@ Here are some advantages of PR-Agent: - [openai-pr-reviewer](https://github.com/coderabbitai/openai-pr-reviewer) - [CodeReview BOT](https://github.com/anc95/ChatGPT-CodeReview) - [AI-Maintainer](https://github.com/merwanehamadi/AI-Maintainer) + +## Links + +[![Join our Discord community](https://raw.githubusercontent.com/Codium-ai/codiumai-vscode-release/main/media/docs/Joincommunity.png)](https://discord.gg/kG35uSHDBc) + +- Discord community: https://discord.gg/kG35uSHDBc +- CodiumAI site: https://codium.ai +- Blog: https://www.codium.ai/blog/ +- Troubleshooting: https://www.codium.ai/blog/technical-faq-and-troubleshooting/ +- Support: support@codium.ai diff --git a/Usage.md b/Usage.md index 03b5f54b..bc2544b8 100644 --- a/Usage.md +++ b/Usage.md @@ -247,4 +247,26 @@ And use the following settings (you have to replace the values) in .secrets.toml [azure_devops] org = "https://dev.azure.com/YOUR_ORGANIZATION/" pat = "YOUR_PAT_TOKEN" -``` \ No newline at end of file +``` + +#### Similar issue tool + +[Example usage](https://github.com/Alibaba-MIIL/ASL/issues/107) + + + +To enable usage of the '**similar issue**' tool, you need to set the following keys in `.secrets.toml` (or in the relevant environment variables): +``` +[pinecone] +api_key = "..." +environment = "..." +``` +These parameters can be obtained by registering to [Pinecone](https://app.pinecone.io/?sessionType=signup/). + +- To invoke the 'similar issue' tool from **CLI**, run: +`python3 cli.py --issue_url=... similar_issue` + +- To invoke the 'similar' issue tool via online usage, [comment](https://github.com/Codium-ai/pr-agent/issues/178#issuecomment-1716934893) on a PR: +`/similar_issue` + +- You can also enable the 'similar issue' tool to run automatically when a new issue is opened, by adding it to the [pr_commands list in the github_app section](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L66) diff --git a/pics/debugger.png b/pics/debugger.png deleted file mode 100644 index 7d8f201f..00000000 Binary files a/pics/debugger.png and /dev/null differ diff --git a/pics/similar_issue_tool.png b/pics/similar_issue_tool.png new file mode 100644 index 00000000..4ec51c81 Binary files /dev/null and b/pics/similar_issue_tool.png differ diff --git a/pr_agent/algo/language_handler.py b/pr_agent/algo/language_handler.py index 586a3161..66e85025 100644 --- a/pr_agent/algo/language_handler.py +++ b/pr_agent/algo/language_handler.py @@ -42,6 +42,11 @@ def sort_files_by_main_languages(languages: Dict, files: list): files_sorted = [] rest_files = {} + # if no languages detected, put all files in the "Other" category + if not languages: + files_sorted = [({"language": "Other", "files": list(files_filtered)})] + return files_sorted + main_extensions_flat = [] for ext in main_extensions: main_extensions_flat.extend(ext) diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index ac865471..c7923d16 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -20,7 +20,7 @@ def get_setting(key: str) -> Any: except Exception: return global_settings.get(key, None) -def convert_to_markdown(output_data: dict, gfm_supported: bool) -> str: +def convert_to_markdown(output_data: dict, gfm_supported: bool=True) -> str: """ Convert a dictionary of data into markdown format. 
Args: @@ -42,6 +42,7 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool) -> str: "General suggestions": "💡", "Insights from user's answers": "📝", "Code feedback": "🤖", + "Estimated effort to review [1-5]": "⏱️", } for key, value in output_data.items(): diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py index 330590a1..0911d2d2 100644 --- a/pr_agent/git_providers/git_provider.py +++ b/pr_agent/git_providers/git_provider.py @@ -132,6 +132,10 @@ def get_main_pr_language(languages, files) -> str: Get the main language of the commit. Return an empty string if cannot determine. """ main_language_str = "" + if not languages: + logging.info("No languages detected") + return main_language_str + try: top_language = max(languages, key=languages.get).lower() diff --git a/pr_agent/servers/github_action_runner.py b/pr_agent/servers/github_action_runner.py index fbf4f89c..7dbea972 100644 --- a/pr_agent/servers/github_action_runner.py +++ b/pr_agent/servers/github_action_runner.py @@ -12,8 +12,8 @@ async def run_action(): # Get environment variables GITHUB_EVENT_NAME = os.environ.get('GITHUB_EVENT_NAME') GITHUB_EVENT_PATH = os.environ.get('GITHUB_EVENT_PATH') - OPENAI_KEY = os.environ.get('OPENAI_KEY') - OPENAI_ORG = os.environ.get('OPENAI_ORG') + OPENAI_KEY = os.environ.get('OPENAI_KEY') or os.environ.get('OPENAI.KEY') + OPENAI_ORG = os.environ.get('OPENAI_ORG') or os.environ.get('OPENAI.ORG') GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN') get_settings().set("CONFIG.PUBLISH_OUTPUT_PROGRESS", False) @@ -61,12 +61,21 @@ async def run_action(): if action in ["created", "edited"]: comment_body = event_payload.get("comment", {}).get("body") if comment_body: - pr_url = event_payload.get("issue", {}).get("pull_request", {}).get("url") - if pr_url: + is_pr = False + # check if issue is pull request + if event_payload.get("issue", {}).get("pull_request"): + url = event_payload.get("issue", {}).get("pull_request", {}).get("url") + is_pr = True + else: + url = event_payload.get("issue", {}).get("url") + if url: body = comment_body.strip().lower() comment_id = event_payload.get("comment", {}).get("id") - provider = get_git_provider()(pr_url=pr_url) - await PRAgent().handle_request(pr_url, body, notify=lambda: provider.add_eyes_reaction(comment_id)) + provider = get_git_provider()(pr_url=url) + if is_pr: + await PRAgent().handle_request(url, body, notify=lambda: provider.add_eyes_reaction(comment_id)) + else: + await PRAgent().handle_request(url, body) if __name__ == '__main__': diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 75dfcf97..a272e1f9 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -11,12 +11,14 @@ ai_timeout=180 max_description_tokens = 500 max_commits_tokens = 500 secret_provider="google_cloud_storage" +cli_mode=false [pr_reviewer] # /review # require_focused_review=false require_score_review=false require_tests_review=true require_security_review=true +require_estimate_effort_to_review=true num_code_suggestions=4 inline_code_comments = false ask_and_reflect=false @@ -24,10 +26,14 @@ automatic_review=true extra_instructions = "" [pr_description] # /describe # +publish_labels=true publish_description_as_comment=false add_original_user_description=false keep_original_user_title=false extra_instructions = "" +# markers +use_description_markers=false +include_generated_by_header=true [pr_questions] # /ask # diff --git 
a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index 7c21f433..90ac91cc 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -85,6 +85,14 @@ PR Analysis: code diff changes are too scattered, then the PR is not focused. Explain your answer shortly. {%- endif %} +{%- if require_estimate_effort_to_review %} + Estimated effort to review [1-5]: + type: string + description: >- + Estimate, on a scale of 1-5 (inclusive), the time and effort required to review this PR by an experienced and knowledgeable developer. 1 means short and easy review , 5 means long and hard review. + Take into account the size, complexity, quality, and the needed changes of the PR code diff. + Explain your answer shortly (1-2 sentences). +{%- endif %} PR Feedback: General suggestions: type: string diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py index f30b0165..3c388eb4 100644 --- a/pr_agent/tools/pr_description.py +++ b/pr_agent/tools/pr_description.py @@ -1,5 +1,6 @@ import copy import json +import re import logging from typing import List, Tuple @@ -28,6 +29,7 @@ class PRDescription: self.main_pr_language = get_main_pr_language( self.git_provider.get_languages(), self.git_provider.get_files() ) + self.pr_id = f"{self.git_provider.repo}/{self.git_provider.pr_num}" # Initialize the AI handler self.ai_handler = AiHandler() @@ -61,26 +63,39 @@ class PRDescription: """ Generates a PR description using an AI model and publishes it to the PR. """ - logging.info('Generating a PR description...') + logging.info(f"Generating a PR description {self.pr_id}") if get_settings().config.publish_output: self.git_provider.publish_comment("Preparing pr description...", is_temporary=True) - + await retry_with_fallback_models(self._prepare_prediction) - - logging.info('Preparing answer...') - pr_title, pr_body, pr_types, markdown_text, description = self._prepare_pr_answer() - + + logging.info(f"Preparing answer {self.pr_id}") + if self.prediction: + self._prepare_data() + else: + return None + + pr_labels = [] + if get_settings().pr_description.publish_labels: + pr_labels = self._prepare_labels() + + if get_settings().pr_description.use_description_markers: + pr_title, pr_body = self._prepare_pr_answer_with_markers() + else: + pr_title, pr_body, = self._prepare_pr_answer() + full_markdown_description = f"## Title\n\n{pr_title}\n\n___\n{pr_body}" + if get_settings().config.publish_output: - logging.info('Pushing answer...') + logging.info(f"Pushing answer {self.pr_id}") if get_settings().pr_description.publish_description_as_comment: - self.git_provider.publish_comment(pr_body) + self.git_provider.publish_comment(full_markdown_description) else: self.git_provider.publish_description(pr_title, pr_body) - if self.git_provider.is_supported("get_labels"): + if get_settings().pr_description.publish_labels and self.git_provider.is_supported("get_labels"): current_labels = self.git_provider.get_labels() if current_labels is None: current_labels = [] - self.git_provider.publish_labels(pr_types + current_labels) + self.git_provider.publish_labels(pr_labels + current_labels) self.git_provider.remove_initial_comment() return "" @@ -99,9 +114,12 @@ class PRDescription: Any exceptions raised by the 'get_pr_diff' and '_get_prediction' functions. 
""" - logging.info('Getting PR diff...') + if get_settings().pr_description.use_description_markers and 'pr_agent:' not in self.user_description: + return None + + logging.info(f"Getting PR diff {self.pr_id}") self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model) - logging.info('Getting AI prediction...') + logging.info(f"Getting AI prediction {self.pr_id}") self.prediction = await self._get_prediction(model) async def _get_prediction(self, model: str) -> str: @@ -134,35 +152,71 @@ class PRDescription: return response - def _prepare_pr_answer(self) -> Tuple[str, str, List[str], str]: + + def _prepare_data(self): + # Load the AI prediction data into a dictionary + self.data = load_yaml(self.prediction.strip()) + + if get_settings().pr_description.add_original_user_description and self.user_description: + self.data["User Description"] = self.user_description + + + def _prepare_labels(self) -> List[str]: + pr_types = [] + + # If the 'PR Type' key is present in the dictionary, split its value by comma and assign it to 'pr_types' + if 'PR Type' in self.data: + if type(self.data['PR Type']) == list: + pr_types = self.data['PR Type'] + elif type(self.data['PR Type']) == str: + pr_types = self.data['PR Type'].split(',') + + return pr_types + + def _prepare_pr_answer_with_markers(self) -> Tuple[str, str]: + logging.info(f"Using description marker replacements {self.pr_id}") + title = self.vars["title"] + body = self.user_description + if get_settings().pr_description.include_generated_by_header: + ai_header = f"### 🤖 Generated by PR Agent at {self.git_provider.last_commit_id.sha}\n\n" + else: + ai_header = "" + + ai_summary = self.data.get('PR Description') + if ai_summary and not re.search(r'', body): + summary = f"{ai_header}{ai_summary}" + body = body.replace('pr_agent:summary', summary) + + if not re.search(r'', body): + ai_walkthrough = self.data.get('PR Main Files Walkthrough') + if ai_walkthrough: + walkthrough = str(ai_header) + for file in ai_walkthrough: + filename = file['filename'].replace("'", "`") + description = file['changes in file'].replace("'", "`") + walkthrough += f'- `{filename}`: {description}\n' + + body = body.replace('pr_agent:walkthrough', walkthrough) + + return title, body + + def _prepare_pr_answer(self) -> Tuple[str, str]: """ Prepare the PR description based on the AI prediction data. Returns: - title: a string containing the PR title. - - pr_body: a string containing the PR body in a markdown format. - - pr_types: a list of strings containing the PR types. - - markdown_text: a string containing the AI prediction data in a markdown format. used for publishing a comment - - user_description: a string containing the user description + - pr_body: a string containing the PR description body in a markdown format. 
""" - # Load the AI prediction data into a dictionary - data = load_yaml(self.prediction.strip()) - if get_settings().pr_description.add_original_user_description and self.user_description: - data["User Description"] = self.user_description - - # Initialization - pr_types = [] - - # If the 'PR Type' key is present in the dictionary, split its value by comma and assign it to 'pr_types' - if 'PR Type' in data: - if type(data['PR Type']) == list: - pr_types = data['PR Type'] - elif type(data['PR Type']) == str: - pr_types = data['PR Type'].split(',') + # Iterate over the dictionary items and append the key and value to 'markdown_text' in a markdown format + markdown_text = "" + for key, value in self.data.items(): + markdown_text += f"## {key}\n\n" + markdown_text += f"{value}\n\n" # Remove the 'PR Title' key from the dictionary - ai_title = data.pop('PR Title') + ai_title = self.data.pop('PR Title', self.vars["title"]) if get_settings().pr_description.keep_original_user_title: # Assign the original PR title to the 'title' variable title = self.vars["title"] @@ -173,26 +227,27 @@ class PRDescription: # Iterate over the remaining dictionary items and append the key and value to 'pr_body' in a markdown format, # except for the items containing the word 'walkthrough' pr_body = "" - for idx, (key, value) in enumerate(data.items()): + for idx, (key, value) in enumerate(self.data.items()): pr_body += f"## {key}:\n" if 'walkthrough' in key.lower(): # for filename, description in value.items(): + if self.git_provider.is_supported("gfm_markdown"): + pr_body += "
files:\n\n" for file in value: filename = file['filename'].replace("'", "`") description = file['changes in file'] pr_body += f'`{filename}`: {description}\n' + if self.git_provider.is_supported("gfm_markdown"): + pr_body +="
\n" else: # if the value is a list, join its items by comma if type(value) == list: value = ', '.join(v for v in value) pr_body += f"{value}\n" - if idx < len(data) - 1: + if idx < len(self.data) - 1: pr_body += "\n___\n" - markdown_text = f"## Title\n\n{title}\n\n___\n{pr_body}" - description = data['PR Description'] - if get_settings().config.verbosity_level >= 2: logging.info(f"title:\n{title}\n{pr_body}") - return title, pr_body, pr_types, markdown_text, description \ No newline at end of file + return title, pr_body \ No newline at end of file diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index 7f790d3b..b6bca536 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -59,6 +59,7 @@ class PRReviewer: "require_tests": get_settings().pr_reviewer.require_tests_review, "require_security": get_settings().pr_reviewer.require_security_review, "require_focused": get_settings().pr_reviewer.require_focused_review, + "require_estimate_effort_to_review": get_settings().pr_reviewer.require_estimate_effort_to_review, 'num_code_suggestions': get_settings().pr_reviewer.num_code_suggestions, 'question_str': question_str, 'answer_str': answer_str, diff --git a/pr_agent/tools/pr_similar_issue.py b/pr_agent/tools/pr_similar_issue.py index 98d6a1f6..d7b6a799 100644 --- a/pr_agent/tools/pr_similar_issue.py +++ b/pr_agent/tools/pr_similar_issue.py @@ -98,11 +98,14 @@ class PRSimilarIssue: logging.info('No new issues to update') async def run(self): + logging.info('Getting issue...') repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) issue_main = self.git_provider.repo_obj.get_issue(original_issue_number) issue_str, comments, number = self._process_issue(issue_main) openai.api_key = get_settings().openai.key + logging.info('Done') + logging.info('Querying...') res = openai.Embedding.create(input=[issue_str], engine=MODEL) embeds = [record['embedding'] for record in res['data']] pinecone_index = pinecone.Index(index_name=self.index_name) @@ -111,22 +114,34 @@ class PRSimilarIssue: filter={"repo": self.repo_name_for_index}, include_metadata=True).to_dict() relevant_issues_number_list = [] + relevant_comment_number_list = [] + score_list = [] for r in res['matches']: issue_number = int(r["id"].split('.')[0].split('_')[-1]) if original_issue_number == issue_number: continue if issue_number not in relevant_issues_number_list: relevant_issues_number_list.append(issue_number) + if 'comment' in r["id"]: + relevant_comment_number_list.append(int(r["id"].split('.')[1].split('_')[-1])) + else: + relevant_comment_number_list.append(-1) + score_list.append(str("{:.2f}".format(r['score']))) + logging.info('Done') - similar_issues_str = "Similar Issues:\n\n" + logging.info('Publishing response...') + similar_issues_str = "### Similar Issues\n___\n\n" for i, issue_number_similar in enumerate(relevant_issues_number_list): issue = self.git_provider.repo_obj.get_issue(issue_number_similar) title = issue.title url = issue.html_url - similar_issues_str += f"{i + 1}. [{title}]({url})\n\n" + if relevant_comment_number_list[i] != -1: + url = list(issue.get_comments())[relevant_comment_number_list[i]].html_url + similar_issues_str += f"{i + 1}. 
**[{title}]({url})** (score={score_list[i]})\n\n" if get_settings().config.publish_output: response = issue_main.create_comment(similar_issues_str) logging.info(similar_issues_str) + logging.info('Done') def _process_issue(self, issue): header = issue.title diff --git a/tests/unittest/test_language_handler.py b/tests/unittest/test_language_handler.py index 875ec1a7..fdde7bb0 100644 --- a/tests/unittest/test_language_handler.py +++ b/tests/unittest/test_language_handler.py @@ -61,7 +61,7 @@ class TestSortFilesByMainLanguages: type('', (object,), {'filename': 'file1.py'})(), type('', (object,), {'filename': 'file2.java'})() ] - expected_output = [{'language': 'Other', 'files': []}] + expected_output = [{'language': 'Other', 'files': files}] assert sort_files_by_main_languages(languages, files) == expected_output # Tests that function handles empty files list
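
The `/similar_issue` tool this patch adds embeds the text of the query issue with OpenAI and then looks up its nearest neighbours in a Pinecone index, filtered to the same repository. Below is a minimal sketch of that flow, condensed from the `PRSimilarIssue.run()` code in `pr_agent/tools/pr_similar_issue.py` above; the embedding engine, `top_k`, index name, and repo filter value are assumed placeholders rather than values confirmed by this diff (in the real tool they come from `.secrets.toml` via `get_settings()`).

```python
import openai
import pinecone

# Assumed placeholders -- the real values are read from .secrets.toml / environment
openai.api_key = "<OPENAI_KEY>"
pinecone.init(api_key="<PINECONE_API_KEY>", environment="<PINECONE_ENVIRONMENT>")

MODEL = "text-embedding-ada-002"       # assumed embedding engine; the patch only references MODEL
index_name = "<pinecone-index-name>"   # hypothetical index name
repo_name_for_index = "<owner-repo>"   # hypothetical metadata filter value

# 1. Embed the text of the query issue (title, body, comments), as in PRSimilarIssue.run()
issue_str = "Issue title\n\nIssue body text..."
res = openai.Embedding.create(input=[issue_str], engine=MODEL)
embeds = [record['embedding'] for record in res['data']]

# 2. Query Pinecone for the closest stored issues/comments from the same repo
pinecone_index = pinecone.Index(index_name=index_name)
matches = pinecone_index.query(embeds[0],
                               top_k=5,   # assumed; the value is not visible in this hunk
                               filter={"repo": repo_name_for_index},
                               include_metadata=True).to_dict()

# 3. Each match id encodes the source issue (and optionally comment) number, plus a score
for r in matches['matches']:
    issue_number = int(r["id"].split('.')[0].split('_')[-1])
    print(f"issue #{issue_number}, score={float(r['score']):.2f}")
```

In the tool itself, the matched issue (or comment) URLs and scores are then formatted into the "### Similar Issues" markdown shown in the hunk above and posted back to the originating issue as a comment.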