diff --git a/.github/workflows/pr-agent-review.yaml b/.github/workflows/pr-agent-review.yaml
index eb811a38..6932b4bd 100644
--- a/.github/workflows/pr-agent-review.yaml
+++ b/.github/workflows/pr-agent-review.yaml
@@ -21,8 +21,8 @@ jobs:
id: pragent
uses: Codium-ai/pr-agent@main
env:
- OPENAI.KEY: ${{ secrets.OPENAI_KEY }}
- OPENAI.ORG: ${{ secrets.OPENAI_ORG }} # optional
+ OPENAI_KEY: ${{ secrets.OPENAI_KEY }}
+ OPENAI_ORG: ${{ secrets.OPENAI_ORG }} # optional
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PINECONE.API_KEY: ${{ secrets.PINECONE_API_KEY }}
PINECONE.ENVIRONMENT: ${{ secrets.PINECONE_ENVIRONMENT }}
diff --git a/README.md b/README.md
index 56406ce0..720277a0 100644
--- a/README.md
+++ b/README.md
@@ -26,6 +26,8 @@ CodiumAI `PR-Agent` is an open-source tool aiming to help developers review pull
‣ **Code Suggestions (`/improve`)**: [Committable code suggestions](https://github.com/Codium-ai/pr-agent/pull/229#discussion_r1306919276) for improving the PR.
\
‣ **Update Changelog (`/update_changelog`)**: Automatically updating the CHANGELOG.md file with the [PR changes](https://github.com/Codium-ai/pr-agent/pull/168#discussion_r1282077645).
+\
+‣ **Find similar issue (`/similar_issue`)**: Automatically retrieves and presents [similar issues](https://github.com/Alibaba-MIIL/ASL/issues/107).
See the [usage guide](./Usage.md) for instructions how to run the different tools from [CLI](./Usage.md#working-from-a-local-repo-cli), or by [online usage](./Usage.md#online-usage), as well as additional details on optional commands and configurations.
@@ -106,6 +108,7 @@ See the [usage guide](./Usage.md) for instructions how to run the different tool
| | ⮑ Extended | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | :white_check_mark: |
| | Reflect and Review | :white_check_mark: | :white_check_mark: | :white_check_mark: | | :white_check_mark: | :white_check_mark: |
| | Update CHANGELOG.md | :white_check_mark: | :white_check_mark: | :white_check_mark: | | | |
+| | Find similar issue | :white_check_mark: | | | | | |
| | | | | | | |
| USAGE | CLI | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| | App / webhook | :white_check_mark: | :white_check_mark: | | | |
@@ -183,7 +186,7 @@ Here are some advantages of PR-Agent:
- [x] Support additional models, as a replacement for OpenAI (see [here](https://github.com/Codium-ai/pr-agent/pull/172))
- [x] Develop additional logic for handling large PRs (see [here](https://github.com/Codium-ai/pr-agent/pull/229))
- [ ] Add additional context to the prompt. For example, repo (or relevant files) summarization, with tools such a [ctags](https://github.com/universal-ctags/ctags)
-- [ ] PR-Agent for issues, and just for pull requests
+- [x] PR-Agent for issues
- [ ] Adding more tools. Possible directions:
- [x] PR description
- [x] Inline code suggestions
@@ -201,3 +204,13 @@ Here are some advantages of PR-Agent:
- [openai-pr-reviewer](https://github.com/coderabbitai/openai-pr-reviewer)
- [CodeReview BOT](https://github.com/anc95/ChatGPT-CodeReview)
- [AI-Maintainer](https://github.com/merwanehamadi/AI-Maintainer)
+
+## Links
+
+[Join our Discord community](https://discord.gg/kG35uSHDBc)
+
+- Discord community: https://discord.gg/kG35uSHDBc
+- CodiumAI site: https://codium.ai
+- Blog: https://www.codium.ai/blog/
+- Troubleshooting: https://www.codium.ai/blog/technical-faq-and-troubleshooting/
+- Support: support@codium.ai
diff --git a/Usage.md b/Usage.md
index 03b5f54b..bc2544b8 100644
--- a/Usage.md
+++ b/Usage.md
@@ -247,4 +247,26 @@ And use the following settings (you have to replace the values) in .secrets.toml
[azure_devops]
org = "https://dev.azure.com/YOUR_ORGANIZATION/"
pat = "YOUR_PAT_TOKEN"
-```
\ No newline at end of file
+```
+
+#### Similar issue tool
+
+[Example usage](https://github.com/Alibaba-MIIL/ASL/issues/107)
+
+
+
+To enable usage of the '**similar issue**' tool, you need to set the following keys in `.secrets.toml` (or in the relevant environment variables):
+```
+[pinecone]
+api_key = "..."
+environment = "..."
+```
+These parameters can be obtained by registering with [Pinecone](https://app.pinecone.io/?sessionType=signup/).
+
+- To invoke the 'similar issue' tool from **CLI**, run:
+`python3 cli.py --issue_url=... similar_issue`
+
+- To invoke the 'similar issue' tool via online usage, [comment](https://github.com/Codium-ai/pr-agent/issues/178#issuecomment-1716934893) on an issue:
+`/similar_issue`
+
+- You can also enable the 'similar issue' tool to run automatically when a new issue is opened, by adding it to the [pr_commands list in the github_app section](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L66)
diff --git a/pics/debugger.png b/pics/debugger.png
deleted file mode 100644
index 7d8f201f..00000000
Binary files a/pics/debugger.png and /dev/null differ
diff --git a/pics/similar_issue_tool.png b/pics/similar_issue_tool.png
new file mode 100644
index 00000000..4ec51c81
Binary files /dev/null and b/pics/similar_issue_tool.png differ
diff --git a/pr_agent/algo/language_handler.py b/pr_agent/algo/language_handler.py
index 586a3161..66e85025 100644
--- a/pr_agent/algo/language_handler.py
+++ b/pr_agent/algo/language_handler.py
@@ -42,6 +42,11 @@ def sort_files_by_main_languages(languages: Dict, files: list):
files_sorted = []
rest_files = {}
+ # if no languages detected, put all files in the "Other" category
+ if not languages:
+ files_sorted = [({"language": "Other", "files": list(files_filtered)})]
+ return files_sorted
+
main_extensions_flat = []
for ext in main_extensions:
main_extensions_flat.extend(ext)
diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py
index ac865471..c7923d16 100644
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@@ -20,7 +20,7 @@ def get_setting(key: str) -> Any:
except Exception:
return global_settings.get(key, None)
-def convert_to_markdown(output_data: dict, gfm_supported: bool) -> str:
+def convert_to_markdown(output_data: dict, gfm_supported: bool=True) -> str:
"""
Convert a dictionary of data into markdown format.
Args:
@@ -42,6 +42,7 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool) -> str:
"General suggestions": "💡",
"Insights from user's answers": "📝",
"Code feedback": "🤖",
+ "Estimated effort to review [1-5]": "⏱️",
}
for key, value in output_data.items():
diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py
index 330590a1..0911d2d2 100644
--- a/pr_agent/git_providers/git_provider.py
+++ b/pr_agent/git_providers/git_provider.py
@@ -132,6 +132,10 @@ def get_main_pr_language(languages, files) -> str:
Get the main language of the commit. Return an empty string if cannot determine.
"""
main_language_str = ""
+ if not languages:
+ logging.info("No languages detected")
+ return main_language_str
+
try:
top_language = max(languages, key=languages.get).lower()
diff --git a/pr_agent/servers/github_action_runner.py b/pr_agent/servers/github_action_runner.py
index fbf4f89c..7dbea972 100644
--- a/pr_agent/servers/github_action_runner.py
+++ b/pr_agent/servers/github_action_runner.py
@@ -12,8 +12,8 @@ async def run_action():
# Get environment variables
GITHUB_EVENT_NAME = os.environ.get('GITHUB_EVENT_NAME')
GITHUB_EVENT_PATH = os.environ.get('GITHUB_EVENT_PATH')
- OPENAI_KEY = os.environ.get('OPENAI_KEY')
- OPENAI_ORG = os.environ.get('OPENAI_ORG')
+ OPENAI_KEY = os.environ.get('OPENAI_KEY') or os.environ.get('OPENAI.KEY')
+ OPENAI_ORG = os.environ.get('OPENAI_ORG') or os.environ.get('OPENAI.ORG')
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN')
get_settings().set("CONFIG.PUBLISH_OUTPUT_PROGRESS", False)
@@ -61,12 +61,21 @@ async def run_action():
if action in ["created", "edited"]:
comment_body = event_payload.get("comment", {}).get("body")
if comment_body:
- pr_url = event_payload.get("issue", {}).get("pull_request", {}).get("url")
- if pr_url:
+ is_pr = False
+ # check if issue is pull request
+ if event_payload.get("issue", {}).get("pull_request"):
+ url = event_payload.get("issue", {}).get("pull_request", {}).get("url")
+ is_pr = True
+ else:
+ url = event_payload.get("issue", {}).get("url")
+ if url:
body = comment_body.strip().lower()
comment_id = event_payload.get("comment", {}).get("id")
- provider = get_git_provider()(pr_url=pr_url)
- await PRAgent().handle_request(pr_url, body, notify=lambda: provider.add_eyes_reaction(comment_id))
+ provider = get_git_provider()(pr_url=url)
+ if is_pr:
+ await PRAgent().handle_request(url, body, notify=lambda: provider.add_eyes_reaction(comment_id))
+ else:
+ await PRAgent().handle_request(url, body)
if __name__ == '__main__':
diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
index 75dfcf97..a272e1f9 100644
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@@ -11,12 +11,14 @@ ai_timeout=180
max_description_tokens = 500
max_commits_tokens = 500
secret_provider="google_cloud_storage"
+cli_mode=false
[pr_reviewer] # /review #
require_focused_review=false
require_score_review=false
require_tests_review=true
require_security_review=true
+require_estimate_effort_to_review=true
num_code_suggestions=4
inline_code_comments = false
ask_and_reflect=false
@@ -24,10 +26,14 @@ automatic_review=true
extra_instructions = ""
[pr_description] # /describe #
+publish_labels=true
publish_description_as_comment=false
add_original_user_description=false
keep_original_user_title=false
extra_instructions = ""
+# markers
+use_description_markers=false
+include_generated_by_header=true
[pr_questions] # /ask #
diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml
index 7c21f433..90ac91cc 100644
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@@ -85,6 +85,14 @@ PR Analysis:
code diff changes are too scattered, then the PR is not focused. Explain
your answer shortly.
{%- endif %}
+{%- if require_estimate_effort_to_review %}
+ Estimated effort to review [1-5]:
+ type: string
+ description: >-
+ Estimate, on a scale of 1-5 (inclusive), the time and effort required to review this PR by an experienced and knowledgeable developer. 1 means short and easy review , 5 means long and hard review.
+ Take into account the size, complexity, quality, and the needed changes of the PR code diff.
+ Explain your answer shortly (1-2 sentences).
+{%- endif %}
PR Feedback:
General suggestions:
type: string
diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py
index f30b0165..3c388eb4 100644
--- a/pr_agent/tools/pr_description.py
+++ b/pr_agent/tools/pr_description.py
@@ -1,5 +1,6 @@
import copy
import json
+import re
import logging
from typing import List, Tuple
@@ -28,6 +29,7 @@ class PRDescription:
self.main_pr_language = get_main_pr_language(
self.git_provider.get_languages(), self.git_provider.get_files()
)
+ self.pr_id = f"{self.git_provider.repo}/{self.git_provider.pr_num}"
# Initialize the AI handler
self.ai_handler = AiHandler()
@@ -61,26 +63,39 @@ class PRDescription:
"""
Generates a PR description using an AI model and publishes it to the PR.
"""
- logging.info('Generating a PR description...')
+ logging.info(f"Generating a PR description {self.pr_id}")
if get_settings().config.publish_output:
self.git_provider.publish_comment("Preparing pr description...", is_temporary=True)
-
+
await retry_with_fallback_models(self._prepare_prediction)
-
- logging.info('Preparing answer...')
- pr_title, pr_body, pr_types, markdown_text, description = self._prepare_pr_answer()
-
+
+ logging.info(f"Preparing answer {self.pr_id}")
+ if self.prediction:
+ self._prepare_data()
+ else:
+ return None
+
+ pr_labels = []
+ if get_settings().pr_description.publish_labels:
+ pr_labels = self._prepare_labels()
+
+ if get_settings().pr_description.use_description_markers:
+ pr_title, pr_body = self._prepare_pr_answer_with_markers()
+ else:
+ pr_title, pr_body, = self._prepare_pr_answer()
+ full_markdown_description = f"## Title\n\n{pr_title}\n\n___\n{pr_body}"
+
if get_settings().config.publish_output:
- logging.info('Pushing answer...')
+ logging.info(f"Pushing answer {self.pr_id}")
if get_settings().pr_description.publish_description_as_comment:
- self.git_provider.publish_comment(pr_body)
+ self.git_provider.publish_comment(full_markdown_description)
else:
self.git_provider.publish_description(pr_title, pr_body)
- if self.git_provider.is_supported("get_labels"):
+ if get_settings().pr_description.publish_labels and self.git_provider.is_supported("get_labels"):
current_labels = self.git_provider.get_labels()
if current_labels is None:
current_labels = []
- self.git_provider.publish_labels(pr_types + current_labels)
+ self.git_provider.publish_labels(pr_labels + current_labels)
self.git_provider.remove_initial_comment()
return ""
@@ -99,9 +114,12 @@ class PRDescription:
Any exceptions raised by the 'get_pr_diff' and '_get_prediction' functions.
"""
- logging.info('Getting PR diff...')
+ if get_settings().pr_description.use_description_markers and 'pr_agent:' not in self.user_description:
+ return None
+
+ logging.info(f"Getting PR diff {self.pr_id}")
self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)
- logging.info('Getting AI prediction...')
+ logging.info(f"Getting AI prediction {self.pr_id}")
self.prediction = await self._get_prediction(model)
async def _get_prediction(self, model: str) -> str:
@@ -134,35 +152,71 @@ class PRDescription:
return response
- def _prepare_pr_answer(self) -> Tuple[str, str, List[str], str]:
+
+ def _prepare_data(self):
+ # Load the AI prediction data into a dictionary
+ self.data = load_yaml(self.prediction.strip())
+
+ if get_settings().pr_description.add_original_user_description and self.user_description:
+ self.data["User Description"] = self.user_description
+
+
+ def _prepare_labels(self) -> List[str]:
+ pr_types = []
+
+ # If the 'PR Type' key is present in the dictionary, split its value by comma and assign it to 'pr_types'
+ if 'PR Type' in self.data:
+ if type(self.data['PR Type']) == list:
+ pr_types = self.data['PR Type']
+ elif type(self.data['PR Type']) == str:
+ pr_types = self.data['PR Type'].split(',')
+
+ return pr_types
+
+ def _prepare_pr_answer_with_markers(self) -> Tuple[str, str]:
+ logging.info(f"Using description marker replacements {self.pr_id}")
+ title = self.vars["title"]
+ body = self.user_description
+ if get_settings().pr_description.include_generated_by_header:
+ ai_header = f"### 🤖 Generated by PR Agent at {self.git_provider.last_commit_id.sha}\n\n"
+ else:
+ ai_header = ""
+
+ ai_summary = self.data.get('PR Description')
+ if ai_summary and not re.search(r'', body):
+ summary = f"{ai_header}{ai_summary}"
+ body = body.replace('pr_agent:summary', summary)
+
+ if not re.search(r'', body):
+ ai_walkthrough = self.data.get('PR Main Files Walkthrough')
+ if ai_walkthrough:
+ walkthrough = str(ai_header)
+ for file in ai_walkthrough:
+ filename = file['filename'].replace("'", "`")
+ description = file['changes in file'].replace("'", "`")
+ walkthrough += f'- `{filename}`: {description}\n'
+
+ body = body.replace('pr_agent:walkthrough', walkthrough)
+
+ return title, body
+
+ def _prepare_pr_answer(self) -> Tuple[str, str]:
"""
Prepare the PR description based on the AI prediction data.
Returns:
- title: a string containing the PR title.
- - pr_body: a string containing the PR body in a markdown format.
- - pr_types: a list of strings containing the PR types.
- - markdown_text: a string containing the AI prediction data in a markdown format. used for publishing a comment
- - user_description: a string containing the user description
+ - pr_body: a string containing the PR description body in a markdown format.
"""
- # Load the AI prediction data into a dictionary
- data = load_yaml(self.prediction.strip())
- if get_settings().pr_description.add_original_user_description and self.user_description:
- data["User Description"] = self.user_description
-
- # Initialization
- pr_types = []
-
- # If the 'PR Type' key is present in the dictionary, split its value by comma and assign it to 'pr_types'
- if 'PR Type' in data:
- if type(data['PR Type']) == list:
- pr_types = data['PR Type']
- elif type(data['PR Type']) == str:
- pr_types = data['PR Type'].split(',')
+ # Iterate over the dictionary items and append the key and value to 'markdown_text' in a markdown format
+ markdown_text = ""
+ for key, value in self.data.items():
+ markdown_text += f"## {key}\n\n"
+ markdown_text += f"{value}\n\n"
# Remove the 'PR Title' key from the dictionary
- ai_title = data.pop('PR Title')
+ ai_title = self.data.pop('PR Title', self.vars["title"])
if get_settings().pr_description.keep_original_user_title:
# Assign the original PR title to the 'title' variable
title = self.vars["title"]
@@ -173,26 +227,27 @@ class PRDescription:
# Iterate over the remaining dictionary items and append the key and value to 'pr_body' in a markdown format,
# except for the items containing the word 'walkthrough'
pr_body = ""
- for idx, (key, value) in enumerate(data.items()):
+ for idx, (key, value) in enumerate(self.data.items()):
pr_body += f"## {key}:\n"
if 'walkthrough' in key.lower():
# for filename, description in value.items():
+ if self.git_provider.is_supported("gfm_markdown"):
+ pr_body += " files:
\n\n"
for file in value:
filename = file['filename'].replace("'", "`")
description = file['changes in file']
pr_body += f'`{filename}`: {description}\n'
+ if self.git_provider.is_supported("gfm_markdown"):
+ pr_body +=" \n"
else:
# if the value is a list, join its items by comma
if type(value) == list:
value = ', '.join(v for v in value)
pr_body += f"{value}\n"
- if idx < len(data) - 1:
+ if idx < len(self.data) - 1:
pr_body += "\n___\n"
- markdown_text = f"## Title\n\n{title}\n\n___\n{pr_body}"
- description = data['PR Description']
-
if get_settings().config.verbosity_level >= 2:
logging.info(f"title:\n{title}\n{pr_body}")
- return title, pr_body, pr_types, markdown_text, description
\ No newline at end of file
+ return title, pr_body
\ No newline at end of file
diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py
index 7f790d3b..b6bca536 100644
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@@ -59,6 +59,7 @@ class PRReviewer:
"require_tests": get_settings().pr_reviewer.require_tests_review,
"require_security": get_settings().pr_reviewer.require_security_review,
"require_focused": get_settings().pr_reviewer.require_focused_review,
+ "require_estimate_effort_to_review": get_settings().pr_reviewer.require_estimate_effort_to_review,
'num_code_suggestions': get_settings().pr_reviewer.num_code_suggestions,
'question_str': question_str,
'answer_str': answer_str,
diff --git a/pr_agent/tools/pr_similar_issue.py b/pr_agent/tools/pr_similar_issue.py
index 98d6a1f6..d7b6a799 100644
--- a/pr_agent/tools/pr_similar_issue.py
+++ b/pr_agent/tools/pr_similar_issue.py
@@ -98,11 +98,14 @@ class PRSimilarIssue:
logging.info('No new issues to update')
async def run(self):
+ logging.info('Getting issue...')
repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1])
issue_main = self.git_provider.repo_obj.get_issue(original_issue_number)
issue_str, comments, number = self._process_issue(issue_main)
openai.api_key = get_settings().openai.key
+ logging.info('Done')
+ logging.info('Querying...')
res = openai.Embedding.create(input=[issue_str], engine=MODEL)
embeds = [record['embedding'] for record in res['data']]
pinecone_index = pinecone.Index(index_name=self.index_name)
@@ -111,22 +114,34 @@ class PRSimilarIssue:
filter={"repo": self.repo_name_for_index},
include_metadata=True).to_dict()
relevant_issues_number_list = []
+ relevant_comment_number_list = []
+ score_list = []
for r in res['matches']:
issue_number = int(r["id"].split('.')[0].split('_')[-1])
if original_issue_number == issue_number:
continue
if issue_number not in relevant_issues_number_list:
relevant_issues_number_list.append(issue_number)
+ if 'comment' in r["id"]:
+ relevant_comment_number_list.append(int(r["id"].split('.')[1].split('_')[-1]))
+ else:
+ relevant_comment_number_list.append(-1)
+ score_list.append(str("{:.2f}".format(r['score'])))
+ logging.info('Done')
- similar_issues_str = "Similar Issues:\n\n"
+ logging.info('Publishing response...')
+ similar_issues_str = "### Similar Issues\n___\n\n"
for i, issue_number_similar in enumerate(relevant_issues_number_list):
issue = self.git_provider.repo_obj.get_issue(issue_number_similar)
title = issue.title
url = issue.html_url
- similar_issues_str += f"{i + 1}. [{title}]({url})\n\n"
+ if relevant_comment_number_list[i] != -1:
+ url = list(issue.get_comments())[relevant_comment_number_list[i]].html_url
+ similar_issues_str += f"{i + 1}. **[{title}]({url})** (score={score_list[i]})\n\n"
if get_settings().config.publish_output:
response = issue_main.create_comment(similar_issues_str)
logging.info(similar_issues_str)
+ logging.info('Done')
def _process_issue(self, issue):
header = issue.title
diff --git a/tests/unittest/test_language_handler.py b/tests/unittest/test_language_handler.py
index 875ec1a7..fdde7bb0 100644
--- a/tests/unittest/test_language_handler.py
+++ b/tests/unittest/test_language_handler.py
@@ -61,7 +61,7 @@ class TestSortFilesByMainLanguages:
type('', (object,), {'filename': 'file1.py'})(),
type('', (object,), {'filename': 'file2.java'})()
]
- expected_output = [{'language': 'Other', 'files': []}]
+ expected_output = [{'language': 'Other', 'files': files}]
assert sort_files_by_main_languages(languages, files) == expected_output
# Tests that function handles empty files list