diff --git a/.dockerignore b/.dockerignore
index 9c8ebe57..fb39a7e5 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,2 +1,3 @@
venv/
-pr_agent/settings/.secrets.toml
\ No newline at end of file
+pr_agent/settings/.secrets.toml
+pics/
\ No newline at end of file
diff --git a/PR_COMPRESSION.md b/PR_COMPRESSION.md
index 82fde0c4..4783b43b 100644
--- a/PR_COMPRESSION.md
+++ b/PR_COMPRESSION.md
@@ -39,4 +39,4 @@ We use [tiktoken](https://github.com/openai/tiktoken) to tokenize the patches af
4. If we haven't reached the max token length, add the `deleted files` to the prompt until the prompt reaches the max token length (hard stop), skip the rest of the patches.
### Example
-
\ No newline at end of file
+
\ No newline at end of file
diff --git a/README.md b/README.md
index 091830dd..5e56260a 100644
--- a/README.md
+++ b/README.md
@@ -27,25 +27,25 @@ CodiumAI `PR-Agent` is an open-source tool aiming to help developers review PRs
Describe:
-
+
Review:
-
+
Ask:
-
+
Improve:
-
+
@@ -64,7 +64,7 @@ CodiumAI `PR-Agent` is an open-source tool aiming to help developers review PRs
Experience GPT-4 powered PR review on your public GitHub repository with our hosted PR-Agent. To try it, just mention `@CodiumAI-Agent` and add the desired command in any PR comment! The agent will generate a response based on your command.
-
+
To set up your own PR-Agent, see the [Quickstart](#Quickstart) section
@@ -78,6 +78,7 @@ To set up your own PR-Agent, see the [Quickstart](#Quickstart) section
| | Ask | :white_check_mark: | :white_check_mark: | |
| | Auto-Description | :white_check_mark: | | |
| | Improve Code | :white_check_mark: | :white_check_mark: | |
+| | Reflect and Review | :white_check_mark: | | |
| | | | | |
| USAGE | CLI | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| | Tagging bot | :white_check_mark: | :white_check_mark: | |
@@ -92,6 +93,7 @@ Examples for invoking the different tools via the [CLI](#quickstart):
- **Describe**: python cli.py --pr-url=
describe
- **Improve**: python cli.py --pr-url= improve
- **Ask**: python cli.py --pr-url= ask "Write me a poem about this PR"
+- **Reflect**: python cli.py --pr-url= reflect
"" is the url of the relevant PR (for example: https://github.com/Codium-ai/pr-agent/pull/50).
@@ -120,16 +122,17 @@ Here are several ways to install and run PR-Agent:
## Usage and Tools
-**PR-Agent** provides four types of interactions ("tools"): `"PR Reviewer"`, `"PR Q&A"`, `"PR Description"` and `"PR Code Sueggestions"`.
+**PR-Agent** provides five types of interactions ("tools"): `"PR Reviewer"`, `"PR Q&A"`, `"PR Description"`, `"PR Code Suggestions"` and `"PR Reflect and Review"`.
- The "PR Reviewer" tool automatically analyzes PRs, and provides various types of feedback.
- The "PR Q&A" tool answers free-text questions about the PR.
- The "PR Description" tool automatically sets the PR Title and body.
- The "PR Code Suggestion" tool provide inline code suggestions for the PR that can be applied and committed.
+- The "PR Reflect and Review" tool first initiates a dialog with the user and asks them to reflect on the PR, and then provides a review.
## How it works
-
+
Check out the [PR Compression strategy](./PR_COMPRESSION.md) page for more details on how we convert a code diff to a manageable LLM prompt
@@ -138,11 +141,11 @@ Check out the [PR Compression strategy](./PR_COMPRESSION.md) page for more detai
- [ ] Support open-source models, as a replacement for openai models. (Note - a minimal requirement for each open-source model is to have 8k+ context, and good support for generating json as an output)
- [x] Support other Git providers, such as Gitlab and Bitbucket.
- [ ] Develop additional logics for handling large PRs, and compressing git patches
-- [ ] Dedicated tools and sub-tools for specific programming languages (Python, Javascript, Java, C++, etc)
- [ ] Add additional context to the prompt. For example, repo (or relevant files) summarization, with tools such a [ctags](https://github.com/universal-ctags/ctags)
- [ ] Adding more tools. Possible directions:
- [x] PR description
- [x] Inline code suggestions
+ - [x] Reflect and review
- [ ] Enforcing CONTRIBUTING.md guidelines
- [ ] Performance (are there any performance issues)
- [ ] Documentation (is the PR properly documented)
diff --git a/pics/.DS_Store b/pics/.DS_Store
deleted file mode 100644
index 5008ddfc..00000000
Binary files a/pics/.DS_Store and /dev/null differ
diff --git a/pics/ask.gif b/pics/ask.gif
deleted file mode 100644
index b4a3557b..00000000
Binary files a/pics/ask.gif and /dev/null differ
diff --git a/pics/demo.gif b/pics/demo.gif
deleted file mode 100644
index 4fe5f968..00000000
Binary files a/pics/demo.gif and /dev/null differ
diff --git a/pics/describe.gif b/pics/describe.gif
deleted file mode 100644
index a279db9f..00000000
Binary files a/pics/describe.gif and /dev/null differ
diff --git a/pics/git_patch_logic.png b/pics/git_patch_logic.png
deleted file mode 100644
index bc378db1..00000000
Binary files a/pics/git_patch_logic.png and /dev/null differ
diff --git a/pics/improve.gif b/pics/improve.gif
deleted file mode 100644
index f284af89..00000000
Binary files a/pics/improve.gif and /dev/null differ
diff --git a/pics/main_pic_4_tools.gif b/pics/main_pic_4_tools.gif
deleted file mode 100644
index 8cdbef05..00000000
Binary files a/pics/main_pic_4_tools.gif and /dev/null differ
diff --git a/pics/pr-agent-review-process1.gif b/pics/pr-agent-review-process1.gif
deleted file mode 100644
index dca5e631..00000000
Binary files a/pics/pr-agent-review-process1.gif and /dev/null differ
diff --git a/pics/pr_agent_overview.png b/pics/pr_agent_overview.png
deleted file mode 100644
index 2ee523bd..00000000
Binary files a/pics/pr_agent_overview.png and /dev/null differ
diff --git a/pics/pr_auto_description.png b/pics/pr_auto_description.png
deleted file mode 100644
index 1f3d1859..00000000
Binary files a/pics/pr_auto_description.png and /dev/null differ
diff --git a/pics/pr_code_suggestions.png b/pics/pr_code_suggestions.png
deleted file mode 100644
index b7c86e2e..00000000
Binary files a/pics/pr_code_suggestions.png and /dev/null differ
diff --git a/pics/pr_questions.png b/pics/pr_questions.png
deleted file mode 100644
index 7ba11491..00000000
Binary files a/pics/pr_questions.png and /dev/null differ
diff --git a/pics/pr_reviewer_1.png b/pics/pr_reviewer_1.png
deleted file mode 100644
index 1ae2bad6..00000000
Binary files a/pics/pr_reviewer_1.png and /dev/null differ
diff --git a/pics/pr_reviewer_2.png b/pics/pr_reviewer_2.png
deleted file mode 100644
index 6e2fe1c3..00000000
Binary files a/pics/pr_reviewer_2.png and /dev/null differ
diff --git a/pics/review.gif b/pics/review.gif
deleted file mode 100644
index d88b51aa..00000000
Binary files a/pics/review.gif and /dev/null differ
diff --git a/pr_agent/agent/pr_agent.py b/pr_agent/agent/pr_agent.py
index 9a5ccf36..672a7a94 100644
--- a/pr_agent/agent/pr_agent.py
+++ b/pr_agent/agent/pr_agent.py
@@ -2,8 +2,10 @@ import re
from pr_agent.tools.pr_code_suggestions import PRCodeSuggestions
from pr_agent.tools.pr_description import PRDescription
+from pr_agent.tools.pr_information_from_user import PRInformationFromUser
from pr_agent.tools.pr_questions import PRQuestions
from pr_agent.tools.pr_reviewer import PRReviewer
+from pr_agent.config_loader import settings
class PRAgent:
@@ -11,8 +13,13 @@ class PRAgent:
pass
async def handle_request(self, pr_url, request) -> bool:
- if any(cmd in request for cmd in ["/review", "/review_pr"]):
- await PRReviewer(pr_url).review()
+ if any(cmd in request for cmd in ["/answer"]):
+ await PRReviewer(pr_url, is_answer=True).review()
+ elif any(cmd in request for cmd in ["/review", "/review_pr", "/reflect_and_review"]):
+ if settings.pr_reviewer.ask_and_reflect or "/reflect_and_review" in request:
+ await PRInformationFromUser(pr_url).generate_questions()
+ else:
+ await PRReviewer(pr_url).review()
elif any(cmd in request for cmd in ["/describe", "/describe_pr"]):
await PRDescription(pr_url).describe()
elif any(cmd in request for cmd in ["/improve", "/improve_code"]):
diff --git a/pr_agent/cli.py b/pr_agent/cli.py
index 3eb6bc76..ca9d5db0 100644
--- a/pr_agent/cli.py
+++ b/pr_agent/cli.py
@@ -18,19 +18,22 @@ For example:
- cli.py --pr-url=... describe
- cli.py --pr-url=... improve
- cli.py --pr-url=... ask "write me a poem about this PR"
+- cli.py --pr-url=... reflect
Supported commands:
review / review_pr - Add a review that includes a summary of the PR and specific suggestions for improvement.
ask / ask_question [question] - Ask a question about the PR.
describe / describe_pr - Modify the PR title and description based on the PR's contents.
improve / improve_code - Suggest improvements to the code in the PR as pull request comments ready to commit.
+reflect - Ask the PR author questions about the PR.
""")
parser.add_argument('--pr_url', type=str, help='The URL of the PR to review', required=True)
parser.add_argument('command', type=str, help='The', choices=['review', 'review_pr',
'ask', 'ask_question',
'describe', 'describe_pr',
'improve', 'improve_code',
- 'user_questions'], default='review')
+ 'reflect', 'review_after_reflect'],
+ default='review')
parser.add_argument('rest', nargs=argparse.REMAINDER, default=[])
args = parser.parse_args()
logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))
@@ -56,10 +59,14 @@ improve / improve_code - Suggest improvements to the code in the PR as pull requ
print(f"Reviewing PR: {args.pr_url}")
reviewer = PRReviewer(args.pr_url, cli_mode=True)
asyncio.run(reviewer.review())
- elif command in ['user_questions']:
+ elif command in ['reflect']:
print(f"Asking the PR author questions: {args.pr_url}")
reviewer = PRInformationFromUser(args.pr_url)
asyncio.run(reviewer.generate_questions())
+ elif command in ['review_after_reflect']:
+ print(f"Processing author's answers and sending review: {args.pr_url}")
+ reviewer = PRReviewer(args.pr_url, cli_mode=True, is_answer=True)
+ asyncio.run(reviewer.review())
else:
print(f"Unknown command: {command}")
parser.print_help()
diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py
index e9946aa9..86e445ac 100644
--- a/pr_agent/git_providers/bitbucket_provider.py
+++ b/pr_agent/git_providers/bitbucket_provider.py
@@ -25,6 +25,11 @@ class BitbucketProvider:
if pr_url:
self.set_pr(pr_url)
+    def is_supported(self, capability: str) -> bool:
+        """Report whether this provider implements the named capability."""
+        # Issue comments are the only capability reported as unavailable here.
+        return capability != 'get_issue_comments'
+
def set_pr(self, pr_url: str):
self.workspace_slug, self.repo_slug, self.pr_num = self._parse_pr_url(pr_url)
self.pr = self._get_pr()
@@ -74,6 +79,9 @@ class BitbucketProvider:
def get_user_id(self):
return 0
+    def get_issue_comments(self):
+        raise NotImplementedError("Bitbucket provider does not support issue comments yet")  # is_supported('get_issue_comments') reports False for this provider
+
@staticmethod
def _parse_pr_url(pr_url: str) -> Tuple[str, int]:
parsed_url = urlparse(pr_url)
diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py
index ae39cc74..4beba204 100644
--- a/pr_agent/git_providers/git_provider.py
+++ b/pr_agent/git_providers/git_provider.py
@@ -21,6 +21,10 @@ class FilePatchInfo:
class GitProvider(ABC):
+    @abstractmethod
+    def is_supported(self, capability: str) -> bool:
+        """Return whether the concrete provider implements the capability named by ``capability``."""
+
@abstractmethod
def get_diff_files(self) -> list[FilePatchInfo]:
pass
@@ -62,6 +66,10 @@ class GitProvider(ABC):
def get_pr_description(self):
pass
+    @abstractmethod
+    def get_issue_comments(self):
+        """Return the comments posted on the PR discussion; providers without support raise NotImplementedError."""
+
def get_main_pr_language(languages, files) -> str:
"""
diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py
index 6d6d1c13..5d11c586 100644
--- a/pr_agent/git_providers/github_provider.py
+++ b/pr_agent/git_providers/github_provider.py
@@ -23,6 +23,9 @@ class GithubProvider(GitProvider):
self.set_pr(pr_url)
self.last_commit_id = list(self.pr.get_commits())[-1]
+    def is_supported(self, capability: str) -> bool:
+        return True  # no capability is excluded for this provider
+
def set_pr(self, pr_url: str):
self.repo, self.pr_num = self._parse_pr_url(pr_url)
self.pr = self._get_pr()
@@ -172,6 +175,9 @@ class GithubProvider(GitProvider):
notifications = self.github_client.get_user().get_notifications(since=since)
return notifications
+    def get_issue_comments(self):
+        return self.pr.get_issue_comments()  # delegates to the PR object stored in self.pr
+
@staticmethod
def _parse_pr_url(pr_url: str) -> Tuple[str, int]:
parsed_url = urlparse(pr_url)
diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py
index 07a25e2f..0149662b 100644
--- a/pr_agent/git_providers/gitlab_provider.py
+++ b/pr_agent/git_providers/gitlab_provider.py
@@ -4,6 +4,7 @@ from typing import Optional, Tuple
from urllib.parse import urlparse
import gitlab
+from gitlab import GitlabGetError
from pr_agent.config_loader import settings
@@ -31,6 +32,11 @@ class GitLabProvider(GitProvider):
self.RE_HUNK_HEADER = re.compile(
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
+    def is_supported(self, capability: str) -> bool:
+        """Report whether this provider implements the named capability."""
+        # Issue comments are the only capability reported as unavailable here.
+        return capability != 'get_issue_comments'
+
@property
def pr(self):
'''The GitLab terminology is merge request (MR) instead of pull request (PR)'''
@@ -42,7 +48,11 @@ class GitLabProvider(GitProvider):
self.last_diff = self.mr.diffs.list()[-1]
def _get_pr_file_content(self, file_path: str, branch: str) -> str:
- return self.gl.projects.get(self.id_project).files.get(file_path, branch).decode()
+ try:
+ return self.gl.projects.get(self.id_project).files.get(file_path, branch).decode()
+ except GitlabGetError:
+            # When the file was newly created, the lookup raises GitlabGetError (404 file not found); in that case return an empty string as its content.
+ return ''
def get_diff_files(self) -> list[FilePatchInfo]:
diffs = self.mr.changes()['changes']
@@ -58,8 +68,10 @@ class GitLabProvider(GitProvider):
elif diff['renamed_file']:
edit_type = EDIT_TYPE.RENAMED
try:
- original_file_content_str = bytes.decode(original_file_content_str, 'utf-8')
- new_file_content_str = bytes.decode(new_file_content_str, 'utf-8')
+ if isinstance(original_file_content_str, bytes):
+ original_file_content_str = bytes.decode(original_file_content_str, 'utf-8')
+ if isinstance(new_file_content_str, bytes):
+ new_file_content_str = bytes.decode(new_file_content_str, 'utf-8')
except UnicodeDecodeError:
logging.warning(
f"Cannot decode file {diff['old_path']} or {diff['new_path']} in merge request {self.id_mr}")
@@ -203,6 +215,9 @@ class GitLabProvider(GitProvider):
def get_pr_description(self):
return self.mr.description
+    def get_issue_comments(self):
+        raise NotImplementedError("GitLab provider does not support issue comments yet")  # is_supported('get_issue_comments') reports False for this provider
+
def _parse_merge_request_url(self, merge_request_url: str) -> Tuple[int, int]:
parsed_url = urlparse(merge_request_url)
diff --git a/pr_agent/servers/github_action_runner.py b/pr_agent/servers/github_action_runner.py
index ba6ffe9c..31a4800d 100644
--- a/pr_agent/servers/github_action_runner.py
+++ b/pr_agent/servers/github_action_runner.py
@@ -3,9 +3,11 @@ import json
import os
import re
+from pr_agent.agent.pr_agent import PRAgent
from pr_agent.config_loader import settings
from pr_agent.tools.pr_code_suggestions import PRCodeSuggestions
from pr_agent.tools.pr_description import PRDescription
+from pr_agent.tools.pr_information_from_user import PRInformationFromUser
from pr_agent.tools.pr_questions import PRQuestions
from pr_agent.tools.pr_reviewer import PRReviewer
@@ -53,20 +55,7 @@ async def run_action():
pr_url = event_payload.get("issue", {}).get("pull_request", {}).get("url", None)
if pr_url:
body = comment_body.strip().lower()
- if any(cmd in body for cmd in ["/review", "/review_pr"]):
- await PRReviewer(pr_url).review()
- elif any(cmd in body for cmd in ["/describe", "/describe_pr"]):
- await PRDescription(pr_url).describe()
- elif any(cmd in body for cmd in ["/improve", "/improve_code"]):
- await PRCodeSuggestions(pr_url).suggest()
- elif any(cmd in body for cmd in ["/ask", "/ask_question"]):
- pattern = r'(/ask|/ask_question)\s*(.*)'
- matches = re.findall(pattern, comment_body, re.IGNORECASE)
- if matches:
- question = matches[0][1]
- await PRQuestions(pr_url, question).answer()
- else:
- print(f"Unknown command: {body}")
+ await PRAgent().handle_request(pr_url, body)
if __name__ == '__main__':
diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
index cc2ffa77..fc2fa2b8 100644
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@@ -10,6 +10,7 @@ require_tests_review=true
require_security_review=true
num_code_suggestions=3
inline_code_comments = true
+ask_and_reflect=false
[pr_description]
publish_description_as_comment=false
diff --git a/pr_agent/settings/pr_information_from_user_prompts.toml b/pr_agent/settings/pr_information_from_user_prompts.toml
index c4ba1d52..f32ec715 100644
--- a/pr_agent/settings/pr_information_from_user_prompts.toml
+++ b/pr_agent/settings/pr_information_from_user_prompts.toml
@@ -1,16 +1,17 @@
[pr_information_from_user_prompt]
system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests.
-Given the PR Info and the PR Git Diff, generate 4 questions about the PR for the PR author.
+Given the PR Info and the PR Git Diff, generate 3 short questions about the PR code for the PR author.
The goal of the questions is to help the language model understand the PR better, so the questions should be insightful, informative, non-trivial, and relevant to the PR.
-Prefer yes\\no or multiple choice questions. If you have to ask open-ended questions, make sure they are not too difficult, and can be answered in a sentence or two.
+You should prefer asking yes\\no questions, or multiple choice questions. Also add at least one open-ended question, but make sure they are not too difficult, and can be answered in a sentence or two.
Example output:
'
Questions to better understand the PR:
-1. ...
-2. ...
+1) ...
+2) ...
...
+'
"""
user="""PR Info:
diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml
index 022490b5..51a873c4 100644
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@@ -26,6 +26,12 @@ You must use the following JSON schema to format your answer:
"description": "yes\\no question: does this PR have relevant tests ?"
},
{%- endif %}
+{%- if question_str %}
+ "Insights from user's answer": {
+ "type": "string",
+ "description": "shortly summarize the insights you gained from the user's answers to the questions"
+ },
+{%- endif %}
{%- if require_focused %}
"Focused PR": {
"type": "string",
@@ -115,6 +121,16 @@ Description: '{{description}}'
Main language: {{language}}
{%- endif %}
+{%- if question_str %}
+######
+Here are questions to better understand the PR. Use the answers to provide better feedback.
+
+{{question_str|trim}}
+
+User answers:
+{{answer_str|trim}}
+######
+{%- endif %}
The PR Git Diff:
```
diff --git a/pr_agent/tools/pr_information_from_user.py b/pr_agent/tools/pr_information_from_user.py
index d4412dcf..ff78858f 100644
--- a/pr_agent/tools/pr_information_from_user.py
+++ b/pr_agent/tools/pr_information_from_user.py
@@ -21,7 +21,7 @@ class PRInformationFromUser:
self.vars = {
"title": self.git_provider.pr.title,
"branch": self.git_provider.get_pr_branch(),
- "description": self.git_provider.get_description(),
+ "description": self.git_provider.get_pr_description(),
"language": self.main_pr_language,
"diff": "", # empty diff for initial calculation
}
@@ -35,7 +35,7 @@ class PRInformationFromUser:
async def generate_questions(self):
logging.info('Generating question to the user...')
if settings.config.publish_output:
- self.git_provider.publish_comment("Preparing answer...", is_temporary=True)
+ self.git_provider.publish_comment("Preparing questions...", is_temporary=True)
logging.info('Getting PR diff...')
self.patches_diff = get_pr_diff(self.git_provider, self.token_handler)
logging.info('Getting AI prediction...')
@@ -66,6 +66,6 @@ class PRInformationFromUser:
model_output = self.prediction.strip()
if settings.config.verbosity_level >= 2:
logging.info(f"answer_str:\n{model_output}")
- answer_str = f"{model_output}\n\n Please respond to the question above in the following format:\n\n" + \
- f"/answer \n\n" + f"Example:\n'\n/answer\n1. Yes, because ...\n2. No, because ...\n'"
+ answer_str = f"{model_output}\n\n Please respond to the questions above in the following format:\n\n" +\
+ f"\n>/answer\n>1) ...\n>2) ...\n>...\n"
return answer_str
diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py
index fe4a6986..0e96fea3 100644
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@@ -15,12 +15,16 @@ from pr_agent.servers.help import bot_help_text, actions_help_text
class PRReviewer:
- def __init__(self, pr_url: str, cli_mode=False):
+ def __init__(self, pr_url: str, cli_mode=False, is_answer: bool = False):
self.git_provider = get_git_provider()(pr_url)
self.main_language = get_main_pr_language(
self.git_provider.get_languages(), self.git_provider.get_files()
)
+ self.is_answer = is_answer
+ if self.is_answer and not self.git_provider.is_supported("get_issue_comments"):
+ raise Exception(f"Answer mode is not supported for {settings.config.git_provider} for now")
+        question_str, answer_str = self._get_user_answers()  # helper returns a (questions, answers) pair; unpack it instead of binding the tuple to both names
self.ai_handler = AiHandler()
self.patches_diff = None
self.prediction = None
@@ -35,6 +39,9 @@ class PRReviewer:
"require_security": settings.pr_reviewer.require_security_review,
"require_focused": settings.pr_reviewer.require_focused_review,
'num_code_suggestions': settings.pr_reviewer.num_code_suggestions,
+ #
+ 'question_str': question_str,
+ 'answer_str': answer_str,
}
self.token_handler = TokenHandler(self.git_provider.pr,
self.vars,
@@ -142,3 +149,16 @@ class PRReviewer:
if comments:
self.git_provider.publish_inline_comments(comments)
+
+    def _get_user_answers(self):
+        """Return (question_str, answer_str) comment bodies from the PR discussion; ("", "") when not in answer mode."""
+        answer_str = question_str = ""
+        if self.is_answer:
+            for message in self.git_provider.get_issue_comments().reversed:  # presumably newest first; keep the first hit of each kind
+                if "Questions to better understand the PR:" in message.body:
+                    question_str = question_str or message.body
+                elif '/answer' in message.body:
+                    answer_str = answer_str or message.body
+                if answer_str and question_str:
+                    break
+        return question_str, answer_str