Compare commits

..

44 Commits

Author SHA1 Message Date
5fdc9223e9 Separate output token threshold to soft and hard instead of implicit hard = soft/2 2023-07-11 14:11:46 +03:00
c8d369ee61 Protect against no notifications received 2023-07-06 20:04:32 +03:00
ffa4ce3f1e Protect against no notifications received 2023-07-06 19:22:55 +03:00
2de83827b6 Add exception protection for unexpected conditions during request handling 2023-07-06 19:08:47 +03:00
9e96fbab1f Don't add "How to use" when running from the command line - a small correction #2 2023-07-06 18:33:03 +03:00
f1ab6ec88f Merge pull request #11 from Codium-ai/bugfix/double_notifications
Protect from notifications that may be handled twice
2023-07-06 18:17:13 +03:00
f293717827 Merge pull request #12 from Codium-ai/readme-content-fixes
fix the configuration order in the outline, section break fixes, text…
2023-07-06 18:15:48 +03:00
270912d41e fix the configuration order in the outline, section break fixes, text adjustments 2023-07-06 18:11:01 +03:00
933f2ca093 Merge pull request #10 from Codium-ai/readme-updates
add giff, icon and demo section
2023-07-06 17:55:48 +03:00
4331610e01 Don't add "How to use" when running from the command line - a small correction 2023-07-06 17:53:52 +03:00
d04c0f490c Don't add "How to use" when running from the command line 2023-07-06 17:52:12 +03:00
f7c703751f add ai maintainer to the list of links 2023-07-06 17:51:01 +03:00
1eab6a8479 adjust the header paraghraph 2023-07-06 17:47:21 +03:00
6648c04799 Protect from notifications that may be handled twice by keeping a set of handled notification IDs 2023-07-06 17:46:43 +03:00
24697d613b resolve conflicts after merging main 2023-07-06 17:46:19 +03:00
938a8a7c7d add giff, icon and demo section 2023-07-06 17:41:19 +03:00
deda4baa87 Merge pull request #9 from Codium-ai/feature/minor_fixes
minor fixes
2023-07-06 17:35:04 +03:00
30248c2a7b readme update 2023-07-06 17:34:40 +03:00
c2e3bf7b70 newline 2023-07-06 16:39:56 +03:00
e5e90e35e5 minor fixes 2023-07-06 16:27:39 +03:00
53e7ff62bf Merge pull request #3 from Codium-ai/algo/combine_modified_files_one_list
Combine all modified and deleted files that been compressed to the prompt
2023-07-06 14:59:13 +03:00
1eea60c6a5 Merge pull request #7 from Codium-ai/algo/fix_speacial_tokens
Fix encoding error on special_tokens
2023-07-06 14:14:52 +03:00
d0c544e650 Merge pull request #8 from Codium-ai/tombrewsviews-patch-1
Update README.md
2023-07-06 14:01:07 +03:00
28249924fd Update README.md
name change
2023-07-06 13:57:23 +03:00
a2d8695ca4 Merge pull request #6 from Codium-ai/feature/github_tag_improve
Improve handling of user interaction on the Github App and the polling bot
2023-07-06 13:24:47 +03:00
259fa84eeb disabling encoding error on special_tokens 2023-07-06 13:22:12 +03:00
ff720d32fe pylance 2023-07-06 13:20:08 +03:00
399d7b7990 Improve handling of tagging and Github app user interaction - a small correction 2023-07-06 13:09:51 +03:00
74dfae8dbe Merge pull request #5 from Codium-ai/enhancment/markdown
formatting
2023-07-06 13:00:37 +03:00
71b077faf8 Merge remote-tracking branch 'origin/enhancment/markdown' into feature/github_tag_improve 2023-07-06 12:59:25 +03:00
b6333e7f20 Improve handling of tagging and Github app user interaction 2023-07-06 12:58:05 +03:00
e53ae712f9 formatting 2023-07-06 12:49:10 +03:00
542c4599ba fix tests 2023-07-06 12:36:25 +03:00
795f6ab8d5 Add deleted files section and count their tokens 2023-07-06 12:21:27 +03:00
e3b2469e0f Merge commit '0ebd29d39891fba68a64e476cd52b16428c3132b' into algo/combine_modified_files_one_list 2023-07-06 12:01:51 +03:00
0ebd29d398 Merge pull request #4 from Codium-ai/feature/merge_cli
Merge CLI scripts
2023-07-06 11:52:06 +03:00
987befe457 Merge CLI scripts to cli.py, update Dockerfile and README.md 2023-07-06 11:37:44 +03:00
1a626fb1f3 change "modified files" to "more modified files" 2023-07-06 11:23:38 +03:00
0ce42e786e Combine all modified file that been compressed into one list at the end of the PR 2023-07-06 11:12:41 +03:00
84231f99dc Merge pull request #2 from Codium-ai/feature/support_openai_org
Add support for OpenAI organization in the secrets file
2023-07-06 10:06:16 +03:00
70b7acee15 Merge pull request #1 from Codium-ai/feature/delete_initial_comment
delete "Preparing review..." comment
2023-07-06 10:03:50 +03:00
aa1c32c714 Merge remote-tracking branch 'origin/feature/delete_initial_comment' into feature/improve_instructions 2023-07-06 10:00:31 +03:00
f1004273ec add try-except 2023-07-06 08:57:55 +03:00
33f859b073 delete "Preparing review..." comment 2023-07-06 08:44:08 +03:00
24 changed files with 307 additions and 173 deletions

View File

@ -1 +1,2 @@
venv/
venv/
pr_agent/settings/.secrets.toml

163
README.md
View File

@ -1,57 +1,59 @@
<div align="center">
# 🛡️ CodiumAI PR-Agent
<img src="./pics/Icon-7.png" alt="pr-agent_icon" width="100"/>
# pr-agent
[![GitHub license](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://github.com/Codium-ai/pr-agent/blob/main/LICENSE)
[![Discord](https://badgen.net/badge/icon/discord?icon=discord&label&color=purple)](https://discord.com/channels/1057273017547378788/1126104260430528613)
CodiumAI `PR-Agent` is an open-source tool that helps developers review PRs faster and more efficiently.
It automatically analyzes the PR, and provides feedback and suggestions, and can answer questions.
It is powered by GPT-4, and is based on the [CodiumAI](https://github.com/Codium-ai/) platform.
CodiumAI `pr-agent` is an open-source tool powered by GPT-4 that aims to help developers review PRs faster and more efficiently. It automatically analyzes the PR, provides feedback and suggestions, and can answer questions.
</div>
TBD: Add screenshot of the PR Reviewer (could be gif)
- [Quickstart](#Quickstart)
- [Usage and Tools](#usage-and-tools)
- [Configuration](#Configuration)
- [Roadmap](#roadmap)
- [Similar projects](#similar-projects)
## Live demo
* [Quickstart](#Quickstart)
* [Configuration](#Configuration)
* [Usage and Tools](#usage-and-tools)
* [Roadmap](#roadmap)
* [Similar projects](#similar-projects)
* Additional files:
* CONTRIBUTION.md
* LICENSE
*
Experience GPT-4 powered PR review on your public Github repository with our hosted pr-agent. To try it, mention @CodiumAI-Agent in a PR comment! The agent will generate the review in response ([see details in the Usage section](#usage-and-tools)).
![Review generation process](./pics/pr-agent-review-process1.gif)
---
## Quickstart
To get started with PR-Agent quickly, you first need to acquire two tokens:
To get started with pr-agent quickly, you first need to acquire two tokens:
1. An OpenAI key from [here](https://platform.openai.com/), with access to GPT-4.
2. A GitHub personal access token (classic) with the repo scope.
There are several ways to use PR-Agent. Let's start with the simplest one:
There are several ways to use pr-agent. Let's start with the simplest one:
---
### Method 1: Use Docker image (no installation required)
#### Method 1: Use Docker image (no installation required)
To request a review for a PR, or ask a question about a PR, you can run the appropriate
Python scripts from the scripts folder. Here's how:
To request a review for a PR, or ask a question about a PR, you can run directly from the Docker image. Here's how:
1. To request a review for a PR, run the following command:
```
docker run --rm -it -e OPENAI.KEY=<your key> -e GITHUB.USER_TOKEN=<your token> codiumai/pr-agent \
python pr_agent/scripts/review_pr_from_url.py --pr_url <pr url>
```
---
```
docker run --rm -it -e OPENAI.KEY=<your key> -e GITHUB.USER_TOKEN=<your token> codiumai/pr-agent --pr_url <pr url>
```
2. To ask a question about a PR, run the following command:
```
docker run --rm -it -e OPENAI.KEY -e GITHUB.USER_TOKEN codiumai/pr-agent \
python pr_agent/scripts/answer_pr_questions_from_url.py --pr_url <pr url> --question "<your question>"
docker run --rm -it -e OPENAI.KEY=<your key> -e GITHUB.USER_TOKEN=<your token> codiumai/pr-agent --pr_url <pr url> --question "<your question>"
```
Possible questions you can ask include:
- What is the main theme of this PR?
- Is the PR ready for merge?
- What are the main changes in this PR?
@ -60,45 +62,51 @@ Possible questions you can ask include:
---
### Method 2: Run from source
#### Method 2: Run from source
1. Clone this repository:
```
git clone https://github.com/Codium-ai/pr-agent.git
```
2. Install the requirements in your favorite virtual environment:
```
pip install -r requirements.txt
```
3. Copy the secrets template file and fill in your OpenAI key and your GitHub user token:
```
cp pr_agent/settings/.secrets_template.toml pr_agent/settings/.secrets.toml
# Edit the .secrets.toml file
```
4. Run the appropriate Python scripts from the scripts folder:
```
python pr_agent/scripts/review_pr_from_url.py --pr_url <pr url>
python pr_agent/scripts/answer_pr_questions_from_url.py --pr_url <pr url> --question "<your question>"
python pr_agent/cli.py --pr_url <pr url>
python pr_agent/cli.py --pr_url <pr url> --question "<your question>"
```
---
### Method 3: Run as a polling server; request reviews by tagging your Github user on a PR
#### Method 3: Run as a polling server; request reviews by tagging your Github user on a PR
Follow steps 1-3 of method 2.
Run the following command to start the server:
```
python pr_agent/servers/github_polling.py
```
---
### Method 4: Run as a Github App, allowing you to automate the review process on your private or public repositories.
#### Method 4: Run as a Github App, allowing you to automate the review process on your private or public repositories.
1. Create a GitHub App from the [Github Developer Portal](https://docs.github.com/en/developers/apps/creating-a-github-app).
- Set the following permissions:
- Pull requests: Read & write
- Issue comment: Read & write
@ -108,15 +116,18 @@ python pr_agent/servers/github_polling.py
- Pull request
2. Generate a random secret for your app, and save it for later. For example, you can use:
```
WEBHOOK_SECRET=$(python -c "import secrets; print(secrets.token_hex(10))")
```
3. Acquire the following pieces of information from your app's settings page:
- App private key (click "Generate a private key", and save the file)
- App ID
4. Clone this repository:
```
git clone https://github.com/Codium-ai/pr-agent.git
```
@ -127,21 +138,24 @@ git clone https://github.com/Codium-ai/pr-agent.git
- Copy your app's private key to the private_key field.
- Copy your app's ID to the app_id field.
- Copy your app's webhook secret to the webhook_secret field.
```
cp pr_agent/settings/.secrets_template.toml pr_agent/settings/.secrets.toml
# Edit the .secrets.toml file
```
6. Build a Docker image for the app and optionally push it to a Docker repository. We'll use Dockerhub as an example:
```
docker build . -t codiumai/pr-agent:github_app --target github_app -f docker/Dockerfile
docker push codiumai/pr-agent:github_app # Push to your Docker repository
```
7. Host the app using a server, serverless function, or container environment. Alternatively, for development and
debugging, you may use tools like smee.io to forward webhooks to your local machine.
8. Go back to your app's settings, set the following:
- Webhook URL: The URL of your app's server, or the URL of the smee.io channel.
- Webhook secret: The secret you generated earlier.
@ -150,11 +164,14 @@ docker push codiumai/pr-agent:github_app # Push to your Docker repository
---
## Usage and Tools
CodiumAI PR-Agent provides two types of interactions ("tools"): `"PR Reviewer"` and `"PR Q&A"`.
CodiumAI pr-agent provides two types of interactions ("tools"): `"PR Reviewer"` and `"PR Q&A"`.
- The "PR Reviewer" tool automatically analyzes PRs, and provides different types of feedbacks.
- The "PR Q&A" tool answers free-text questions about the PR.
### PR Reviewer
Here is a quick overview of the different sub-tools of PR Reviewer:
- PR Analysis
@ -163,14 +180,15 @@ Here is a quick overview of the different sub-tools of PR Reviewer:
- PR type classification
- Is the PR covered by relevant tests
- Is the PR minimal and focused
- Are there security concerns
- PR Feedback
- General PR suggestions
- Code suggestions
- Security concerns
This is what a typical output of the PR Reviewer looks like:
---
#### PR Analysis
- 🎯 **Main theme:** Adding language extension handler and token handler
@ -178,32 +196,32 @@ This is how a typical output of the PR Reviewer looks like:
- 📌 **Type of PR:** Enhancement
- 🧪 **Relevant tests added:** No
- **Minimal and focused:** Yes, the PR is focused on adding two new handlers for language extension and token counting.
- 🔒 **Security concerns:** No, the PR does not introduce possible security concerns or issues.
#### PR Feedback
- 💡 **General PR suggestions:** The PR is generally well-structured and the code is clean. However, it would be beneficial to add some tests to ensure the new handlers work as expected. Also, consider adding docstrings to the new functions and classes to improve code readability and maintainability.
- 🤖 **Code suggestions:**
- **suggestion 1:**
- **relevant file:** pr_agent/algo/language_handler.py
- **suggestion content:** Consider using a set instead of a list for 'bad_extensions' as checking membership in a set is faster than in a list. [medium]
- **suggestion 2:**
**suggestion content:** Consider using a set instead of a list for 'bad_extensions' as checking membership in a set is faster than in a list. [medium]
- **relevant file:** pr_agent/algo/language_handler.py
- **suggestion content:** In the 'filter_bad_extensions' function, you are splitting the filename on '.' and taking the last element to get the extension. This might not work as expected if the filename contains multiple '.' characters. Consider using 'os.path.splitext' to get the file extension more reliably. [important]
- 🔒 **Security concerns:** No, the PR does not introduce possible security concerns or issues.
**suggestion content:** In the 'filter_bad_extensions' function, you are splitting the filename on '.' and taking the last element to get the extension. This might not work as expected if the filename contains multiple '.' characters. Consider using 'os.path.splitext' to get the file extension more reliably. [important]
---
### PR Q&A
This tool answers free-text questions about the PR. This is what a typical output of the PR Q&A looks like:
---
**Question**: summarize for me the PR in 4 bullet points
**Answer**:
- The PR introduces a new feature to sort files by their main languages. It uses a mapping of programming languages to their file extensions to achieve this.
- It also introduces a filter to exclude files with certain extensions, deemed as 'bad extensions', from the sorting process.
- The PR modifies the `get_pr_diff` function in `pr_processing.py` to use the new sorting function. It also refactors the code to move the PR pruning logic into a separate function.
@ -212,57 +230,60 @@ This tool answers free-text questions about the PR. This is how a typical output
---
## Configuration
The different tools and sub-tools used by CodiumAI PR-Agent are easily configurable via the configuration file: `/settings/configuration.toml`.
The different tools and sub-tools used by CodiumAI pr-agent are easily configurable via the configuration file: `/settings/configuration.toml`.
#### Enabling/disabling sub-tools:
You can enable/disable the different PR Reviewer sub-sections with the following flags:
```
require_minimal_and_focused_review=true
require_tests_review=true
require_security_review=true
```
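These flags are read through the project's Dynaconf settings; below is a minimal sketch of checking them from Python (the `pr_reviewer` section name is inferred from `settings.pr_reviewer.*` usages elsewhere in this diff, so treat it as an assumption):
```
from pr_agent.config_loader import settings

# Sketch only: flag names are taken from configuration.toml above; the
# pr_reviewer section name is inferred from settings.pr_reviewer usages in this PR.
print(settings.pr_reviewer.require_minimal_and_focused_review)
print(settings.pr_reviewer.require_tests_review)
print(settings.pr_reviewer.require_security_review)
```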
#### Code Suggestions configuration:
There are also configuration options to control different aspects of the `code suggestions` feature.
The number of suggestions provided can be controlled by adjusting the following parameter:
```
num_code_suggestions=4
```
You can also enable a more verbose and informative mode of code suggestions:
```
extended_code_suggestions=false
```
This is a comparison of the regular and extended code suggestions modes:
---
Example for regular suggestion:
- **suggestion 1:**
- **relevant file:** sql.py
- **suggestion content:** Remove hardcoded sensitive information like username and password. Use environment variables or a secure method to store these values. [important]
---
- **relevant file:** sql.py
- **suggestion content:** Remove hardcoded sensitive information like username and password. Use environment variables or a secure method to store these values. [important]
Example for extended suggestion:
- **relevant file:** sql.py
- **suggestion content:** Remove hardcoded sensitive information (username and password) [important]
- **why:** Hardcoding sensitive information is a security risk. It's better to use environment variables or a secure way to store these values.
- **code example:**
- **before code:**
```
user = "root",
password = "Mysql@123",
```
- **after code:**
```
user = os.getenv('DB_USER'),
password = os.getenv('DB_PASSWORD'),
```
- **suggestion 1:**
- **relevant file:** sql.py
- **suggestion content:** Remove hardcoded sensitive information (username and password) [important]
- **why:** Hardcoding sensitive information is a security risk. It's better to use environment variables or a secure way to store these values.
- **code example:**
- **before code:**
```
user = "root",
password = "Mysql@123",
```
- **after code:**
```
user = os.getenv('DB_USER'),
password = os.getenv('DB_PASSWORD'),
```
---
## Roadmap
- [ ] Support open-source models as a replacement for OpenAI models. Note that a minimal requirement for each open-source model is an 8k+ context window and good support for generating JSON output
- [ ] Support other Git providers, such as GitLab and Bitbucket.
- [ ] Develop additional logic for handling large PRs and compressing git patches
@ -277,7 +298,9 @@ Example for extended suggestion:
- [ ] ...
## Similar Projects
- [CodiumAI - Meaningful tests for busy devs](https://github.com/Codium-ai/codiumai-vscode-release)
- [Aider - GPT powered coding in your terminal](https://github.com/paul-gauthier/aider)
- [GPT-Engineer](https://github.com/AntonOsika/gpt-engineer)
- [CodeReview BOT](https://github.com/anc95/ChatGPT-CodeReview)
- [AI-Maintainer](https://github.com/merwanehamadi/AI-Maintainer)

View File

@ -7,14 +7,14 @@ ENV PYTHONPATH=/app
ADD pr_agent pr_agent
FROM base as github_app
CMD ["python", "servers/github_app.py"]
CMD ["python", "pr_agent/servers/github_app.py"]
FROM base as github_polling
CMD ["python", "servers/github_polling.py"]
CMD ["python", "pr_agent/servers/github_polling.py"]
FROM base as test
ADD requirements-dev.txt .
RUN pip install -r requirements-dev.txt && rm requirements-dev.txt
FROM base as cli
CMD ["bash"]
ENTRYPOINT ["python", "pr_agent/cli.py"]

BIN pics/.DS_Store (vendored, new file): binary file not shown
BIN pics/Icon-7.png (new file): binary file not shown; size after: 100 KiB
BIN (file name not shown in this view, new file): binary file not shown; size after: 1.4 MiB

View File

@ -10,11 +10,16 @@ class PRAgent:
self.installation_id = installation_id
async def handle_request(self, pr_url, request):
if 'please review' in request.lower():
if 'please review' in request.lower() or 'review' == request.lower().strip() or len(request) == 0:
reviewer = PRReviewer(pr_url, self.installation_id)
await reviewer.review()
elif 'please answer' in request.lower():
question = re.split(r'(?i)please answer', request)[1].strip()
else:
if "please answer" in request.lower():
question = re.split(r'(?i)please answer', request)[1].strip()
elif request.lower().strip().startswith("answer"):
question = re.split(r'(?i)answer', request)[1].strip()
else:
question = request
answerer = PRQuestions(pr_url, question, self.installation_id)
await answerer.answer()
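Taken together, the new routing means an empty request, "review", or anything containing "please review" triggers the PR Reviewer, while "answer <question>" or "please answer <question>" routes to PR Q&A. A minimal usage sketch (the PR URL is a placeholder; credentials must already be configured in the secrets file):
```
import asyncio

from pr_agent.agent.pr_agent import PRAgent

async def demo(pr_url: str) -> None:
    agent = PRAgent()
    # Empty request, "review", or "... please review ..." -> PR Reviewer
    await agent.handle_request(pr_url, "review")
    # "answer <question>" or "please answer <question>" -> PR Q&A
    await agent.handle_request(pr_url, "answer is the PR ready for merge?")

# asyncio.run(demo("https://github.com/<owner>/<repo>/pull/<number>"))
```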

View File

@ -96,7 +96,7 @@ def handle_patch_deletions(patch: str, original_file_content_str: str,
# logic for handling deleted files - don't show patch, just show that the file was deleted
if settings.config.verbosity_level > 0:
logging.info(f"Processing file: {file_name}, minimizing deletion file")
patch = "File was deleted\n"
patch = None # file was deleted
else:
patch_lines = patch.splitlines()
patch_new = omit_deletion_hunks(patch_lines)

View File

@ -2,7 +2,7 @@ from __future__ import annotations
import difflib
import logging
from typing import Any, Dict, Tuple
from typing import Any, Tuple, Union
from pr_agent.algo.git_patch_processing import extend_patch, handle_patch_deletions
from pr_agent.algo.language_handler import sort_files_by_main_languages
@ -10,11 +10,16 @@ from pr_agent.algo.token_handler import TokenHandler
from pr_agent.config_loader import settings
from pr_agent.git_providers import GithubProvider
OUTPUT_BUFFER_TOKENS = 800
DELETED_FILES_ = "Deleted files:\n"
MORE_MODIFIED_FILES_ = "More modified files:\n"
OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD = 1000
OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600
PATCH_EXTRA_LINES = 3
def get_pr_diff(git_provider: [GithubProvider, Any], token_handler: TokenHandler) -> str:
def get_pr_diff(git_provider: Union[GithubProvider, Any], token_handler: TokenHandler) -> str:
"""
Returns a string with the diff of the PR.
If needed, apply diff minimization techniques to reduce the number of tokens
@ -28,12 +33,20 @@ def get_pr_diff(git_provider: [GithubProvider, Any], token_handler: TokenHandler
patches_extended, total_tokens = pr_generate_extended_diff(pr_languages, token_handler)
# if we are under the limit, return the full diff
if total_tokens + OUTPUT_BUFFER_TOKENS < token_handler.limit:
if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < token_handler.limit:
return "\n".join(patches_extended)
# if we are over the limit, start pruning
patches_compressed = pr_generate_compressed_diff(pr_languages, token_handler)
return "\n".join(patches_compressed)
patches_compressed, modified_file_names, deleted_file_names = pr_generate_compressed_diff(pr_languages,
token_handler)
final_diff = "\n".join(patches_compressed)
if modified_file_names:
modified_list_str = MORE_MODIFIED_FILES_ + "\n".join(modified_file_names)
final_diff = final_diff + "\n\n" + modified_list_str
if deleted_file_names:
deleted_list_str = DELETED_FILES_ + "\n".join(deleted_file_names)
final_diff = final_diff + "\n\n" + deleted_list_str
return final_diff
def pr_generate_extended_diff(pr_languages: list, token_handler: TokenHandler) -> \
@ -67,7 +80,7 @@ def pr_generate_extended_diff(pr_languages: list, token_handler: TokenHandler) -
return patches_extended, total_tokens
def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler) -> list:
def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler) -> Tuple[list, list, list]:
# Apply Diff Minimization techniques to reduce the number of tokens:
# 0. Start from the largest diff patch to smaller ones
# 1. Don't use extend context lines around diff
@ -76,7 +89,8 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler) ->
# 4. Minimize all remaining files when you reach token limit
patches = []
modified_files_list = []
deleted_files_list = []
# sort each one of the languages in top_langs by the number of tokens in the diff
sorted_files = []
for lang in top_langs:
@ -94,25 +108,40 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler) ->
# removing delete-only hunks
patch = handle_patch_deletions(patch, original_file_content_str,
new_file_content_str, file.filename)
if patch is None:
if not deleted_files_list:
total_tokens += token_handler.count_tokens(DELETED_FILES_)
deleted_files_list.append(file.filename)
total_tokens += token_handler.count_tokens(file.filename) + 1
continue
new_patch_tokens = token_handler.count_tokens(patch)
if total_tokens > token_handler.limit - OUTPUT_BUFFER_TOKENS // 2:
# Hard Stop, no more tokens
if total_tokens > token_handler.limit - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD:
logging.warning(f"File was fully skipped, no more tokens: {file.filename}.")
continue # Hard Stop, no more tokens
if total_tokens + new_patch_tokens > token_handler.limit - OUTPUT_BUFFER_TOKENS:
continue
# If the patch is too large, just show the file name
if total_tokens + new_patch_tokens > token_handler.limit - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
# Current logic is to skip the patch if it's too large
# TODO: Option for alternative logic to remove hunks from the patch to reduce the number of tokens
# until we meet the requirements
if settings.config.verbosity_level >= 2:
logging.warning(f"Patch too large, minimizing it, {file.filename}")
patch = "File was modified"
if not modified_files_list:
total_tokens += token_handler.count_tokens(MORE_MODIFIED_FILES_)
modified_files_list.append(file.filename)
total_tokens += token_handler.count_tokens(file.filename) + 1
continue
if patch:
patch_final = f"## {file.filename}\n\n{patch}\n"
patches.append(patch_final)
total_tokens += token_handler.count_tokens(patch_final)
if settings.config.verbosity_level >= 2:
logging.info(f"Tokens: {total_tokens}, last filename: {file.filename}")
return patches
return patches, modified_files_list, deleted_files_list
def load_large_diff(file, new_file_content_str: str, original_file_content_str: str, patch: str) -> str:
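A small worked example of the new two-threshold budget (the numbers are illustrative; the real limit comes from the model via TokenHandler):
```
# Illustrative numbers only; the real limit comes from TokenHandler / the model.
limit = 8000
SOFT = 1000  # OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD
HARD = 600   # OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD

total_tokens = 7100
# The full extended diff is returned only while total_tokens + SOFT < limit
# (here 7100 + 1000 >= 8000, so compression kicks in).
# During compression, a patch is appended only if it keeps the total within
# limit - SOFT (7000); otherwise only the filename is listed under
# "More modified files:". Once total_tokens exceeds limit - HARD (7400),
# the remaining files are skipped entirely.
```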

View File

@ -21,4 +21,4 @@ class TokenHandler:
return system_prompt_tokens + user_prompt_tokens
def count_tokens(self, patch: str) -> int:
return len(self.encoder.encode(patch))
return len(self.encoder.encode(patch, disallowed_special=()))
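The `disallowed_special=()` argument matters because tiktoken raises an error by default when the text being encoded contains a special token such as `<|endoftext|>`. A minimal sketch (the encoding name is illustrative; the real encoder is whichever one TokenHandler was built with):
```
import tiktoken

encoder = tiktoken.get_encoding("cl100k_base")  # illustrative encoding choice

patch = "diff text that happens to contain <|endoftext|>"
# encoder.encode(patch) would raise ValueError because of the special token;
# disallowed_special=() turns that check off so arbitrary diff content can be counted.
num_tokens = len(encoder.encode(patch, disallowed_special=()))
print(num_tokens)
```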

View File

@ -51,9 +51,13 @@ def parse_code_suggestion(code_suggestions: dict) -> str:
markdown_text += f" - **{code_key}:**\n{code_str_indented}\n"
else:
if "suggestion number" in sub_key.lower():
markdown_text += f"- **suggestion {sub_value}:**\n" # prettier formatting
# markdown_text += f"- **suggestion {sub_value}:**\n" # prettier formatting
pass
elif "relevant file" in sub_key.lower():
markdown_text += f"\n - **{sub_key}:** {sub_value}\n"
else:
markdown_text += f" - **{sub_key}:** {sub_value}\n"
markdown_text += f" **{sub_key}:** {sub_value}\n"
markdown_text += "\n"
return markdown_text

27
pr_agent/cli.py Normal file
View File

@ -0,0 +1,27 @@
import argparse
import asyncio
import logging
import os
from pr_agent.tools.pr_questions import PRQuestions
from pr_agent.tools.pr_reviewer import PRReviewer
def run():
parser = argparse.ArgumentParser(description='AI based pull request analyzer')
parser.add_argument('--pr_url', type=str, help='The URL of the PR to review', required=True)
parser.add_argument('--question', type=str, help='Optional question to ask', required=False)
args = parser.parse_args()
logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))
if args.question:
print(f"Question: {args.question} about PR {args.pr_url}")
reviewer = PRQuestions(args.pr_url, args.question, installation_id=None)
asyncio.run(reviewer.answer())
else:
print(f"Reviewing PR: {args.pr_url}")
reviewer = PRReviewer(args.pr_url, installation_id=None, cli_mode=True)
asyncio.run(reviewer.review())
if __name__ == '__main__':
run()

View File

@ -9,6 +9,7 @@ settings = Dynaconf(
"settings/.secrets.toml",
"settings/configuration.toml",
"settings/pr_reviewer_prompts.toml",
"settings/pr_questions_prompts.toml"
"settings/pr_questions_prompts.toml",
"settings_prod/.secrets.toml"
]]
)

View File

@ -1,3 +1,4 @@
import logging
from collections import namedtuple
from dataclasses import dataclass
from datetime import datetime
@ -23,6 +24,7 @@ class GithubProvider:
self.repo = None
self.pr_num = None
self.pr = None
self.github_user_id = None
if pr_url:
self.set_pr(pr_url)
@ -39,8 +41,22 @@ class GithubProvider:
diff_files.append(FilePatchInfo(original_file_content_str, new_file_content_str, file.patch, file.filename))
return diff_files
def publish_comment(self, pr_comment: str):
self.pr.create_issue_comment(pr_comment)
def publish_comment(self, pr_comment: str, is_temporary: bool = False):
response = self.pr.create_issue_comment(pr_comment)
if hasattr(response, "user") and hasattr(response.user, "login"):
self.github_user_id = response.user.login
response.is_temporary = is_temporary
if not hasattr(self.pr, 'comments_list'):
self.pr.comments_list = []
self.pr.comments_list.append(response)
def remove_initial_comment(self):
try:
for comment in self.pr.comments_list:
if comment.is_temporary:
comment.delete()
except Exception as e:
logging.exception(f"Failed to remove initial comment, error: {e}")
def get_title(self):
return self.pr.title
@ -96,6 +112,14 @@ class GithubProvider:
def get_pr_branch(self):
return self.pr.head.ref
def get_user_id(self):
if not self.github_user_id:
try:
self.github_user_id = self.github_client.get_user().login
except Exception as e:
logging.exception(f"Failed to get user id, error: {e}")
return self.github_user_id
def get_notifications(self, since: datetime):
deployment_type = settings.get("GITHUB.DEPLOYMENT_TYPE", "user")
@ -153,7 +177,9 @@ class GithubProvider:
try:
token = settings.github.user_token
except AttributeError as e:
raise ValueError("GitHub token is required when using user deployment") from e
raise ValueError(
"GitHub token is required when using user deployment. See: "
"https://github.com/Codium-ai/pr-agent#method-2-run-from-source") from e
return Github(token)
def _get_repo(self):
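The new `is_temporary` flag and `remove_initial_comment` together implement a placeholder-comment flow used by the reviewer and Q&A tools. A minimal sketch, assuming a configured user token and ignoring the `settings.config.publish_review` guard:
```
from pr_agent.git_providers import get_git_provider

def review_with_placeholder(pr_url: str, review_markdown: str) -> None:
    # Sketch only; in the real flow PRReviewer / PRQuestions drive these calls.
    provider = get_git_provider()(pr_url, None)
    provider.publish_comment("Preparing review...", is_temporary=True)  # tracked in pr.comments_list
    provider.publish_comment(review_markdown)                           # the permanent result comment
    provider.remove_initial_comment()                                   # deletes comments flagged is_temporary
```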

View File

@ -1,16 +0,0 @@
import argparse
import asyncio
import logging
import os
from pr_agent.tools.pr_questions import PRQuestions
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Review a PR from a URL')
parser.add_argument('--pr_url', type=str, help='The URL of the PR to review', required=True)
parser.add_argument('--question_str', type=str, help='The question to answer', required=True)
args = parser.parse_args()
logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))
reviewer = PRQuestions(args.pr_url, args.question_str, None)
asyncio.run(reviewer.answer())

View File

@ -1,14 +0,0 @@
import argparse
import asyncio
import logging
import os
from pr_agent.tools.pr_reviewer import PRReviewer
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Review a PR from a URL')
parser.add_argument('--pr_url', type=str, help='The URL of the PR to review', required=True)
args = parser.parse_args()
logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))
reviewer = PRReviewer(args.pr_url, None)
asyncio.run(reviewer.review())

View File

@ -40,7 +40,7 @@ async def handle_request(body):
if "comment" not in body:
return {}
comment_body = body.get("comment", {}).get("body", None)
if "says 'Please" in comment_body:
if 'sender' in body and 'login' in body['sender'] and 'bot' in body['sender']['login']:
return {}
if "issue" not in body and "pull_request" not in body["issue"]:
return {}

View File

@ -7,6 +7,7 @@ import aiohttp
from pr_agent.agent.pr_agent import PRAgent
from pr_agent.config_loader import settings
from pr_agent.git_providers import get_git_provider
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
NOTIFICATION_URL = "https://api.github.com/notifications"
@ -19,8 +20,11 @@ def now() -> str:
async def polling_loop():
handled_ids = set()
since = [now()]
last_modified = [None]
git_provider = get_git_provider()()
user_id = git_provider.get_user_id()
try:
deployment_type = settings.github.deployment_type
token = settings.github.user_token
@ -33,41 +37,58 @@ async def polling_loop():
raise ValueError("User token must be set to get notifications")
async with aiohttp.ClientSession() as session:
while True:
headers = {
"Accept": "application/vnd.github.v3+json",
"Authorization": f"Bearer {token}"
}
params = {
"participating": "true"
}
if since[0]:
params["since"] = since[0]
if last_modified[0]:
headers["If-Modified-Since"] = last_modified[0]
async with session.get(NOTIFICATION_URL, headers=headers, params=params) as response:
if response.status == 200:
if 'Last-Modified' in response.headers:
last_modified[0] = response.headers['Last-Modified']
since[0] = None
notifications = await response.json()
for notification in notifications:
if 'reason' in notification and notification['reason'] == 'mention':
if 'subject' in notification and notification['subject']['type'] == 'PullRequest':
pr_url = notification['subject']['url']
latest_comment = notification['subject']['latest_comment_url']
async with session.get(latest_comment, headers=headers) as comment_response:
if comment_response.status == 200:
comment = await comment_response.json()
comment_body = comment['body'] if 'body' in comment else ''
commenter_github_user = comment['user']['login'] if 'user' in comment else ''
logging.info(f"Commenter: {commenter_github_user}\nComment: {comment_body}")
if comment_body.strip().startswith("@"):
try:
await asyncio.sleep(5)
headers = {
"Accept": "application/vnd.github.v3+json",
"Authorization": f"Bearer {token}"
}
params = {
"participating": "true"
}
if since[0]:
params["since"] = since[0]
if last_modified[0]:
headers["If-Modified-Since"] = last_modified[0]
async with session.get(NOTIFICATION_URL, headers=headers, params=params) as response:
if response.status == 200:
if 'Last-Modified' in response.headers:
last_modified[0] = response.headers['Last-Modified']
since[0] = None
notifications = await response.json()
if not notifications:
continue
for notification in notifications:
handled_ids.add(notification['id'])
if 'reason' in notification and notification['reason'] == 'mention':
if 'subject' in notification and notification['subject']['type'] == 'PullRequest':
pr_url = notification['subject']['url']
latest_comment = notification['subject']['latest_comment_url']
async with session.get(latest_comment, headers=headers) as comment_response:
if comment_response.status == 200:
comment = await comment_response.json()
if 'id' in comment:
if comment['id'] in handled_ids:
continue
else:
handled_ids.add(comment['id'])
if 'user' in comment and 'login' in comment['user']:
if comment['user']['login'] == user_id:
continue
comment_body = comment['body'] if 'body' in comment else ''
commenter_github_user = comment['user']['login'] if 'user' in comment else ''
logging.info(f"Commenter: {commenter_github_user}\nComment: {comment_body}")
user_tag = "@" + user_id
if user_tag not in comment_body:
continue
rest_of_comment = comment_body.split(user_tag)[1].strip()
agent = PRAgent()
await agent.handle_request(pr_url, comment_body)
elif response.status != 304:
print(f"Failed to fetch notifications. Status code: {response.status}")
await agent.handle_request(pr_url, rest_of_comment)
elif response.status != 304:
print(f"Failed to fetch notifications. Status code: {response.status}")
await asyncio.sleep(5)
except Exception as e:
logging.error(f"Exception during processing of a notification: {e}")
if __name__ == '__main__':
asyncio.run(polling_loop())

View File

@ -3,6 +3,7 @@ system="""You are CodiumAI-PR-Reviewer, a language model designed to review git
Your task is to answer questions about the new PR code (the '+' lines), and provide feedback.
Be informative, constructive, and give examples. Try to be as specific as possible, and don't avoid answering the questions.
Make sure not to repeat modifications already implemented in the new PR code (the '+' lines).
Answer only the questions, and don't add unrelated content.
"""
user="""PR Info:

View File

@ -33,7 +33,7 @@ You must use the following JSON schema to format your answer:
{%- if require_minimal_and_focused %}
"Minimal and focused": {
"type": "string",
"description": "is this PR as minimal and focused as possible, with all code changes centered around a single coherent theme, described in the PR description and title ?" explain your answer"
"description": "is this PR as minimal and focused as possible, with all code changes centered around a single coherent theme, described in the PR description and title ?" Make sure to explain your answer"
}
},
{%- endif %}
@ -107,7 +107,7 @@ Example output:
"Relevant tests added": "No",
{%- endif %}
{%- if require_minimal_and_focused %}
"Minimal and focused": "No, because ..."
"Minimal and focused": "yes\\no, because ..."
{%- endif %}
},
"PR Feedback":

View File

@ -35,7 +35,8 @@ class PRQuestions:
async def answer(self):
logging.info('Answering a PR question...')
self.git_provider.publish_comment("Preparing answer...")
if settings.config.publish_review:
self.git_provider.publish_comment("Preparing answer...", is_temporary=True)
logging.info('Getting PR diff...')
self.patches_diff = get_pr_diff(self.git_provider, self.token_handler)
logging.info('Getting AI prediction...')
@ -45,6 +46,7 @@ class PRQuestions:
if settings.config.publish_review:
logging.info('Pushing answer...')
self.git_provider.publish_comment(pr_comment)
self.git_provider.remove_initial_comment()
return ""
async def _get_prediction(self):
@ -62,6 +64,8 @@ class PRQuestions:
return response
def _prepare_pr_answer(self) -> str:
answer_str = f"Questions: {self.question_str}\n\n"
answer_str += f"Answer: {self.prediction.strip()}\n\n"
answer_str = f"Question: {self.question_str}\n\n"
answer_str += f"Answer:\n{self.prediction.strip()}\n\n"
if settings.config.verbosity_level >= 2:
logging.info(f"answer_str:\n{answer_str}")
return answer_str

View File

@ -14,7 +14,7 @@ from pr_agent.git_providers import get_git_provider
class PRReviewer:
def __init__(self, pr_url: str, installation_id: Optional[int] = None):
def __init__(self, pr_url: str, installation_id: Optional[int] = None, cli_mode=False):
self.git_provider = get_git_provider()(pr_url, installation_id)
self.main_language = self.git_provider.get_main_pr_language()
@ -22,11 +22,12 @@ class PRReviewer:
self.ai_handler = AiHandler()
self.patches_diff = None
self.prediction = None
self.cli_mode = cli_mode
self.vars = {
"title": self.git_provider.pr.title,
"branch": self.git_provider.get_pr_branch(),
"description": self.git_provider.pr.body,
"language": self.git_provider.get_main_pr_language(),
"language": self.main_language,
"diff": "", # empty diff for initial calculation
"require_tests": settings.pr_reviewer.require_tests_review,
"require_security": settings.pr_reviewer.require_security_review,
@ -42,7 +43,7 @@ class PRReviewer:
async def review(self):
logging.info('Reviewing PR...')
if settings.config.publish_review:
self.git_provider.publish_comment("Preparing review...")
self.git_provider.publish_comment("Preparing review...", is_temporary=True)
logging.info('Getting PR diff...')
self.patches_diff = get_pr_diff(self.git_provider, self.token_handler)
logging.info('Getting AI prediction...')
@ -52,6 +53,7 @@ class PRReviewer:
if settings.config.publish_review:
logging.info('Pushing PR review...')
self.git_provider.publish_comment(pr_comment)
self.git_provider.remove_initial_comment()
return ""
async def _get_prediction(self):
@ -80,9 +82,29 @@ class PRReviewer:
except json.decoder.JSONDecodeError:
logging.error("Unable to decode JSON response from AI")
data = {}
# reordering for nicer display
if 'PR Feedback' in data:
if 'Security concerns' in data['PR Feedback']:
val = data['PR Feedback']['Security concerns']
del data['PR Feedback']['Security concerns']
data['PR Analysis']['Security concerns'] = val
markdown_text = convert_to_markdown(data)
markdown_text += "\nAdd a comment that says 'Please review' to ask for a new review after you update the PR.\n"
markdown_text += "Add a comment that says 'Please answer <QUESTION...>' to ask a question about this PR.\n"
user = self.git_provider.get_user_id()
if not self.cli_mode:
markdown_text += "\n### How to use\n"
if user and '[bot]' not in user:
markdown_text += f"> Tag me in a comment '@{user}' to ask for a new review after you update the PR.\n"
markdown_text += "> You can also tag me and ask any question, " \
f"for example '@{user} is the PR ready for merge?'"
else:
markdown_text += "> Add a comment that says 'review' to ask for a new review " \
"after you update the PR.\n"
markdown_text += "> You can also add a comment that says 'answer QUESTION', " \
"for example 'answer is the PR ready for merge?'"
if settings.config.verbosity_level >= 2:
logging.info(f"Markdown response:\n{markdown_text}")
return markdown_text

View File

@ -62,7 +62,7 @@ class TestHandlePatchDeletions:
new_file_content_str = ''
file_name = 'file.py'
assert handle_patch_deletions(patch, original_file_content_str, new_file_content_str,
file_name) == 'File was deleted\n'
file_name) is None
# Tests that handle_patch_deletions returns the original patch when patch and patch_new are equal
def test_handle_patch_deletions_edge_case_patch_and_patch_new_are_equal(self):