mirror of
https://github.com/qodo-ai/pr-agent.git
synced 2025-07-07 22:30:38 +08:00
Compare commits
285 Commits
test-PR-ne
...
v0.23
Author | SHA1 | Date | |
---|---|---|---|
0bf8c1e647 | |||
be18152446 | |||
7fc41409d4 | |||
78bcb72205 | |||
e35f83bdb6 | |||
20d9d8ad07 | |||
f3c80891f8 | |||
12973c2c99 | |||
1f5c3a4c0f | |||
422b4082b5 | |||
2235a19345 | |||
e30c70d2ca | |||
f7a6e93b6c | |||
23e6abcdce | |||
0bac03496a | |||
a228ea8109 | |||
0c3940b6a7 | |||
b05e15e9ec | |||
bea68084b3 | |||
57abf4ac62 | |||
f0efe4a707 | |||
040503039e | |||
3e265682a7 | |||
d7c0f87ea5 | |||
92d040c80f | |||
96ededd12a | |||
8d87b41cf2 | |||
f058c09a68 | |||
f2cb70ea67 | |||
3e6263e1cc | |||
3373fb404a | |||
df02cc1437 | |||
6a5f43f8ce | |||
ebbf9c25b3 | |||
0dc7bdabd2 | |||
defe200817 | |||
bf5673912d | |||
089a76c897 | |||
4c444f5c9a | |||
e5aae0d14f | |||
15f854336a | |||
056eb3a954 | |||
11abce3ede | |||
556dc68add | |||
b1f728e6b0 | |||
ca18f85294 | |||
382da3a5b6 | |||
406dcd7b7b | |||
b20f364b15 | |||
692904bb71 | |||
ba963149ac | |||
7348d4144b | |||
d0315164be | |||
41607b10ef | |||
2d21df61c7 | |||
c185b7c610 | |||
3d60954167 | |||
a57896aa94 | |||
73f0eebb69 | |||
b1d07be728 | |||
0f920bcc5b | |||
55a82382ef | |||
6c2a14d557 | |||
4ab747dbfd | |||
b814e4a26d | |||
609fa266cf | |||
69f6997739 | |||
8cc436cbd6 | |||
384dfc2292 | |||
40737c3932 | |||
c46434ac5e | |||
255c2d8e94 | |||
74bb07e9c4 | |||
a4db59fadc | |||
2990aac955 | |||
afe037e976 | |||
666fcb6264 | |||
3f3e9909fe | |||
685c443d87 | |||
c4361ccb01 | |||
a3d4d6d86f | |||
b12554ee84 | |||
29bc0890ab | |||
5fd7ca7d02 | |||
41ffa8df51 | |||
47b12d8bbc | |||
ded8dc3689 | |||
9034e18772 | |||
833bb29808 | |||
bdf1be921d | |||
0c1331f77e | |||
164999d83d | |||
a710f3ff43 | |||
025a14014a | |||
5968db67b9 | |||
3affe011fe | |||
c4a653f70a | |||
663604daa5 | |||
deda06866d | |||
e33f2e4c67 | |||
00b6a67e1e | |||
024ef7eea3 | |||
3fee687a34 | |||
b2c0c4d654 | |||
6b56ea4289 | |||
2a68a90474 | |||
de9b21d7bd | |||
612c6ed135 | |||
6ed65eb82b | |||
bc09330a44 | |||
7bd1e5211c | |||
8d44804f84 | |||
a4320b6b0d | |||
73ec67b14e | |||
790dcc552e | |||
8463aaac0a | |||
195f8a03ab | |||
5268a84bcc | |||
e53badbac4 | |||
a9a27b5a8f | |||
4db428456d | |||
925fab474c | |||
a1fb9aac29 | |||
774bba4ed2 | |||
dd8a7200f7 | |||
33d8b51abd | |||
e083841d96 | |||
1070f9583f | |||
bedcc2433c | |||
8ff85a9daf | |||
58bc54b193 | |||
aa56c0097d | |||
20f6af803c | |||
2076454798 | |||
e367df352b | |||
a32a12a851 | |||
3a897935ae | |||
55b52ad6b2 | |||
b0f9b96c75 | |||
aac7aeabd1 | |||
306fd3d064 | |||
f1d5587220 | |||
07f21a5511 | |||
1106dccc4f | |||
e5f269040e | |||
9c8bc6c86a | |||
f4c9d23084 | |||
25fdf16894 | |||
12b0df4608 | |||
529346b8e0 | |||
b28f66aaa0 | |||
2e535e42ee | |||
9c6a363a51 | |||
75a27d64b4 | |||
4549cb3948 | |||
d046c2a939 | |||
aed4ed41cc | |||
4d96d11ba5 | |||
faf4576f03 | |||
0b7dcf03a5 | |||
8e12787fc8 | |||
213ced7e18 | |||
6d6fb67306 | |||
fac8a80c24 | |||
c53c6aee7f | |||
b980168e75 | |||
86d901d5a6 | |||
b1444eb180 | |||
d3a7041f0d | |||
b4f0ad948f | |||
ab31d2f1f8 | |||
2b0dfc6298 | |||
76ff49d446 | |||
413547f404 | |||
f8feaa0be7 | |||
09190efb65 | |||
2746bd4754 | |||
4f13007267 | |||
962bb1c23d | |||
e9804c9f0d | |||
f3aa9c02cc | |||
416b150d66 | |||
83f3cc5033 | |||
1e1636911f | |||
40658cfb7c | |||
85f6353d15 | |||
b9aeb8e443 | |||
ea7a84901d | |||
37f6e18953 | |||
62c6211998 | |||
dc6ae9fa7e | |||
c6e6cbb50e | |||
731c8de4ea | |||
4971071b1f | |||
c341446015 | |||
ea9d410c84 | |||
d9a7dae6c4 | |||
c9c14c10b0 | |||
bd2f2b3a87 | |||
c11ee8643e | |||
04d55a6309 | |||
e6c5236156 | |||
ee90f38501 | |||
6e6f54933e | |||
911c1268fc | |||
17f46bb53b | |||
806ba3f9d8 | |||
2a69116767 | |||
b7225c1d10 | |||
ca5efbc52f | |||
da44bd7d5e | |||
83ff9a0b9b | |||
4cd9626217 | |||
ca9f96a1e3 | |||
811965d841 | |||
39fe6f69d0 | |||
66dc9349bd | |||
63340eb75e | |||
fab5b6f871 | |||
71770f3c04 | |||
a13cb14e9f | |||
e5bbb701d3 | |||
7779038e2a | |||
c3dca2ef5a | |||
985b4f05cf | |||
8921d9eb0e | |||
2880e48860 | |||
9b56c83c1d | |||
2369b8da69 | |||
dcd188193b | |||
89819b302b | |||
3432d377c7 | |||
ea4ee1adbc | |||
f9af9e4a91 | |||
3b3e885b76 | |||
46e934772c | |||
cc08394e51 | |||
2b4eac2123 | |||
570f7d6dcf | |||
188d092524 | |||
8599c0fed4 | |||
0ab19b84b2 | |||
fec583e45e | |||
589b865db5 | |||
be701aa868 | |||
4231a84e7a | |||
e56320540b | |||
e4565f7106 | |||
b4458ffede | |||
36ad8935ad | |||
9dd2520dbd | |||
e6708fcb7b | |||
05876afc02 | |||
f3eb74d718 | |||
b0aac4ec5d | |||
95c7b3f55c | |||
efd906ccf1 | |||
5fed21ce37 | |||
853cfb3fc9 | |||
6c0837491c | |||
fbacc7c765 | |||
e69b798aa1 | |||
61ba015a55 | |||
4f6490b17c | |||
9dfc263e2e | |||
d348cffbae | |||
c04ab933cd | |||
a55fa753b9 | |||
8e0435d9a0 | |||
39c0733d6f | |||
a588e9f2bb | |||
7627e651ea | |||
1ebc20b761 | |||
38058ea714 | |||
c92c26448f | |||
38051f79b7 | |||
738eb055ff | |||
5d8d178a60 | |||
e8f4a45774 | |||
aa60c7d701 | |||
4645cd7cf9 | |||
edb230c993 | |||
7bb1917be7 | |||
d360fb72cb | |||
253f77f4d9 |
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,4 +1,6 @@
|
||||
.idea/
|
||||
.lsp/
|
||||
.vscode/
|
||||
venv/
|
||||
pr_agent/settings/.secrets.toml
|
||||
__pycache__
|
||||
|
@ -1,7 +1,3 @@
|
||||
[pr_reviewer]
|
||||
enable_review_labels_effort = true
|
||||
enable_auto_approval = true
|
||||
|
||||
|
||||
[pr_code_suggestions]
|
||||
commitable_code_suggestions=false
|
||||
|
67
README.md
67
README.md
@ -14,6 +14,8 @@ CodiumAI PR-Agent aims to help efficiently review and handle pull requests, by p
|
||||
</div>
|
||||
|
||||
[](https://github.com/Codium-ai/pr-agent/blob/main/LICENSE)
|
||||
[](https://chromewebstore.google.com/detail/pr-agent-chrome-extension/ephlnjeghhogofkifjloamocljapahnl)
|
||||
[](https://pr-agent-docs.codium.ai/finetuning_benchmark/)
|
||||
[](https://discord.com/channels/1057273017547378788/1126104260430528613)
|
||||
[](https://twitter.com/codiumai)
|
||||
<a href="https://github.com/Codium-ai/pr-agent/commits/main">
|
||||
@ -40,48 +42,26 @@ CodiumAI PR-Agent aims to help efficiently review and handle pull requests, by p
|
||||
|
||||
## News and Updates
|
||||
|
||||
### May 2, 2024
|
||||
Check out the new [PR-Agent Chrome Extension](https://chromewebstore.google.com/detail/pr-agent-chrome-extension/ephlnjeghhogofkifjloamocljapahnl) 🚀🚀🚀
|
||||
### July 4, 2024
|
||||
|
||||
This toolbar integrates seamlessly with your GitHub environment, allowing you to access PR-Agent tools [directly from the GitHub interface](https://www.youtube.com/watch?v=gT5tli7X4H4).
|
||||
You can also easily export your chosen configuration, and use it for the automatic commands.
|
||||
Added improved support for claude-sonnet-3.5 model (anthropic, vertex, bedrock), including dedicated prompts.
|
||||
|
||||
<kbd><img src="https://codium.ai/images/pr_agent/toolbar1.png" width="512"></kbd>
|
||||
### June 17, 2024
|
||||
|
||||
<kbd><img src="https://codium.ai/images/pr_agent/toolbar2.png" width="512"></kbd>
|
||||
New option for a self-review checkbox is now available for the `/improve` tool, along with the ability(💎) to enable auto-approve, or demand self-review in addition to human reviewer. See more [here](https://pr-agent-docs.codium.ai/tools/improve/#self-review).
|
||||
|
||||
<kbd><img src="https://www.codium.ai/images/pr_agent/self_review_1.png" width="512"></kbd>
|
||||
|
||||
### April 14, 2024
|
||||
You can now ask questions about images that appear in the comment, where the entire PR is considered as the context.
|
||||
see [here](https://pr-agent-docs.codium.ai/tools/ask/#ask-on-images) for more details.
|
||||
### June 6, 2024
|
||||
|
||||
<kbd><img src="https://codium.ai/images/pr_agent/ask_images5.png" width="512"></kbd>
|
||||
New option now available (💎) - **apply suggestions**:
|
||||
|
||||
### March 24, 2024
|
||||
PR-Agent is now available for easy installation via [pip](https://pr-agent-docs.codium.ai/installation/locally/#using-pip-package).
|
||||
<kbd><img src="https://www.codium.ai/images/pr_agent/apply_suggestion_1.png" width="512"></kbd>
|
||||
|
||||
### March 17, 2024
|
||||
- A new feature is now available for the review tool: [`require_can_be_split_review`](https://pr-agent-docs.codium.ai/tools/review/#enabledisable-features).
|
||||
If set to true, the tool will add a section that checks if the PR contains several themes, and can be split into smaller PRs.
|
||||
→
|
||||
|
||||
<kbd><img src="https://codium.ai/images/pr_agent/multiple_pr_themes.png" width="512"></kbd>
|
||||
<kbd><img src="https://www.codium.ai/images/pr_agent/apply_suggestion_2.png" width="512"></kbd>
|
||||
|
||||
### March 10, 2024
|
||||
- A new [knowledge-base website](https://pr-agent-docs.codium.ai/) for PR-Agent is now available. It includes detailed information about the different tools, usage guides and more, in an accessible and organized format.
|
||||
|
||||
### March 8, 2024
|
||||
|
||||
- A new tool, [Find Similar Code](https://pr-agent-docs.codium.ai/tools/similar_code/) 💎 is now available.
|
||||
<br>This tool retrieves the most similar code components from inside the organization's codebase, or from open-source code:
|
||||
|
||||
<kbd><a href="https://codium.ai/images/pr_agent/similar_code.mp4"><img src="https://codium.ai/images/pr_agent/similar_code_global2.png" width="512"></a></kbd>
|
||||
|
||||
(click on the image to see an instructional video)
|
||||
|
||||
### Feb 29, 2024
|
||||
- You can now use the repo's [wiki page](https://pr-agent-docs.codium.ai/usage-guide/configuration_options/) to set configurations for PR-Agent 💎
|
||||
|
||||
<kbd><img src="https://codium.ai/images/pr_agent/wiki_configuration.png" width="512"></kbd>
|
||||
|
||||
|
||||
## Overview
|
||||
@ -90,7 +70,7 @@ If set to true, the tool will add a section that checks if the PR contains sever
|
||||
Supported commands per platform:
|
||||
|
||||
| | | GitHub | Gitlab | Bitbucket | Azure DevOps |
|
||||
|-------|-------------------------------------------------------------------------------------------------------------------|:--------------------:|:--------------------:|:--------------------:|:--------------------:|
|
||||
|-------|---------------------------------------------------------------------------------------------------------|:--------------------:|:--------------------:|:--------------------:|:--------------------:|
|
||||
| TOOLS | Review | ✅ | ✅ | ✅ | ✅ |
|
||||
| | ⮑ Incremental | ✅ | | | |
|
||||
| | ⮑ [SOC2 Compliance](https://pr-agent-docs.codium.ai/tools/review/#soc2-ticket-compliance) 💎 | ✅ | ✅ | ✅ | ✅ |
|
||||
@ -100,7 +80,7 @@ Supported commands per platform:
|
||||
| | ⮑ Extended | ✅ | ✅ | ✅ | ✅ |
|
||||
| | Ask | ✅ | ✅ | ✅ | ✅ |
|
||||
| | ⮑ [Ask on code lines](https://pr-agent-docs.codium.ai/tools/ask#ask-lines) | ✅ | ✅ | | |
|
||||
| | [Custom Suggestions](https://pr-agent-docs.codium.ai/tools/custom_suggestions/) 💎 | ✅ | ✅ | ✅ | ✅ |
|
||||
| | [Custom Prompt](https://pr-agent-docs.codium.ai/tools/custom_prompt/) 💎 | ✅ | ✅ | ✅ | ✅ |
|
||||
| | [Test](https://pr-agent-docs.codium.ai/tools/test/) 💎 | ✅ | ✅ | | ✅ |
|
||||
| | Reflect and Review | ✅ | ✅ | ✅ | ✅ |
|
||||
| | Update CHANGELOG.md | ✅ | ✅ | ✅ | ✅ |
|
||||
@ -146,7 +126,7 @@ ___
|
||||
\
|
||||
‣ **Analyze 💎 ([`/analyze`](https://pr-agent-docs.codium.ai/tools/analyze/))**: Identifies code components that changed in the PR, and enables you to interactively generate tests, docs, and code suggestions for each component.
|
||||
\
|
||||
‣ **Custom Suggestions 💎 ([`/custom_suggestions`](https://pr-agent-docs.codium.ai/tools/custom_suggestions/))**: Automatically generates custom suggestions for improving the PR code, based on specific guidelines defined by the user.
|
||||
‣ **Custom Prompt 💎 ([`/custom_prompt`](https://pr-agent-docs.codium.ai/tools/custom_prompt/))**: Automatically generates custom suggestions for improving the PR code, based on specific guidelines defined by the user.
|
||||
\
|
||||
‣ **Generate Tests 💎 ([`/test component_name`](https://pr-agent-docs.codium.ai/tools/test/))**: Generates unit tests for a selected component, based on the PR code changes.
|
||||
\
|
||||
@ -321,11 +301,22 @@ Here are some advantages of PR-Agent:
|
||||
|
||||
## Data privacy
|
||||
|
||||
If you host PR-Agent with your OpenAI API key, it is between you and OpenAI. You can read their API data privacy policy here:
|
||||
### Self-hosted PR-Agent
|
||||
|
||||
- If you host PR-Agent with your OpenAI API key, it is between you and OpenAI. You can read their API data privacy policy here:
|
||||
https://openai.com/enterprise-privacy
|
||||
|
||||
When using PR-Agent Pro 💎, hosted by CodiumAI, we will not store any of your data, nor will we use it for training.
|
||||
You will also benefit from an OpenAI account with zero data retention.
|
||||
### CodiumAI-hosted PR-Agent Pro 💎
|
||||
|
||||
- When using PR-Agent Pro 💎, hosted by CodiumAI, we will not store any of your data, nor will we use it for training. You will also benefit from an OpenAI account with zero data retention.
|
||||
|
||||
- For certain clients, CodiumAI-hosted PR-Agent Pro will use CodiumAI’s proprietary models — if this is the case, you will be notified.
|
||||
|
||||
- No passive collection of Code and Pull Requests’ data — PR-Agent will be active only when you invoke it, and it will then extract and analyze only data relevant to the executed command and queried pull request.
|
||||
|
||||
### PR-Agent Chrome extension
|
||||
|
||||
- The [PR-Agent Chrome extension](https://chromewebstore.google.com/detail/pr-agent-chrome-extension/ephlnjeghhogofkifjloamocljapahnl) serves solely to modify the visual appearance of a GitHub PR screen. It does not transmit any user's repo or pull request code. Code is only sent for processing when a user submits a GitHub comment that activates a PR-Agent tool, in accordance with the standard privacy policy of PR-Agent.
|
||||
|
||||
## Links
|
||||
|
||||
|
@ -8,7 +8,7 @@ ENV PYTHONPATH=/app
|
||||
|
||||
FROM base as github_app
|
||||
ADD pr_agent pr_agent
|
||||
CMD ["python", "pr_agent/servers/github_app.py"]
|
||||
CMD ["python", "-m", "gunicorn", "-k", "uvicorn.workers.UvicornWorker", "-c", "pr_agent/servers/gunicorn_config.py", "--forwarded-allow-ips", "*", "pr_agent.servers.github_app:app"]
|
||||
|
||||
FROM base as bitbucket_app
|
||||
ADD pr_agent pr_agent
|
||||
|
49
docs/docs/chrome-extension/index.md
Normal file
49
docs/docs/chrome-extension/index.md
Normal file
@ -0,0 +1,49 @@
|
||||
## PR-Agent Chrome extension
|
||||
PR-Agent Chrome extension is a collection of tools that integrates seamlessly with your GitHub environment, aiming to enhance your PR-Agent usage experience, and providing additional features.
|
||||
|
||||
## Features
|
||||
|
||||
### Toolbar extension
|
||||
With PR-Agent Chrome extension, it's [easier than ever](https://www.youtube.com/watch?v=gT5tli7X4H4) to interactively configure and experiment with the different tools and configuration options.
|
||||
|
||||
Once you have found the setup that works for you, you can also easily export it as a persistent configuration file, and use it for automatic commands.
|
||||
|
||||
<img src="https://codium.ai/images/pr_agent/toolbar1.png" width="512">
|
||||
|
||||
<img src="https://codium.ai/images/pr_agent/toolbar2.png" width="512">
|
||||
|
||||
### PR-Agent filters
|
||||
|
||||
PR-Agent filters is a sidepanel option that allows you to filter different messages in the conversation tab.
|
||||
|
||||
For example, you can choose to present only messages from PR-Agent, or filter out those messages, focusing only on users' comments.
|
||||
|
||||
<img src="https://codium.ai/images/pr_agent/pr_agent_filters1.png" width="256">
|
||||
|
||||
<img src="https://codium.ai/images/pr_agent/pr_agent_filters2.png" width="256">
|
||||
|
||||
|
||||
### Enhanced code suggestions
|
||||
|
||||
PR-Agent Chrome extension adds the following capabilities to code suggestions tool's comments:
|
||||
|
||||
- Auto-expand the table when you are viewing a code block, to avoid clipping.
|
||||
- Add a "quote-and-reply" button that lets you address and comment on a specific suggestion (for example, asking the author to fix the issue).
|
||||
|
||||
|
||||
<img src="https://codium.ai/images/pr_agent/chrome_extension_code_suggestion1.png" width="512">
|
||||
|
||||
<img src="https://codium.ai/images/pr_agent/chrome_extension_code_suggestion2.png" width="512">
|
||||
|
||||
## Installation
|
||||
|
||||
Go to the marketplace and install the extension:
|
||||
[PR-Agent Chrome Extension](https://chromewebstore.google.com/detail/pr-agent-chrome-extension/ephlnjeghhogofkifjloamocljapahnl)
|
||||
|
||||
## Pre-requisites
|
||||
|
||||
The PR-Agent Chrome extension will work on any repo where you have previously [installed PR-Agent](https://pr-agent-docs.codium.ai/installation/).
|
||||
|
||||
## Data privacy and security
|
||||
|
||||
The PR-Agent Chrome extension only modifies the visual appearance of a GitHub PR screen. It does not transmit any user's repo or pull request code. Code is only sent for processing when a user submits a GitHub comment that activates a PR-Agent tool, in accordance with the standard privacy policy of PR-Agent.
|
@ -4,12 +4,21 @@
|
||||
--md-primary-fg-color: #765bfa;
|
||||
--md-accent-fg-color: #AEA1F1;
|
||||
}
|
||||
.md-nav__title, .md-nav__link {
|
||||
font-size: 16px;
|
||||
|
||||
.md-nav--primary {
|
||||
.md-nav__link {
|
||||
font-size: 18px; /* Change the font size as per your preference */
|
||||
}
|
||||
}
|
||||
|
||||
/*.md-nav__title, .md-nav__link {*/
|
||||
/* font-size: 18px;*/
|
||||
/* margin-top: 14px; !* Adjust the space as needed *!*/
|
||||
/* margin-bottom: 14px; !* Adjust the space as needed *!*/
|
||||
/*}*/
|
||||
|
||||
.md-tabs__link {
|
||||
font-size: 16px;
|
||||
font-size: 18px;
|
||||
}
|
||||
|
||||
.md-header__title {
|
||||
|
92
docs/docs/finetuning_benchmark/index.md
Normal file
92
docs/docs/finetuning_benchmark/index.md
Normal file
@ -0,0 +1,92 @@
|
||||
# PR-Agent Code Fine-tuning Benchmark
|
||||
|
||||
On coding tasks, the gap between open-source models and top closed-source models such as GPT4 is significant.
|
||||
<br>
|
||||
In practice, open-source models are unsuitable for most real-world code tasks, and require further fine-tuning to produce acceptable results.
|
||||
|
||||
_PR-Agent fine-tuning benchmark_ aims to benchmark open-source models on their ability to be fine-tuned for a coding task.
|
||||
Specifically, we chose to fine-tune open-source models on the task of analyzing a pull request, and providing useful feedback and code suggestions.
|
||||
|
||||
Here are the results:
|
||||
<br>
|
||||
<br>
|
||||
|
||||
**Model performance:**
|
||||
|
||||
| Model name | Model size [B] | Better than gpt-4 rate, after fine-tuning [%] |
|
||||
|-----------------------------|----------------|----------------------------------------------|
|
||||
| **DeepSeek 34B-instruct** | **34** | **40.7** |
|
||||
| DeepSeek 34B-base | 34 | 38.2 |
|
||||
| Phind-34b | 34 | 38 |
|
||||
| Granite-34B | 34 | 37.6 |
|
||||
| Codestral-22B-v0.1 | 22 | 32.7 |
|
||||
| QWEN-1.5-32B | 32 | 29 |
|
||||
| | | |
|
||||
| **CodeQwen1.5-7B** | **7** | **35.4** |
|
||||
| Granite-8b-code-instruct | 8 | 34.2 |
|
||||
| CodeLlama-7b-hf | 7 | 31.8 |
|
||||
| Gemma-7B | 7 | 27.2 |
|
||||
| DeepSeek coder-7b-instruct | 7 | 26.8 |
|
||||
| Llama-3-8B-Instruct | 8 | 26.8 |
|
||||
| Mistral-7B-v0.1 | 7 | 16.1 |
|
||||
|
||||
<br>
|
||||
|
||||
**Fine-tuning impact:**
|
||||
|
||||
| Model name | Model size [B] | Fine-tuned | Better than gpt-4 rate [%] |
|
||||
|---------------------------|----------------|------------|----------------------------|
|
||||
| DeepSeek 34B-instruct | 34 | yes | 40.7 |
|
||||
| DeepSeek 34B-instruct | 34 | no | 3.6 |
|
||||
|
||||
## Results analysis
|
||||
|
||||
- **Fine-tuning is a must** - without fine-tuning, open-source models provide poor results on most real-world code tasks, which include complicated prompts and lengthy context. We clearly see that without fine-tuning, the DeepSeek model was inferior to GPT-4 96.4% of the time, while after fine-tuning, it is better 40.7% of the time.
|
||||
- **Always start from a code-dedicated model** — When fine-tuning, always start from a code-dedicated model, and not from a general-usage model. The gaps in downstream results are very big.
|
||||
- **Don't believe the hype** — newer models, or models from big-tech companies (Llama3, Gemma, Mistral), are not always better for fine-tuning.
|
||||
- **The best large model** - For large 34B code-dedicated models, the gaps when doing proper fine-tuning are small. The current top model is **DeepSeek 34B-instruct**
|
||||
- **The best small model** - For small 7B code-dedicated models, the gaps when fine-tuning are much larger. **CodeQWEN 1.5-7B** is by far the best model for fine-tuning.
|
||||
- **Base vs. instruct** - For the top model (deepseek), we saw a small advantage when starting from the instruct version. However, we recommend testing both versions on each specific task, as the base model is generally considered more suitable for fine-tuning.
|
||||
|
||||
## The dataset
|
||||
|
||||
### Training dataset
|
||||
|
||||
Our training dataset comprises 25,000 pull requests, aggregated from permissive license repos. For each pull request, we generated responses for the three main tools of PR-Agent:
|
||||
[Describe](https://pr-agent-docs.codium.ai/tools/describe/), [Review](https://pr-agent-docs.codium.ai/tools/review/) and [Improve](https://pr-agent-docs.codium.ai/tools/improve/).
|
||||
|
||||
On the raw data collected, we employed various automatic and manual cleaning techniques to ensure the outputs were of the highest quality, and suitable for instruct-tuning.
|
||||
|
||||
Here are the prompts, and example outputs, used as input-output pairs to fine-tune the models:
|
||||
|
||||
| Tool | Prompt | Example output |
|
||||
|----------|------------------------------------------------------------------------------------------------------------|----------------|
|
||||
| Describe | [link](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/pr_description_prompts.toml) | [link](https://github.com/Codium-ai/pr-agent/pull/910#issue-2303989601) |
|
||||
| Review | [link](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/pr_reviewer_prompts.toml) | [link](https://github.com/Codium-ai/pr-agent/pull/910#issuecomment-2118761219) |
|
||||
| Improve | [link](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/pr_code_suggestions_prompts.toml) | [link](https://github.com/Codium-ai/pr-agent/pull/910#issuecomment-2118761309) |
|
||||
|
||||
### Evaluation dataset
|
||||
|
||||
- For each tool, we aggregated 100 additional examples to be used for evaluation. These examples were not used in the training dataset, and were manually selected to represent diverse real-world use-cases.
|
||||
- For each test example, we generated two responses: one from the fine-tuned model, and one from the best code model in the world, `gpt-4-turbo-2024-04-09`.
|
||||
|
||||
- We used a third LLM to judge which response better answers the prompt, and will likely be perceived by a human as a better response.
|
||||
<br>
|
||||
|
||||
We experimented with three models as judges: `gpt-4-turbo-2024-04-09`, `gpt-4o`, and `claude-3-opus-20240229`. All three produced similar results, with the same ranking order. This strengthens the validity of our testing protocol.
|
||||
The evaluation prompt can be found [here](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/pr_evaluate_prompt_response.toml)
|
||||
|
||||
Here is an example of a judge model feedback:
|
||||
|
||||
```
|
||||
command: improve
|
||||
model1_score: 9,
|
||||
model2_score: 6,
|
||||
why: |
|
||||
Response 1 is better because it provides more actionable and specific suggestions that directly
|
||||
enhance the code's maintainability, performance, and best practices. For example, it suggests
|
||||
using a variable for reusable widget instances and using named routes for navigation, which
|
||||
are practical improvements. In contrast, Response 2 focuses more on general advice and less
|
||||
actionable suggestions, such as changing variable names and adding comments, which are less
|
||||
critical for immediate code improvement.
|
||||
```
|
@ -13,7 +13,7 @@ CodiumAI PR-Agent is an open-source tool to help efficiently review and handle p
|
||||
PR-Agent offers extensive pull request functionalities across various git providers.
|
||||
|
||||
| | | GitHub | Gitlab | Bitbucket | Azure DevOps |
|
||||
|-------|---------------------------------------------------------------------------------------------------------------------|:------:|:------:|:---------:|:------------:|
|
||||
|-------|-----------------------------------------------------------------------------------------------------------------------|:------:|:------:|:---------:|:------------:|
|
||||
| TOOLS | Review | ✅ | ✅ | ✅ | ✅ |
|
||||
| | ⮑ Incremental | ✅ | | | |
|
||||
| | ⮑ [SOC2 Compliance](https://pr-agent-docs.codium.ai/tools/review/#soc2-ticket-compliance){:target="_blank"} 💎 | ✅ | ✅ | ✅ | ✅ |
|
||||
@ -22,7 +22,7 @@ PR-Agent offers extensive pull request functionalities across various git provid
|
||||
| | ⮑ [Inline file summary](https://pr-agent-docs.codium.ai/tools/describe/#inline-file-summary){:target="_blank"} 💎 | ✅ | ✅ | | ✅ |
|
||||
| | Improve | ✅ | ✅ | ✅ | ✅ |
|
||||
| | ⮑ Extended | ✅ | ✅ | ✅ | ✅ |
|
||||
| | [Custom Suggestions](./tools/custom_suggestions.md){:target="_blank"} 💎 | ✅ | ✅ | ✅ | ✅ |
|
||||
| | [Custom Prompt](./tools/custom_prompt.md){:target="_blank"} 💎 | ✅ | ✅ | ✅ | ✅ |
|
||||
| | Reflect and Review | ✅ | ✅ | ✅ | ✅ |
|
||||
| | Update CHANGELOG.md | ✅ | ✅ | ✅ | ️ |
|
||||
| | Find Similar Issue | ✅ | | | ️ |
|
||||
@ -79,35 +79,3 @@ The following diagram illustrates PR-Agent tools and their flow:
|
||||

|
||||
|
||||
Check out the [PR Compression strategy](core-abilities/index.md) page for more details on how we convert a code diff to a manageable LLM prompt
|
||||
|
||||
|
||||
|
||||
## PR-Agent Pro 💎
|
||||
|
||||
[PR-Agent Pro](https://www.codium.ai/pricing/) is a hosted version of PR-Agent, provided by CodiumAI. It is available for a monthly fee, and provides the following benefits:
|
||||
|
||||
1. **Fully managed** - We take care of everything for you - hosting, models, regular updates, and more. Installation is as simple as signing up and adding the PR-Agent app to your GitHub\GitLab\BitBucket repo.
|
||||
2. **Improved privacy** - No data will be stored or used to train models. PR-Agent Pro will employ zero data retention, and will use an OpenAI account with zero data retention.
|
||||
3. **Improved support** - PR-Agent Pro users will receive priority support, and will be able to request new features and capabilities.
|
||||
4. **Extra features** - In addition to the benefits listed above, PR-Agent Pro will emphasize more customization, and the usage of static code analysis, in addition to LLM logic, to improve results. It has the following additional tools and features:
|
||||
- (Tool): [**Analyze PR components**](./tools/analyze.md/)
|
||||
- (Tool): [**Custom Code Suggestions**](./tools/custom_suggestions.md/)
|
||||
- (Tool): [**Tests**](./tools/test.md/)
|
||||
- (Tool): [**PR documentation**](./tools/documentation.md/)
|
||||
- (Tool): [**Improve Component**](https://pr-agent-docs.codium.ai/tools/improve_component/)
|
||||
- (Tool): [**Similar code search**](https://pr-agent-docs.codium.ai/tools/similar_code/)
|
||||
- (Tool): [**CI feedback**](./tools/ci_feedback.md/)
|
||||
- (Feature): [**Interactive triggering**](./usage-guide/automations_and_usage.md/#interactive-triggering)
|
||||
- (Feature): [**SOC2 compliance check**](./tools/review.md/#soc2-ticket-compliance)
|
||||
- (Feature): [**Custom labels**](./tools/describe.md/#handle-custom-labels-from-the-repos-labels-page)
|
||||
- (Feature): [**Global and wiki configuration**](./usage-guide/configuration_options.md/#wiki-configuration-file)
|
||||
- (Feature): [**Inline file summary**](https://pr-agent-docs.codium.ai/tools/describe/#inline-file-summary)
|
||||
|
||||
|
||||
## Data Privacy
|
||||
|
||||
If you host PR-Agent with your OpenAI API key, it is between you and OpenAI. You can read their API data privacy policy here:
|
||||
https://openai.com/enterprise-privacy
|
||||
|
||||
When using PR-Agent Pro 💎, hosted by CodiumAI, we will not store any of your data, nor will we use it for training.
|
||||
You will also benefit from an OpenAI account with zero data retention.
|
||||
|
17
docs/docs/overview/data_privacy.md
Normal file
17
docs/docs/overview/data_privacy.md
Normal file
@ -0,0 +1,17 @@
|
||||
## Self-hosted PR-Agent
|
||||
|
||||
- If you host PR-Agent with your OpenAI API key, it is between you and OpenAI. You can read their API data privacy policy here:
|
||||
https://openai.com/enterprise-privacy
|
||||
|
||||
## PR-Agent Pro 💎
|
||||
|
||||
- When using PR-Agent Pro 💎, hosted by CodiumAI, we will not store any of your data, nor will we use it for training. You will also benefit from an OpenAI account with zero data retention.
|
||||
|
||||
- For certain clients, CodiumAI-hosted PR-Agent Pro will use CodiumAI’s proprietary models. If this is the case, you will be notified.
|
||||
|
||||
- No passive collection of Code and Pull Requests’ data — PR-Agent will be active only when you invoke it, and it will then extract and analyze only data relevant to the executed command and queried pull request.
|
||||
|
||||
|
||||
## PR-Agent Chrome extension
|
||||
|
||||
- The [PR-Agent Chrome extension](https://chromewebstore.google.com/detail/pr-agent-chrome-extension/ephlnjeghhogofkifjloamocljapahnl) serves solely to modify the visual appearance of a GitHub PR screen. It does not transmit any user's repo or pull request code. Code is only sent for processing when a user submits a GitHub comment that activates a PR-Agent tool, in accordance with the standard privacy policy of PR-Agent.
|
81
docs/docs/overview/index.md
Normal file
81
docs/docs/overview/index.md
Normal file
@ -0,0 +1,81 @@
|
||||
# Overview
|
||||
|
||||
CodiumAI PR-Agent is an open-source tool to help efficiently review and handle pull requests.
|
||||
|
||||
- See the [Installation Guide](./installation/index.md) for instructions on installing and running the tool on different git platforms.
|
||||
|
||||
- See the [Usage Guide](./usage-guide/index.md) for instructions on running the PR-Agent commands via different interfaces, including _CLI_, _online usage_, or by _automatically triggering_ them when a new PR is opened.
|
||||
|
||||
- See the [Tools Guide](./tools/index.md) for a detailed description of the different tools.
|
||||
|
||||
|
||||
## PR-Agent Features
|
||||
PR-Agent offers extensive pull request functionalities across various git providers.
|
||||
|
||||
| | | GitHub | Gitlab | Bitbucket | Azure DevOps |
|
||||
|-------|-----------------------------------------------------------------------------------------------------------------------|:------:|:------:|:---------:|:------------:|
|
||||
| TOOLS | Review | ✅ | ✅ | ✅ | ✅ |
|
||||
| | ⮑ Incremental | ✅ | | | |
|
||||
| | ⮑ [SOC2 Compliance](https://pr-agent-docs.codium.ai/tools/review/#soc2-ticket-compliance){:target="_blank"} 💎 | ✅ | ✅ | ✅ | ✅ |
|
||||
| | Ask | ✅ | ✅ | ✅ | ✅ |
|
||||
| | Describe | ✅ | ✅ | ✅ | ✅ |
|
||||
| | ⮑ [Inline file summary](https://pr-agent-docs.codium.ai/tools/describe/#inline-file-summary){:target="_blank"} 💎 | ✅ | ✅ | | ✅ |
|
||||
| | Improve | ✅ | ✅ | ✅ | ✅ |
|
||||
| | ⮑ Extended | ✅ | ✅ | ✅ | ✅ |
|
||||
| | [Custom Prompt](./tools/custom_prompt.md){:target="_blank"} 💎 | ✅ | ✅ | ✅ | ✅ |
|
||||
| | Reflect and Review | ✅ | ✅ | ✅ | ✅ |
|
||||
| | Update CHANGELOG.md | ✅ | ✅ | ✅ | ️ |
|
||||
| | Find Similar Issue | ✅ | | | ️ |
|
||||
| | [Add PR Documentation](./tools/documentation.md){:target="_blank"} 💎 | ✅ | ✅ | | ✅ |
|
||||
| | [Generate Custom Labels](./tools/describe.md#handle-custom-labels-from-the-repos-labels-page-💎){:target="_blank"} 💎 | ✅ | ✅ | | ✅ |
|
||||
| | [Analyze PR Components](./tools/analyze.md){:target="_blank"} 💎 | ✅ | ✅ | | ✅ |
|
||||
| | | | | | ️ |
|
||||
| USAGE | CLI | ✅ | ✅ | ✅ | ✅ |
|
||||
| | App / webhook | ✅ | ✅ | ✅ | ✅ |
|
||||
| | Actions | ✅ | | | ️ |
|
||||
| | | | | |
|
||||
| CORE | PR compression | ✅ | ✅ | ✅ | ✅ |
|
||||
| | Repo language prioritization | ✅ | ✅ | ✅ | ✅ |
|
||||
| | Adaptive and token-aware file patch fitting | ✅ | ✅ | ✅ | ✅ |
|
||||
| | Multiple models support | ✅ | ✅ | ✅ | ✅ |
|
||||
| | Incremental PR review | ✅ | | | |
|
||||
| | [Static code analysis](./tools/analyze.md/){:target="_blank"} 💎 | ✅ | ✅ | ✅ | ✅ |
|
||||
| | [Multiple configuration options](./usage-guide/configuration_options.md){:target="_blank"} 💎 | ✅ | ✅ | ✅ | ✅ |
|
||||
|
||||
💎 marks a feature available only in [PR-Agent Pro](https://www.codium.ai/pricing/){:target="_blank"}
|
||||
|
||||
|
||||
## Example Results
|
||||
<hr>
|
||||
|
||||
#### [/describe](https://github.com/Codium-ai/pr-agent/pull/530)
|
||||
<figure markdown="1">
|
||||
{width=512}
|
||||
</figure>
|
||||
<hr>
|
||||
|
||||
#### [/review](https://github.com/Codium-ai/pr-agent/pull/732#issuecomment-1975099151)
|
||||
<figure markdown="1">
|
||||
{width=512}
|
||||
</figure>
|
||||
<hr>
|
||||
|
||||
#### [/improve](https://github.com/Codium-ai/pr-agent/pull/732#issuecomment-1975099159)
|
||||
<figure markdown="1">
|
||||
{width=512}
|
||||
</figure>
|
||||
<hr>
|
||||
|
||||
#### [/generate_labels](https://github.com/Codium-ai/pr-agent/pull/530)
|
||||
<figure markdown="1">
|
||||
{width=300}
|
||||
</figure>
|
||||
<hr>
|
||||
|
||||
## How it Works
|
||||
|
||||
The following diagram illustrates PR-Agent tools and their flow:
|
||||
|
||||

|
||||
|
||||
Check out the [PR Compression strategy](core-abilities/index.md) page for more details on how we convert a code diff to a manageable LLM prompt.
|
18
docs/docs/overview/pr_agent_pro.md
Normal file
18
docs/docs/overview/pr_agent_pro.md
Normal file
@ -0,0 +1,18 @@
|
||||
[PR-Agent Pro](https://www.codium.ai/pricing/) is a hosted version of PR-Agent, provided by CodiumAI. It is available for a monthly fee, and provides the following benefits:
|
||||
|
||||
1. **Fully managed** - We take care of everything for you - hosting, models, regular updates, and more. Installation is as simple as signing up and adding the PR-Agent app to your GitHub\GitLab\BitBucket repo.
|
||||
2. **Improved privacy** - No data will be stored or used to train models. PR-Agent Pro will employ zero data retention, and will use an OpenAI account with zero data retention.
|
||||
3. **Improved support** - PR-Agent Pro users will receive priority support, and will be able to request new features and capabilities.
|
||||
4. **Extra features** - In addition to the benefits listed above, PR-Agent Pro will emphasize more customization, and the usage of static code analysis, in addition to LLM logic, to improve results. It has the following additional tools and features:
|
||||
- (Tool): [**Analyze PR components**](./tools/analyze.md/)
|
||||
- (Tool): [**Custom Prompt Suggestions**](./tools/custom_prompt.md/)
|
||||
- (Tool): [**Tests**](./tools/test.md/)
|
||||
- (Tool): [**PR documentation**](./tools/documentation.md/)
|
||||
- (Tool): [**Improve Component**](https://pr-agent-docs.codium.ai/tools/improve_component/)
|
||||
- (Tool): [**Similar code search**](https://pr-agent-docs.codium.ai/tools/similar_code/)
|
||||
- (Tool): [**CI feedback**](./tools/ci_feedback.md/)
|
||||
- (Feature): [**Interactive triggering**](./usage-guide/automations_and_usage.md/#interactive-triggering)
|
||||
- (Feature): [**SOC2 compliance check**](./tools/review.md/#soc2-ticket-compliance)
|
||||
- (Feature): [**Custom labels**](./tools/describe.md/#handle-custom-labels-from-the-repos-labels-page)
|
||||
- (Feature): [**Global and wiki configuration**](./usage-guide/configuration_options.md/#wiki-configuration-file)
|
||||
- (Feature): [**Inline file summary**](https://pr-agent-docs.codium.ai/tools/describe/#inline-file-summary)
|
@ -1,14 +1,14 @@
|
||||
## Overview
|
||||
The `custom_suggestions` tool scans the PR code changes, and automatically generates suggestions for improving the PR code.
|
||||
It shares similarities with the `improve` tool, but with one main difference: the `custom_suggestions` tool will **only propose suggestions that follow specific guidelines defined by the prompt** in: `pr_custom_suggestions.prompt` configuration.
|
||||
The `custom_prompt` tool scans the PR code changes, and automatically generates suggestions for improving the PR code.
|
||||
It shares similarities with the `improve` tool, but with one main difference: the `custom_prompt` tool will **only propose suggestions that follow specific guidelines defined by the prompt** in: `pr_custom_prompt.prompt` configuration.
|
||||
|
||||
The tool can be triggered [automatically](../usage-guide/automations_and_usage.md#github-app-automatic-tools-when-a-new-pr-is-opened) every time a new PR is opened, or can be invoked manually by commenting on a PR.
|
||||
|
||||
When commenting, use the following template:
|
||||
|
||||
```
|
||||
/custom_suggestions --pr_custom_suggestions.prompt="
|
||||
The suggestions should focus only on the following:
|
||||
/custom_prompt --pr_custom_prompt.prompt="
|
||||
The code suggestions should focus only on the following:
|
||||
- ...
|
||||
- ...
|
||||
|
||||
@ -18,7 +18,7 @@ The suggestions should focus only on the following:
|
||||
With a [configuration file](../usage-guide/automations_and_usage.md#github-app), use the following template:
|
||||
|
||||
```
|
||||
[pr_custom_suggestions]
|
||||
[pr_custom_prompt]
|
||||
prompt="""\
|
||||
The suggestions should focus only on the following:
|
||||
-...
|
||||
@ -34,9 +34,9 @@ You might benefit from several trial-and-error iterations, until you get the cor
|
||||
|
||||
Here is an example of a possible prompt, defined in the configuration file:
|
||||
```
|
||||
[pr_custom_suggestions]
|
||||
[pr_custom_prompt]
|
||||
prompt="""\
|
||||
The suggestions should focus only on the following:
|
||||
The code suggestions should focus only on the following:
|
||||
- look for edge cases when implementing a new function
|
||||
- make sure every variable has a meaningful name
|
||||
- make sure the code is efficient
|
||||
@ -47,15 +47,12 @@ The suggestions should focus only on the following:
|
||||
|
||||
Results obtained with the prompt above:
|
||||
|
||||
[//]: # ({width=512})
|
||||
|
||||
[//]: # (→)
|
||||
{width=768}
|
||||
{width=768}
|
||||
|
||||
## Configuration options
|
||||
|
||||
`prompt`: the prompt for the tool. It should be a multi-line string.
|
||||
|
||||
`num_code_suggestions`: number of code suggestions provided by the 'custom_suggestions' tool. Default is 4.
|
||||
`num_code_suggestions`: number of code suggestions provided by the 'custom_prompt' tool. Default is 4.
|
||||
|
||||
`enable_help_text`: if set to true, the tool will display a help text in the comment. Default is true.
|
@ -44,33 +44,61 @@ publish_labels = ...
|
||||
|
||||
## Configuration options
|
||||
|
||||
### General configurations
|
||||
|
||||
!!! example "Possible configurations"
|
||||
|
||||
- `publish_labels`: if set to true, the tool will publish the labels to the PR. Default is true.
|
||||
<table>
|
||||
<tr>
|
||||
<td><b>publish_labels</b></td>
|
||||
<td>If set to true, the tool will publish the labels to the PR. Default is true.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>publish_description_as_comment</b></td>
|
||||
<td>If set to true, the tool will publish the description as a comment to the PR. If false, it will overwrite the original description. Default is false.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>publish_description_as_comment_persistent</b></td>
|
||||
<td>If set to true and `publish_description_as_comment` is true, the tool will publish the description as a persistent comment to the PR. Default is true.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>add_original_user_description</b></td>
|
||||
<td>If set to true, the tool will add the original user description to the generated description. Default is true.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>generate_ai_title</b></td>
|
||||
<td>If set to true, the tool will also generate an AI title for the PR. Default is false.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>extra_instructions</b></td>
|
||||
<td>Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ..."</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>enable_pr_type</b></td>
|
||||
<td>If set to false, it will not show the `PR type` as a text value in the description content. Default is true.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>final_update_message</b></td>
|
||||
<td>If set to true, it will add a comment message [`PR Description updated to latest commit...`](https://github.com/Codium-ai/pr-agent/pull/499#issuecomment-1837412176) after finishing calling `/describe`. Default is false.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>enable_semantic_files_types</b></td>
|
||||
<td>If set to true, "Changes walkthrough" section will be generated. Default is true.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>collapsible_file_list</b></td>
|
||||
<td>If set to true, the file list in the "Changes walkthrough" section will be collapsible. If set to "adaptive", the file list will be collapsible only if there are more than 8 files. Default is "adaptive".</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>enable_large_pr_handling</b></td>
|
||||
<td>Pro feature. If set to true, in case of a large PR the tool will make several calls to the AI and combine them to be able to cover more files. Default is true.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>enable_help_text</b></td>
|
||||
<td>If set to true, the tool will display a help text in the comment. Default is false.</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
- `publish_description_as_comment`: if set to true, the tool will publish the description as a comment to the PR. If false, it will overwrite the original description. Default is false.
|
||||
|
||||
- `publish_description_as_comment_persistent`: if set to true and `publish_description_as_comment` is true, the tool will publish the description as a persistent comment to the PR. Default is true.
|
||||
|
||||
- `add_original_user_description`: if set to true, the tool will add the original user description to the generated description. Default is true.
|
||||
|
||||
- `generate_ai_title`: if set to true, the tool will also generate an AI title for the PR. Default is false.
|
||||
|
||||
- `extra_instructions`: Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ...".
|
||||
|
||||
- To enable `custom labels`, apply the configuration changes described [here](./custom_labels.md#configuration-options)
|
||||
|
||||
- `enable_pr_type`: if set to false, it will not show the `PR type` as a text value in the description content. Default is true.
|
||||
|
||||
- `final_update_message`: if set to true, it will add a comment message [`PR Description updated to latest commit...`](https://github.com/Codium-ai/pr-agent/pull/499#issuecomment-1837412176) after finishing calling `/describe`. Default is true.
|
||||
|
||||
- `enable_semantic_files_types`: if set to true, "Changes walkthrough" section will be generated. Default is true.
|
||||
- `collapsible_file_list`: if set to true, the file list in the "Changes walkthrough" section will be collapsible. If set to "adaptive", the file list will be collapsible only if there are more than 8 files. Default is "adaptive".
|
||||
- `enable_help_text`: if set to true, the tool will display a help text in the comment. Default is false.
|
||||
|
||||
### Inline file summary 💎
|
||||
## Inline file summary 💎
|
||||
|
||||
This feature enables you to copy the `changes walkthrough` table to the "Files changed" tab, so you can quickly understand the changes in each file while reviewing the code changes (diff view).
|
||||
|
||||
@ -84,7 +112,7 @@ If you prefer to have the file summaries appear in the "Files changed" tab on ev
|
||||
|
||||
{width=512}
|
||||
|
||||
- `true`: A collapsable file comment with changes title and a changes summary for each file in the PR.
|
||||
- `true`: A collapsible file comment with changes title and a changes summary for each file in the PR.
|
||||
|
||||
{width=512}
|
||||
|
||||
@ -93,7 +121,7 @@ If you prefer to have the file summaries appear in the "Files changed" tab on ev
|
||||
**Note**: this feature is currently available only for GitHub.
|
||||
|
||||
|
||||
### Markers template
|
||||
## Markers template
|
||||
|
||||
To enable markers, set `pr_description.use_description_markers=true`.
|
||||
Markers make it easy to integrate user content and auto-generated content, using a template-like mechanism.
|
||||
@ -126,30 +154,33 @@ The marker `pr_agent:type` will be replaced with the PR type, `pr_agent:summary`
|
||||
- `include_generated_by_header`: if set to true, the tool will add a dedicated header: 'Generated by PR Agent at ...' to any automatic content. Default is true.
|
||||
|
||||
## Custom labels
|
||||
|
||||
The default labels of the describe tool are quite generic, since they are meant to be used in any repo: [`Bug fix`, `Tests`, `Enhancement`, `Documentation`, `Other`].
|
||||
|
||||
You can define custom labels that are relevant for your repo and use cases.
|
||||
Custom labels can be defined in a [configuration file](https://pr-agent-docs.codium.ai/tools/custom_labels/#configuration-options), or directly in the repo's [labels page](#handle-custom-labels-from-the-repos-labels-page).
|
||||
|
||||
Examples for custom labels:
|
||||
|
||||
- `Main topic:performance` - pr_agent:The main topic of this PR is performance
|
||||
- `New endpoint` - pr_agent:A new endpoint was added in this PR
|
||||
- `SQL query` - pr_agent:A new SQL query was added in this PR
|
||||
- `Dockerfile changes` - pr_agent:The PR contains changes in the Dockerfile
|
||||
- ...
|
||||
|
||||
The list above is eclectic, and aims to give an idea of different possibilities. Define custom labels that are relevant for your repo and use cases.
|
||||
Note that Labels are not mutually exclusive, so you can add multiple label categories.
|
||||
<br>
|
||||
Make sure to provide a proper title, and a detailed and well-phrased description for each label, so the tool will know when to suggest it.
|
||||
Each label description should be a **conditional statement** that indicates whether or not to add the label to the PR, according to the PR content.
|
||||
|
||||
### Handle custom labels from a configuration file
|
||||
Example for a custom labels configuration setup in a configuration file:
|
||||
```
|
||||
[config]
|
||||
enable_custom_labels=true
|
||||
|
||||
|
||||
[custom_labels."sql_changes"]
|
||||
description = "Use when a PR contains changes to SQL queries"
|
||||
|
||||
[custom_labels."test"]
|
||||
description = "use when a PR primarily contains new tests"
|
||||
|
||||
...
|
||||
```
|
||||
|
||||
### Handle custom labels from the Repo's labels page 💎
|
||||
|
||||
You can control the custom labels that will be suggested by the `describe` tool, from the repo's labels page:
|
||||
You can also control the custom labels that will be suggested by the `describe` tool from the repo's labels page:
|
||||
|
||||
* GitHub : go to `https://github.com/{owner}/{repo}/labels` (or click on the "Labels" tab in the issues or PRs page)
|
||||
* GitLab : go to `https://gitlab.com/{owner}/{repo}/-/labels` (or click on "Manage" -> "Labels" on the left menu)
|
||||
@ -159,6 +190,14 @@ Now add/edit the custom labels. they should be formatted as follows:
|
||||
* Label name: The name of the custom label.
|
||||
* Description: Start the description with the prefix `pr_agent:`, for example: `pr_agent: Description of when AI should suggest this label`.<br>
|
||||
|
||||
Examples for custom labels:
|
||||
|
||||
- `Main topic:performance` - pr_agent:The main topic of this PR is performance
|
||||
- `New endpoint` - pr_agent:A new endpoint was added in this PR
|
||||
- `SQL query` - pr_agent:A new SQL query was added in this PR
|
||||
- `Dockerfile changes` - pr_agent:The PR contains changes in the Dockerfile
|
||||
- ...
|
||||
|
||||
The description should be comprehensive and detailed, indicating when to add the desired label. For example:
|
||||
{width=768}
|
||||
|
||||
|
@ -40,58 +40,124 @@ pr_commands = [
|
||||
]
|
||||
|
||||
[pr_code_suggestions]
|
||||
num_code_suggestions = ...
|
||||
num_code_suggestions_per_chunk = ...
|
||||
...
|
||||
```
|
||||
|
||||
- The `pr_commands` lists commands that will be executed automatically when a PR is opened.
|
||||
- The `[pr_code_suggestions]` section contains the configurations for the `improve` tool you want to edit (if any)
|
||||
|
||||
### Extended mode
|
||||
|
||||
An extended mode, which does not involve PR Compression and provides more comprehensive suggestions, can be enabled by setting:
|
||||
```
|
||||
[pr_code_suggestions]
|
||||
auto_extended_mode=true
|
||||
```
|
||||
(This mode is true by default).
|
||||
|
||||
Note that the extended mode divides the PR code changes into chunks, up to the token limits, where each chunk is handled separately (might use multiple calls to GPT-4 for large PRs).
|
||||
Hence, the total number of suggestions is proportional to the number of chunks, i.e., the size of the PR.
|
||||
|
||||
### Self-review
|
||||
If you set in a configuration file:
|
||||
```
|
||||
[pr_code_suggestions]
|
||||
demand_code_suggestions_self_review = true
|
||||
```
|
||||
The `improve` tool will add a checkbox below the suggestions, prompting the user to acknowledge that they have reviewed the suggestions.
|
||||
You can set the content of the checkbox text via:
|
||||
```
|
||||
[pr_code_suggestions]
|
||||
code_suggestions_self_review_text = "... (your text here) ..."
|
||||
```
|
||||
{width=512}
|
||||
|
||||
💎 In addition, by setting:
|
||||
```
|
||||
[pr_code_suggestions]
|
||||
approve_pr_on_self_review = true
|
||||
```
|
||||
the tool can automatically approve the PR when the user checks the self-review checkbox.
|
||||
|
||||
!!! tip "Demanding self-review from the PR author"
|
||||
If you set the number of required reviewers for a PR to 2, this effectively means that the PR author must click the self-review checkbox before the PR can be merged (in addition to a human reviewer).
|
||||
{width=512}
|
||||
|
||||
|
||||
## Configuration options
|
||||
|
||||
!!! example "General options"
|
||||
|
||||
- `num_code_suggestions`: number of code suggestions provided by the 'improve' tool. Default is 4 for CLI, 0 for auto tools.
|
||||
- `extra_instructions`: Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ...".
|
||||
- `rank_suggestions`: if set to true, the tool will rank the suggestions, based on importance. Default is false.
|
||||
- `commitable_code_suggestions`: if set to true, the tool will display the suggestions as commitable code comments. Default is false.
|
||||
- `persistent_comment`: if set to true, the improve comment will be persistent, meaning that every new improve request will edit the previous one. Default is false.
|
||||
- `enable_help_text`: if set to true, the tool will display a help text in the comment. Default is true.
|
||||
|
||||
!!! example "params for '/improve --extended' mode"
|
||||
|
||||
- `auto_extended_mode`: enable extended mode automatically (no need for the `--extended` option). Default is true.
|
||||
- `num_code_suggestions_per_chunk`: number of code suggestions provided by the 'improve' tool, per chunk. Default is 5.
|
||||
- `rank_extended_suggestions`: if set to true, the tool will rank the suggestions, based on importance. Default is true.
|
||||
- `max_number_of_calls`: maximum number of chunks. Default is 5.
|
||||
- `final_clip_factor`: factor to remove suggestions with low confidence. Default is 0.9.
|
||||
|
||||
## Extended mode
|
||||
|
||||
An extended mode, which does not involve PR Compression and provides more comprehensive suggestions, can be invoked by commenting on any PR:
|
||||
```
|
||||
/improve --extended
|
||||
```
|
||||
|
||||
or by setting:
|
||||
```
|
||||
[pr_code_suggestions]
|
||||
auto_extended_mode=true
|
||||
```
|
||||
(True by default).
|
||||
|
||||
Note that the extended mode divides the PR code changes into chunks, up to the token limits, where each chunk is handled separately (might use multiple calls to GPT-4 for large PRs).
|
||||
Hence, the total number of suggestions is proportional to the number of chunks, i.e., the size of the PR.
|
||||
<table>
|
||||
<tr>
|
||||
<td><b>num_code_suggestions</b></td>
|
||||
<td>Number of code suggestions provided by the 'improve' tool. Default is 4 for CLI, 0 for auto tools.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>extra_instructions</b></td>
|
||||
<td>Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ...".</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>rank_suggestions</b></td>
|
||||
<td>If set to true, the tool will rank the suggestions, based on importance. Default is false.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>commitable_code_suggestions</b></td>
|
||||
<td>If set to true, the tool will display the suggestions as commitable code comments. Default is false.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>persistent_comment</b></td>
|
||||
<td>If set to true, the improve comment will be persistent, meaning that every new improve request will edit the previous one. Default is false.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>self_reflect_on_suggestions</b></td>
|
||||
<td>If set to true, the improve tool will calculate an importance score for each suggestion [1-10], and sort the suggestion labels group based on this score. Default is true.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>suggestions_score_threshold</b></td>
|
||||
<td> Any suggestion with importance score less than this threshold will be removed. Default is 0. We highly recommend not setting this value above 7-8, since higher values may clip relevant suggestions that can be useful. </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>apply_suggestions_checkbox</b></td>
|
||||
<td> Enable the checkbox to create a committable suggestion. Default is true.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>enable_help_text</b></td>
|
||||
<td>If set to true, the tool will display a help text in the comment. Default is true.</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
!!! example "params for 'extended' mode"
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td><b>auto_extended_mode</b></td>
|
||||
<td>Enable extended mode automatically (no need for the --extended option). Default is true.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>num_code_suggestions_per_chunk</b></td>
|
||||
<td>Number of code suggestions provided by the 'improve' tool, per chunk. Default is 5.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>rank_extended_suggestions</b></td>
|
||||
<td>If set to true, the tool will rank the suggestions, based on importance. Default is true.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>max_number_of_calls</b></td>
|
||||
<td>Maximum number of chunks. Default is 5.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>final_clip_factor</b></td>
|
||||
<td>Factor to remove suggestions with low confidence. Default is 0.9.</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
## Usage Tips
|
||||
|
||||
!!! tip "Extra instructions"
|
||||
|
||||
Extra instructions are very important for the `imrpove` tool, since they enable you to guide the model to suggestions that are more relevant to the specific needs of the project.
|
||||
Extra instructions are very important for the `improve` tool, since they enable you to guide the model to suggestions that are more relevant to the specific needs of the project.
|
||||
|
||||
Be specific, clear, and concise in the instructions. With extra instructions, you are the prompter. Specify relevant aspects that you want the model to focus on.
|
||||
|
||||
@ -110,7 +176,7 @@ Hence, the total number of suggestions is proportional to the number of chunks,
|
||||
|
||||
!!! tip "Review vs. Improve tools comparison"
|
||||
|
||||
- The [`review`](https://pr-agent-docs.codium.ai/tools/review/) tool includes a section called 'Possible issues', that also provide feedback on the PR Code.
|
||||
- The [review](https://pr-agent-docs.codium.ai/tools/review/) tool includes a section called 'Possible issues', that also provides feedback on the PR Code.
|
||||
In this section, the model is instructed to focus **only** on [major bugs and issues](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/pr_reviewer_prompts.toml#L71).
|
||||
- The `improve` tool, on the other hand, has a broader mandate, and in addition to bugs and issues, it can also give suggestions for improving code quality and making the code more efficient, readable, and maintainable (see [here](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/pr_code_suggestions_prompts.toml#L34)).
|
||||
- Hence, if you are interested only in feedback about clear bugs, the `review` tool might suffice. If you want more detailed feedback, including broader suggestions for improving the PR code, also enable the `improve` tool to run on each PR.
|
||||
@ -124,6 +190,6 @@ Hence, the total number of suggestions is proportional to the number of chunks,
|
||||
- Only if the `Category` header is relevant, the user should move to the summarized suggestion description
|
||||
- Only if the summarized suggestion description is relevant, the user should click on the collapsible, to read the full suggestion description with a code preview example.
|
||||
|
||||
In addition, we recommend to use the `exra_instructions` field to guide the model to suggestions that are more relevant to the specific needs of the project.
|
||||
In addition, we recommend to use the `extra_instructions` field to guide the model to suggestions that are more relevant to the specific needs of the project.
|
||||
<br>
|
||||
Consider also trying the [Custom Suggestions Tool](./custom_suggestions.md) 💎, that will **only** propose suggestions that follow specific guidelines defined by user.
|
||||
Consider also trying the [Custom Prompt Tool](./custom_prompt.md) 💎, which will **only** propose code suggestions that follow specific guidelines defined by the user.
|
||||
|
@ -14,7 +14,7 @@ Here is a list of PR-Agent tools, each with a dedicated page that explains how t
|
||||
| **💎 [Add Documentation (`/add_docs`](./documentation.md))** | Generates documentation to methods/functions/classes that changed in the PR |
|
||||
| **💎 [Generate Custom Labels (`/generate_labels`](./custom_labels.md))** | Generates custom labels for the PR, based on specific guidelines defined by the user |
|
||||
| **💎 [Analyze (`/analyze`](./analyze.md))** | Identifies code components that changed in the PR, and enables interactive generation of tests, docs, and code suggestions for each component |
|
||||
| **💎 [Custom Suggestions (`/custom_suggestions`](./custom_suggestions.md))** | Automatically generates custom suggestions for improving the PR code, based on specific guidelines defined by the user |
|
||||
| **💎 [Custom Prompt (`/custom_prompt`](./custom_prompt.md))** | Automatically generates custom suggestions for improving the PR code, based on specific guidelines defined by the user |
|
||||
| **💎 [Generate Tests (`/test component_name`](./test.md))** | Automatically generates unit tests for a selected component, based on the PR code changes |
|
||||
| **💎 [Improve Component (`/improve_component component_name`](./improve_component.md))** | Generates code suggestions for a specific code component that changed in the PR |
|
||||
| **💎 [CI Feedback (`/checks ci_job`](./ci_feedback.md))** | Automatically generates feedback and analysis for a failed CI job |
|
||||
|
@ -1,10 +1,13 @@
|
||||
## Overview
|
||||
The `review` tool scans the PR code changes, and automatically generates a PR review.
|
||||
The `review` tool scans the PR code changes, and generates a list of feedback items about the PR, aiming to aid the reviewing process.
|
||||
<br>
|
||||
The tool can be triggered automatically every time a new PR is [opened](../usage-guide/automations_and_usage.md#github-app-automatic-tools-when-a-new-pr-is-opened), or can be invoked manually by commenting on any PR:
|
||||
```
|
||||
/review
|
||||
```
|
||||
|
||||
Note that the main purpose of the `review` tool is to provide the **PR reviewer** with useful feedback and insights. The PR author, in contrast, may prefer to save time and focus on the output of the [improve](./improve.md) tool, which provides actionable code suggestions.
|
||||
|
||||
## Example usage
|
||||
|
||||
### Manual triggering
|
||||
@ -40,45 +43,6 @@ num_code_suggestions = ...
|
||||
- The `pr_commands` lists commands that will be executed automatically when a PR is opened.
|
||||
- The `[pr_reviewer]` section contains the configurations for the `review` tool you want to edit (if any).
|
||||
|
||||
|
||||
## Configuration options
|
||||
|
||||
### General configurations
|
||||
|
||||
!!! example "General options"
|
||||
- <a name="num_code_suggestions"></a>`num_code_suggestions`: number of code suggestions provided by the 'review' tool. For manual comments, default is 4. For [PR-Agent app](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L142) auto tools, default is 0, meaning no code suggestions will be provided by the review tool, unless you manually edit `pr_commands`.
|
||||
- <a name="inline_code_comments"></a>`inline_code_comments`: if set to true, the tool will publish the code suggestions as comments on the code diff. Default is false.
|
||||
- <a name="persistent_comment"></a>`persistent_comment`: if set to true, the review comment will be persistent, meaning that every new review request will edit the previous one. Default is true.
|
||||
- <a name="extra_instructions"></a>`extra_instructions`: Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ...".
|
||||
- <a name="enable_help_text"></a>`enable_help_text`: if set to true, the tool will display a help text in the comment. Default is true.
|
||||
|
||||
!!! example "Enable\\disable sub-sections"
|
||||
You can enable or disable specific sub-sections of the review tool:
|
||||
|
||||
- <a name="require_score_review"></a>`require_score_review`: if set to true, the tool will add a section that scores the PR. Default is false.
|
||||
- <a name="require_tests_review"></a>`require_tests_review`: if set to true, the tool will add a section that checks if the PR contains tests. Default is true.
|
||||
- <a name="require_estimate_effort_to_review"></a>`require_estimate_effort_to_review`: if set to true, the tool will add a section that estimates the effort needed to review the PR. Default is true.
|
||||
- <a name="require_can_be_split_review"></a>`require_can_be_split_review`: if set to true, the tool will add a section that checks if the PR contains several themes, and can be split into smaller PRs. Default is false.
|
||||
|
||||
!!! example "SOC2 ticket compliance 💎"
|
||||
|
||||
This sub-tool checks if the PR description properly contains a ticket to a project management system (e.g., Jira, Asana, Trello, etc.), as required by SOC2 compliance. If not, it will add a label to the PR: "Missing SOC2 ticket".
|
||||
|
||||
- <a name="require_soc2_ticket"></a>`require_soc2_ticket`: If set to true, the SOC2 ticket checker sub-tool will be enabled. Default is false.
|
||||
- <a name="soc2_ticket_prompt"></a>`soc2_ticket_prompt`: The prompt for the SOC2 ticket review. Default is: `Does the PR description include a link to ticket in a project management system (e.g., Jira, Asana, Trello, etc.) ?`. Edit this field if your compliance requirements are different.
|
||||
|
||||
!!! example "Adding PR labels"
|
||||
You can enable the tool to add specific labels to the PR:
|
||||
|
||||
- <a name="enable_review_labels_security"></a>`enable_review_labels_security`: if set to true, the tool will publish a 'possible security issue' label if it detects a security issue. Default is true.
|
||||
- <a name="enable_review_labels_effort"></a>`enable_review_labels_effort`: if set to true, the tool will publish a 'Review effort [1-5]: x' label. Default is true.
|
||||
|
||||
!!! example "Auto-approval"
|
||||
The review tool can approve a PR when a specific comment, `/review auto_approve` is invoked.
|
||||
|
||||
- <a name="enable_auto_approval"></a>`enable_auto_approval`: if set to true, the tool will approve the PR when invoked with the 'auto_approve' command. Default is false. This flag can be changed only from configuration file.
|
||||
- <a name="maximal_review_effort"></a>`maximal_review_effort`: maximal effort level for auto-approval. If the PR's estimated review effort is above this threshold, the auto-approval will not run. Default is 5.
|
||||
|
||||
### Incremental Mode
|
||||
Incremental review only considers changes since the last PR-Agent review. This can be useful when working on the PR in an iterative manner, and you want to focus on the changes since the last review instead of reviewing the entire PR again.
|
||||
For invoking the incremental mode, the following command can be used:
|
||||
@ -89,26 +53,132 @@ Note that the incremental mode is only available for GitHub.
|
||||
|
||||
{width=512}
|
||||
|
||||
### PR Reflection
|
||||
[//]: # (### PR Reflection)
|
||||
|
||||
By invoking:
|
||||
```
|
||||
/reflect_and_review
|
||||
```
|
||||
The tool will first ask the author questions about the PR, and will guide the review based on their answers.
|
||||
[//]: # ()
|
||||
[//]: # (By invoking:)
|
||||
|
||||
{width=512}
|
||||
[//]: # (```)
|
||||
|
||||
{width=512}
|
||||
[//]: # (/reflect_and_review)
|
||||
|
||||
{width=512}
|
||||
[//]: # (```)
|
||||
|
||||
[//]: # (The tool will first ask the author questions about the PR, and will guide the review based on their answers.)
|
||||
|
||||
[//]: # ()
|
||||
[//]: # ({width=512})
|
||||
|
||||
[//]: # ()
|
||||
[//]: # ({width=512})
|
||||
|
||||
[//]: # ()
|
||||
[//]: # ({width=512})
|
||||
|
||||
|
||||
|
||||
## Configuration options
|
||||
|
||||
!!! example "General options"
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td><b>num_code_suggestions</b></td>
|
||||
<td>Number of code suggestions provided by the 'review' tool. For manual comments, default is 4. For PR-Agent app auto tools, default is 0, meaning no code suggestions will be provided by the review tool, unless you manually edit pr_commands.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>inline_code_comments</b></td>
|
||||
<td>If set to true, the tool will publish the code suggestions as comments on the code diff. Default is false.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>persistent_comment</b></td>
|
||||
<td>If set to true, the review comment will be persistent, meaning that every new review request will edit the previous one. Default is true.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>extra_instructions</b></td>
|
||||
<td>Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ...".</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>enable_help_text</b></td>
|
||||
<td>If set to true, the tool will display a help text in the comment. Default is true.</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
!!! example "Enable\\disable specific sub-sections"
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td><b>require_score_review</b></td>
|
||||
<td>If set to true, the tool will add a section that scores the PR. Default is false.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>require_tests_review</b></td>
|
||||
<td>If set to true, the tool will add a section that checks if the PR contains tests. Default is true.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>require_estimate_effort_to_review</b></td>
|
||||
<td>If set to true, the tool will add a section that estimates the effort needed to review the PR. Default is true.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>require_can_be_split_review</b></td>
|
||||
<td>If set to true, the tool will add a section that checks if the PR contains several themes, and can be split into smaller PRs. Default is false.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>require_security_review</b></td>
|
||||
<td>If set to true, the tool will add a section that checks if the PR contains a possible security or vulnerability issue. Default is true.</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
!!! example "SOC2 ticket compliance 💎"
|
||||
|
||||
This sub-tool checks if the PR description properly contains a ticket to a project management system (e.g., Jira, Asana, Trello, etc.), as required by SOC2 compliance. If not, it will add a label to the PR: "Missing SOC2 ticket".
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td><b>require_soc2_ticket</b></td>
|
||||
<td>If set to true, the SOC2 ticket checker sub-tool will be enabled. Default is false.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>soc2_ticket_prompt</b></td>
|
||||
<td>The prompt for the SOC2 ticket review. Default is: `Does the PR description include a link to ticket in a project management system (e.g., Jira, Asana, Trello, etc.) ?`. Edit this field if your compliance requirements are different.</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
!!! example "Adding PR labels"
|
||||
|
||||
You can enable\disable the `review` tool to add specific labels to the PR:
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td><b>enable_review_labels_security</b></td>
|
||||
<td>If set to true, the tool will publish a 'possible security issue' label if it detects a security issue. Default is true.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>enable_review_labels_effort</b></td>
|
||||
<td>If set to true, the tool will publish a 'Review effort [1-5]: x' label. Default is true.</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
!!! example "Auto-approval"
|
||||
|
||||
If enabled, the `review` tool can approve a PR when a specific comment, `/review auto_approve`, is invoked.
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td><b>enable_auto_approval</b></td>
|
||||
<td>If set to true, the tool will approve the PR when invoked with the 'auto_approve' command. Default is false. This flag can be changed only from configuration file.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><b>maximal_review_effort</b></td>
|
||||
<td>Maximal effort level for auto-approval. If the PR's estimated review effort is above this threshold, the auto-approval will not run. Default is 5.</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
## Usage Tips
|
||||
|
||||
!!! tip "General guidelines"
|
||||
|
||||
The `review` tool provides a collection of possible feedbacks about a PR.
|
||||
The `review` tool provides a collection of configurable feedbacks about a PR.
|
||||
It is recommended to review the [Configuration options](#configuration-options) section, and choose the relevant options for your use case.
|
||||
|
||||
Some of the features that are disabled by default are quite useful, and should be considered for enabling. For example:
|
||||
@ -124,13 +194,6 @@ The tool will first ask the author questions about the PR, and will guide the re
|
||||
Meaning the `review` tool will run automatically on every PR, without providing code suggestions.
|
||||
Edit this field to enable/disable the tool, or to change the used configurations.
|
||||
|
||||
!!! tip "Code suggestions"
|
||||
|
||||
If you set `num_code_suggestions`>0 , the `review` tool will also provide code suggestions.
|
||||
|
||||
Note that if you are interested **only** in the code suggestions, it is recommended to use the [`improve`](./improve.md) feature instead, since it is dedicated only to code suggestions, and usually gives better results.
|
||||
Use the `review` tool if you want to get more comprehensive feedback, which includes code suggestions as well.
|
||||
|
||||
!!! tip "Possible labels from the review tool"
|
||||
|
||||
The `review` tool can auto-generate two specific types of labels for a PR:
|
||||
@ -185,3 +248,14 @@ The tool will first ask the author questions about the PR, and will guide the re
|
||||
[pr_reviewer]
|
||||
maximal_review_effort = 5
|
||||
```
|
||||
|
||||
[//]: # (!!! tip "Code suggestions")
|
||||
|
||||
[//]: # ()
|
||||
[//]: # ( If you set `num_code_suggestions`>0 , the `review` tool will also provide code suggestions.)
|
||||
|
||||
[//]: # ( )
|
||||
[//]: # ( Notice If you are interested **only** in the code suggestions, it is recommended to use the [`improve`](./improve.md) feature instead, since it is a dedicated only to code suggestions, and usually gives better results.)
|
||||
|
||||
[//]: # ( Use the `review` tool if you want to get more comprehensive feedback, which includes code suggestions as well.)
|
||||
|
||||
|
@ -33,7 +33,7 @@ environment = "..."
|
||||
These parameters can be obtained by registering to [Pinecone](https://app.pinecone.io/?sessionType=signup/).
|
||||
|
||||
|
||||
## How to use:
|
||||
## How to use
|
||||
- To invoke the 'similar issue' tool from **CLI**, run:
|
||||
`python3 cli.py --issue_url=... similar_issue`
|
||||
|
||||
|
@ -2,20 +2,32 @@
|
||||
|
||||
In some cases, you may want to exclude specific files or directories from the analysis performed by CodiumAI PR-Agent. This can be useful, for example, when you have files that are generated automatically or files that shouldn't be reviewed, like vendored code.
|
||||
|
||||
To ignore files or directories, edit the **[ignore.toml](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/ignore.toml)** configuration file. This setting also exposes the following environment variables:
|
||||
|
||||
You can ignore files or folders using the following methods:
|
||||
- `IGNORE.GLOB`
|
||||
- `IGNORE.REGEX`
|
||||
|
||||
For example, to ignore Python files in a PR with online usage, comment on a PR:
|
||||
`/review --ignore.glob=['*.py']`
|
||||
which you can edit to ignore files or folders based on glob or regex patterns.
|
||||
|
||||
To ignore Python files in all PRs, set in a configuration file:
|
||||
### Example usage
|
||||
|
||||
Let's look at an example where we want to ignore all files with `.py` extension from the analysis.
|
||||
|
||||
To ignore Python files in a PR with online usage, comment on a PR:
|
||||
`/review --ignore.glob="['*.py']"`
|
||||
|
||||
|
||||
To ignore Python files in all PRs using `glob` pattern, set in a configuration file:
|
||||
```
|
||||
[ignore]
|
||||
glob = ['*.py']
|
||||
```
|
||||
|
||||
And to ignore Python files in all PRs using `regex` pattern, set in a configuration file:
|
||||
```
|
||||
[ignore]
|
||||
regex = ['.*\.py$']
|
||||
```
|
||||
|
||||
## Extra instructions
|
||||
|
||||
All PR-Agent tools have a parameter called `extra_instructions`, which enables adding free-text extra instructions. Example usage:
|
||||
@ -59,12 +71,12 @@ and set in your configuration file:
|
||||
model="" # the OpenAI model you've deployed on Azure (e.g. gpt-3.5-turbo)
|
||||
```
|
||||
|
||||
### Huggingface
|
||||
### Hugging Face
|
||||
|
||||
**Local**
|
||||
You can run Huggingface models locally through either [VLLM](https://docs.litellm.ai/docs/providers/vllm) or [Ollama](https://docs.litellm.ai/docs/providers/ollama)
|
||||
You can run Hugging Face models locally through either [VLLM](https://docs.litellm.ai/docs/providers/vllm) or [Ollama](https://docs.litellm.ai/docs/providers/ollama)
|
||||
|
||||
E.g. to use a new Huggingface model locally via Ollama, set:
|
||||
E.g. to use a new Hugging Face model locally via Ollama, set:
|
||||
```
|
||||
[__init__.py]
|
||||
MAX_TOKENS = {
|
||||
@ -82,14 +94,14 @@ model = "ollama/llama2"
|
||||
model_turbo = "ollama/llama2"
|
||||
|
||||
[ollama] # in .secrets.toml
|
||||
api_base = ... # the base url for your huggingface inference endpoint
|
||||
api_base = ... # the base url for your Hugging Face inference endpoint
|
||||
# e.g. if running Ollama locally, you may use:
|
||||
api_base = "http://localhost:11434/"
|
||||
```
|
||||
|
||||
### Inference Endpoints
|
||||
|
||||
To use a new model with Huggingface Inference Endpoints, for example, set:
|
||||
To use a new model with Hugging Face Inference Endpoints, for example, set:
|
||||
```
|
||||
[__init__.py]
|
||||
MAX_TOKENS = {
|
||||
@ -105,8 +117,8 @@ model = "huggingface/meta-llama/Llama-2-7b-chat-hf"
|
||||
model_turbo = "huggingface/meta-llama/Llama-2-7b-chat-hf"
|
||||
|
||||
[huggingface] # in .secrets.toml
|
||||
key = ... # your huggingface api key
|
||||
api_base = ... # the base url for your huggingface inference endpoint
|
||||
key = ... # your Hugging Face api key
|
||||
api_base = ... # the base url for your Hugging Face inference endpoint
|
||||
```
|
||||
(you can obtain a Llama2 key from [here](https://replicate.com/replicate/llama-2-70b-chat/api))
|
||||
|
||||
|
@ -81,7 +81,7 @@ Every time you run the `describe` tool, including automatic runs, the PR title w
|
||||
To cancel the automatic run of all the tools, set:
|
||||
```
|
||||
[github_app]
|
||||
handle_pr_actions = []
|
||||
pr_commands = []
|
||||
```
|
||||
|
||||
You can also disable automatic runs for PRs with specific titles, by setting the `ignore_pr_titles` parameter with the relevant regex. For example:
|
||||
@ -118,7 +118,6 @@ Specifically, start by setting the following environment variables:
|
||||
github_action_config.auto_review: "true" # enable\disable auto review
|
||||
github_action_config.auto_describe: "true" # enable\disable auto describe
|
||||
github_action_config.auto_improve: "true" # enable\disable auto improve
|
||||
github_action_config.enable_output: "true" # enable\disable github actions output parameter
|
||||
```
|
||||
`github_action_config.auto_review`, `github_action_config.auto_describe` and `github_action_config.auto_improve` are used to enable/disable automatic tools that run when a new PR is opened.
|
||||
If not set, the default configuration is for all three tools to run automatically when a new PR is opened.
|
||||
@ -137,7 +136,7 @@ publish_labels = false
|
||||
to prevent PR-Agent from publishing labels when running the `describe` tool.
|
||||
|
||||
## GitLab Webhook
|
||||
After setting up a GitLab webhook, to control which commands will run automatically when a new PR is opened, you can set the `pr_commands` parameter in the configuration file, similar to the GitHub App:
|
||||
After setting up a GitLab webhook, to control which commands will run automatically when a new MR is opened, you can set the `pr_commands` parameter in the configuration file, similar to the GitHub App:
|
||||
```
|
||||
[gitlab]
|
||||
pr_commands = [
|
||||
@ -147,6 +146,20 @@ pr_commands = [
|
||||
]
|
||||
```
|
||||
|
||||
The GitLab webhook can also respond to new code that is pushed to an open MR.
|
||||
The configuration toggle `handle_push_trigger` can be used to enable this feature.
|
||||
The configuration parameter `push_commands` defines the list of tools that will be **run automatically** when new code is pushed to the MR.
|
||||
```
|
||||
[gitlab]
|
||||
handle_push_trigger = true
|
||||
push_commands = [
|
||||
"/describe",
|
||||
"/review --pr_reviewer.num_code_suggestions=0 --pr_reviewer.final_update_message=false",
|
||||
]
|
||||
```
|
||||
|
||||
Note that to use the 'handle_push_trigger' feature, you also need to give the GitLab webhook the "Push events" scope.
|
||||
|
||||
## BitBucket App
|
||||
Similar to GitHub app, when running PR-Agent from BitBucket App, the default [configuration file](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml) from a pre-built docker will be initially loaded.
|
||||
|
||||
@ -169,9 +182,11 @@ Specifically, set the following values:
|
||||
[bitbucket_app]
|
||||
pr_commands = [
|
||||
"/review --pr_reviewer.num_code_suggestions=0",
|
||||
"/improve --pr_code_suggestions.commitable_code_suggestions=true",
|
||||
"/improve --pr_code_suggestions.commitable_code_suggestions=true --pr_code_suggestions.suggestions_score_threshold=7",
|
||||
]
|
||||
```
|
||||
Note that specifically for Bitbucket, we recommend using `--pr_code_suggestions.suggestions_score_threshold=7`, which is also the default value we set for Bitbucket.
|
||||
Since this platform only supports inline code suggestions, we want to limit the number of suggestions, and only present a limited number.
|
||||
|
||||
## Azure DevOps provider
|
||||
|
||||
|
@ -11,9 +11,14 @@ There are three ways to set persistent configurations:
|
||||
|
||||
In terms of precedence, wiki configurations will override local configurations, and local configurations will override global configurations.
|
||||
|
||||
!!! tip "Tip1: edit only what you need"
|
||||
Your configuration file should be minimal, and edit only the relevant values. Don't copy the entire configuration options, since it can lead to legacy problems when something changes.
|
||||
!!! tip "Tip2: show relevant configurations"
|
||||
If you set `config.output_relevant_configurations=true`, each tool will also output in a collapsible section its relevant configurations. This can be useful for debugging, or getting to know the configurations better.
|
||||
|
||||
## Wiki configuration file 💎
|
||||
|
||||
Specifically for GitHub, with PR-Agent-Pro you can set configurations by creating a page called `.pr_agent.toml` in the [wiki](https://github.com/Codium-ai/pr-agent/wiki/pr_agent.toml) of the repo.
|
||||
For GitHub and GitLab, with PR-Agent-Pro you can set configurations by creating a page called `.pr_agent.toml` in the [wiki](https://github.com/Codium-ai/pr-agent/wiki/pr_agent.toml) of the repo.
|
||||
The advantage of this method is that it allows setting configurations without needing to commit new content to the repo - just edit the wiki page and **save**.
|
||||
|
||||
{width=512}
|
||||
|
@ -1,17 +1,17 @@
|
||||
|
||||
After [installation](https://codium-ai.github.io/Docs-PR-Agent/installation/), there are three basic ways to invoke CodiumAI PR-Agent:
|
||||
After [installation](https://pr-agent-docs.codium.ai/installation/), there are three basic ways to invoke CodiumAI PR-Agent:
|
||||
|
||||
1. Locally running a CLI command
|
||||
2. Online usage - by [commenting](https://github.com/Codium-ai/pr-agent/pull/229#issuecomment-1695021901) on a PR
|
||||
3. Enabling PR-Agent tools to run automatically when a new PR is opened
|
||||
|
||||
|
||||
Specifically, CLI commands can be issued by invoking a pre-built [docker image](https://codium-ai.github.io/Docs-PR-Agent/installation/#run-from-source), or by invoking a [locally cloned repo](https://codium-ai.github.io/Docs-PR-Agent/installation/#locally).
|
||||
For online usage, you will need to setup either a [GitHub App](https://codium-ai.github.io/Docs-PR-Agent/installation/#run-as-a-github-app), or a [GitHub Action](https://codium-ai.github.io/Docs-PR-Agent/installation/#run-as-a-github-action).
|
||||
Specifically, CLI commands can be issued by invoking a pre-built [docker image](https://pr-agent-docs.codium.ai/installation/locally/#using-docker-image), or by invoking a [locally cloned repo](https://pr-agent-docs.codium.ai/installation/locally/#run-from-source).
|
||||
For online usage, you will need to setup either a [GitHub App](https://pr-agent-docs.codium.ai/installation/github/#run-as-a-github-app), or a [GitHub Action](https://pr-agent-docs.codium.ai/installation/github/#run-as-a-github-action).
|
||||
GitHub App and GitHub Action also enable running specific PR-Agent tools automatically when a new PR is opened.
|
||||
|
||||
|
||||
**git provider**: The [git_provider](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L4) field in the configuration file determines the GIT provider that will be used by PR-Agent. Currently, the following providers are supported:
|
||||
**git provider**: The [git_provider](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L5) field in the configuration file determines the GIT provider that will be used by PR-Agent. Currently, the following providers are supported:
|
||||
`
|
||||
"github", "gitlab", "bitbucket", "azure", "codecommit", "local", "gerrit"
|
||||
`
|
||||
|
@ -3,7 +3,10 @@ repo_url: https://github.com/Codium-ai/pr-agent
|
||||
repo_name: Codium-ai/pr-agent
|
||||
|
||||
nav:
|
||||
- Overview: 'index.md'
|
||||
- Overview:
|
||||
- 'index.md'
|
||||
- 💎 PR-Agent Pro: 'overview/pr_agent_pro.md'
|
||||
- Data Privacy: 'overview/data_privacy.md'
|
||||
- Installation:
|
||||
- 'installation/index.md'
|
||||
- Locally: 'installation/locally.md'
|
||||
@ -33,10 +36,12 @@ nav:
|
||||
- 💎 Improve Component: 'tools/improve_component.md'
|
||||
- 💎 Documentation: 'tools/documentation.md'
|
||||
- 💎 Custom Labels: 'tools/custom_labels.md'
|
||||
- 💎 Custom Suggestions: 'tools/custom_suggestions.md'
|
||||
- 💎 Custom Prompt: 'tools/custom_prompt.md'
|
||||
- 💎 CI Feedback: 'tools/ci_feedback.md'
|
||||
- 💎 Similar Code: 'tools/similar_code.md'
|
||||
- Core Abilities: 'core-abilities/index.md'
|
||||
- Chrome Extension: 'chrome-extension/index.md'
|
||||
- Code Fine-tuning Benchmark: 'finetuning_benchmark/index.md'
|
||||
|
||||
theme:
|
||||
logo: assets/logo.svg
|
||||
@ -56,7 +61,7 @@ theme:
|
||||
- content.tabs.link
|
||||
- content.code.annotation
|
||||
- content.code.copy
|
||||
- toc.integrate
|
||||
- content.tabs.link
|
||||
language: en
|
||||
custom_dir: overrides
|
||||
|
||||
@ -126,8 +131,9 @@ markdown_extensions:
|
||||
emoji_generator: !!python/name:material.extensions.emoji.to_svg
|
||||
- toc:
|
||||
title: On this page
|
||||
toc_depth: 3
|
||||
toc_depth: 2
|
||||
permalink: true
|
||||
|
||||
|
||||
copyright: |
|
||||
© 2024 <a href="https://www.codium.ai/" target="_blank" rel="noopener">CodiumAI</a>
|
||||
|
@ -46,6 +46,7 @@ command2class = {
|
||||
|
||||
commands = list(command2class.keys())
|
||||
|
||||
|
||||
class PRAgent:
|
||||
def __init__(self, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
|
||||
self.ai_handler = ai_handler # will be initialized in run_action
|
||||
@ -68,7 +69,9 @@ class PRAgent:
|
||||
for forbidden_arg in self.forbidden_cli_args:
|
||||
for arg in args:
|
||||
if forbidden_arg in arg:
|
||||
get_logger().error(f"CLI argument for param '{forbidden_arg}' is forbidden. Use instead a configuration file.")
|
||||
get_logger().error(
|
||||
f"CLI argument for param '{forbidden_arg}' is forbidden. Use instead a configuration file."
|
||||
)
|
||||
return False
|
||||
args = update_settings_from_args(args)
|
||||
|
||||
@ -94,4 +97,3 @@ class PRAgent:
|
||||
else:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
@ -11,6 +11,8 @@ MAX_TOKENS = {
|
||||
'gpt-4-32k': 32000,
|
||||
'gpt-4-1106-preview': 128000, # 128K, but may be limited by config.max_model_tokens
|
||||
'gpt-4-0125-preview': 128000, # 128K, but may be limited by config.max_model_tokens
|
||||
'gpt-4o': 128000, # 128K, but may be limited by config.max_model_tokens
|
||||
'gpt-4o-2024-05-13': 128000, # 128K, but may be limited by config.max_model_tokens
|
||||
'gpt-4-turbo-preview': 128000, # 128K, but may be limited by config.max_model_tokens
|
||||
'gpt-4-turbo-2024-04-09': 128000, # 128K, but may be limited by config.max_model_tokens
|
||||
'gpt-4-turbo': 128000, # 128K, but may be limited by config.max_model_tokens
|
||||
@ -21,17 +23,25 @@ MAX_TOKENS = {
|
||||
'meta-llama/Llama-2-7b-chat-hf': 4096,
|
||||
'vertex_ai/codechat-bison': 6144,
|
||||
'vertex_ai/codechat-bison-32k': 32000,
|
||||
'vertex_ai/claude-3-haiku@20240307': 100000,
|
||||
'vertex_ai/claude-3-sonnet@20240229': 100000,
|
||||
'vertex_ai/claude-3-opus@20240229': 100000,
|
||||
'vertex_ai/claude-3-5-sonnet@20240620': 100000,
|
||||
'vertex_ai/gemini-1.5-pro': 1048576,
|
||||
'codechat-bison': 6144,
|
||||
'codechat-bison-32k': 32000,
|
||||
'anthropic.claude-instant-v1': 100000,
|
||||
'anthropic.claude-v1': 100000,
|
||||
'anthropic.claude-v2': 100000,
|
||||
'anthropic/claude-3-opus-20240229': 100000,
|
||||
'anthropic/claude-3-5-sonnet-20240620': 100000,
|
||||
'bedrock/anthropic.claude-instant-v1': 100000,
|
||||
'bedrock/anthropic.claude-v2': 100000,
|
||||
'bedrock/anthropic.claude-v2:1': 100000,
|
||||
'bedrock/anthropic.claude-3-sonnet-20240229-v1:0': 100000,
|
||||
'bedrock/anthropic.claude-3-haiku-20240307-v1:0': 100000,
|
||||
'bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0': 100000,
|
||||
'groq/llama3-8b-8192': 8192,
|
||||
'groq/llama3-70b-8192': 8192,
|
||||
'ollama/llama3': 4096,
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class BaseAiHandler(ABC):
|
||||
"""
|
||||
This class defines the interface for an AI handler to be used by the PR Agents.
|
||||
@ -25,4 +26,3 @@ class BaseAiHandler(ABC):
|
||||
temperature (float): the temperature to use for the chat completion
|
||||
"""
|
||||
pass
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
try:
|
||||
from langchain.chat_models import ChatOpenAI, AzureChatOpenAI
|
||||
from langchain.schema import SystemMessage, HumanMessage
|
||||
from langchain_openai import ChatOpenAI, AzureChatOpenAI
|
||||
from langchain_core.messages import SystemMessage, HumanMessage
|
||||
except: # we don't enforce langchain as a dependency, so if it's not installed, just move on
|
||||
pass
|
||||
|
||||
@ -8,12 +8,13 @@ from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.log import get_logger
|
||||
|
||||
from openai.error import APIError, RateLimitError, Timeout, TryAgain
|
||||
from openai import APIError, RateLimitError, Timeout
|
||||
from retry import retry
|
||||
import functools
|
||||
|
||||
OPENAI_RETRIES = 5
|
||||
|
||||
|
||||
class LangChainOpenAIHandler(BaseAiHandler):
|
||||
def __init__(self):
|
||||
# Initialize OpenAIHandler specific attributes here
|
||||
@ -30,20 +31,24 @@ class LangChainOpenAIHandler(BaseAiHandler):
|
||||
openai_api_version=get_settings().openai.api_version,
|
||||
)
|
||||
else:
|
||||
# for llms that compatible with openai, should use custom api base
|
||||
openai_api_base = get_settings().get("OPENAI.API_BASE", None)
|
||||
if openai_api_base is None or len(openai_api_base) == 0:
|
||||
self._chat = ChatOpenAI(openai_api_key=get_settings().openai.key)
|
||||
else:
|
||||
self._chat = ChatOpenAI(openai_api_key=get_settings().openai.key, openai_api_base=openai_api_base)
|
||||
except AttributeError as e:
|
||||
if getattr(e, "name"):
|
||||
raise ValueError(f"OpenAI {e.name} is required") from e
|
||||
else:
|
||||
raise e
|
||||
|
||||
@property
|
||||
def chat(self):
|
||||
def chat(self, messages: list, model: str, temperature: float):
|
||||
if self.azure:
|
||||
# we must set the deployment_id only here (instead of the __init__ method) to support fallback_deployments
|
||||
return self._chat(deployment_name=self.deployment_id)
|
||||
return self._chat.invoke(input = messages, model=model, temperature=temperature, deployment_name=self.deployment_id)
|
||||
else:
|
||||
return self._chat
|
||||
return self._chat.invoke(input = messages, model=model, temperature=temperature)
|
||||
|
||||
@property
|
||||
def deployment_id(self):
|
||||
@ -51,7 +56,8 @@ class LangChainOpenAIHandler(BaseAiHandler):
|
||||
Returns the deployment ID for the OpenAI API.
|
||||
"""
|
||||
return get_settings().get("OPENAI.DEPLOYMENT_ID", None)
|
||||
@retry(exceptions=(APIError, Timeout, TryAgain, AttributeError, RateLimitError),
|
||||
|
||||
@retry(exceptions=(APIError, Timeout, AttributeError, RateLimitError),
|
||||
tries=OPENAI_RETRIES, delay=2, backoff=2, jitter=(1, 3))
|
||||
async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2):
|
||||
try:
|
||||
|
@ -25,12 +25,18 @@ class LiteLLMAIHandler(BaseAiHandler):
|
||||
Raises a ValueError if the OpenAI key is missing.
|
||||
"""
|
||||
self.azure = False
|
||||
self.aws_bedrock_client = None
|
||||
self.api_base = None
|
||||
self.repetition_penalty = None
|
||||
if get_settings().get("OPENAI.KEY", None):
|
||||
openai.api_key = get_settings().openai.key
|
||||
litellm.openai_key = get_settings().openai.key
|
||||
elif 'OPENAI_API_KEY' not in os.environ:
|
||||
litellm.api_key = "dummy_key"
|
||||
if get_settings().get("aws.AWS_ACCESS_KEY_ID"):
|
||||
assert get_settings().aws.AWS_SECRET_ACCESS_KEY and get_settings().aws.AWS_REGION_NAME, "AWS credentials are incomplete"
|
||||
os.environ["AWS_ACCESS_KEY_ID"] = get_settings().aws.AWS_ACCESS_KEY_ID
|
||||
os.environ["AWS_SECRET_ACCESS_KEY"] = get_settings().aws.AWS_SECRET_ACCESS_KEY
|
||||
os.environ["AWS_REGION_NAME"] = get_settings().aws.AWS_REGION_NAME
|
||||
if get_settings().get("litellm.use_client"):
|
||||
litellm_token = get_settings().get("litellm.LITELLM_TOKEN")
|
||||
assert litellm_token, "LITELLM_TOKEN is required"
|
||||
@ -64,21 +70,13 @@ class LiteLLMAIHandler(BaseAiHandler):
|
||||
if get_settings().get("OLLAMA.API_BASE", None):
|
||||
litellm.api_base = get_settings().ollama.api_base
|
||||
self.api_base = get_settings().ollama.api_base
|
||||
if get_settings().get("HUGGINGFACE.REPITITION_PENALTY", None):
|
||||
if get_settings().get("HUGGINGFACE.REPETITION_PENALTY", None):
|
||||
self.repetition_penalty = float(get_settings().huggingface.repetition_penalty)
|
||||
if get_settings().get("VERTEXAI.VERTEX_PROJECT", None):
|
||||
litellm.vertex_project = get_settings().vertexai.vertex_project
|
||||
litellm.vertex_location = get_settings().get(
|
||||
"VERTEXAI.VERTEX_LOCATION", None
|
||||
)
|
||||
if get_settings().get("AWS.BEDROCK_REGION", None):
|
||||
litellm.AmazonAnthropicConfig.max_tokens_to_sample = 2000
|
||||
litellm.AmazonAnthropicClaude3Config.max_tokens = 2000
|
||||
self.aws_bedrock_client = boto3.client(
|
||||
service_name="bedrock-runtime",
|
||||
region_name=get_settings().aws.bedrock_region,
|
||||
)
|
||||
|
||||
def prepare_logs(self, response, system, user, resp, finish_reason):
|
||||
response_log = response.dict().copy()
|
||||
response_log['system'] = system
|
||||
@ -99,7 +97,7 @@ class LiteLLMAIHandler(BaseAiHandler):
|
||||
return get_settings().get("OPENAI.DEPLOYMENT_ID", None)
|
||||
|
||||
@retry(
|
||||
retry=retry_if_exception_type((openai.APIError, openai.APIConnectionError, openai.Timeout)), # No retry on RateLimitError
|
||||
retry=retry_if_exception_type((openai.APIError, openai.APIConnectionError, openai.APITimeoutError)), # No retry on RateLimitError
|
||||
stop=stop_after_attempt(OPENAI_RETRIES)
|
||||
)
|
||||
async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2, img_path: str = None):
|
||||
@ -131,8 +129,6 @@ class LiteLLMAIHandler(BaseAiHandler):
|
||||
"force_timeout": get_settings().config.ai_timeout,
|
||||
"api_base": self.api_base,
|
||||
}
|
||||
if self.aws_bedrock_client:
|
||||
kwargs["aws_bedrock_client"] = self.aws_bedrock_client
|
||||
if self.repetition_penalty:
|
||||
kwargs["repetition_penalty"] = self.repetition_penalty
|
||||
|
||||
@ -143,7 +139,7 @@ class LiteLLMAIHandler(BaseAiHandler):
|
||||
get_logger().info(f"\nUser prompt:\n{user}")
|
||||
|
||||
response = await acompletion(**kwargs)
|
||||
except (openai.APIError, openai.Timeout) as e:
|
||||
except (openai.APIError, openai.APITimeoutError) as e:
|
||||
get_logger().error("Error during OpenAI inference: ", e)
|
||||
raise
|
||||
except (openai.RateLimitError) as e:
|
||||
|
@ -28,6 +28,7 @@ class OpenAIHandler(BaseAiHandler):
|
||||
|
||||
except AttributeError as e:
|
||||
raise ValueError("OpenAI key is required") from e
|
||||
|
||||
@property
|
||||
def deployment_id(self):
|
||||
"""
|
||||
|
@ -3,7 +3,8 @@ import re
|
||||
|
||||
from pr_agent.config_loader import get_settings
|
||||
|
||||
def filter_ignored(files):
|
||||
|
||||
def filter_ignored(files, platform = 'github'):
|
||||
"""
|
||||
Filter out files that match the ignore patterns.
|
||||
"""
|
||||
@ -27,8 +28,16 @@ def filter_ignored(files):
|
||||
pass
|
||||
|
||||
# keep filenames that _don't_ match the ignore regex
|
||||
if files and isinstance(files, list):
|
||||
for r in compiled_patterns:
|
||||
if platform == 'github':
|
||||
files = [f for f in files if (f.filename and not r.match(f.filename))]
|
||||
elif platform == 'bitbucket':
|
||||
files = [f for f in files if (f.new.path and not r.match(f.new.path))]
|
||||
elif platform == 'gitlab':
|
||||
files = [f for f in files if (f['new_path'] and not r.match(f['new_path']))]
|
||||
elif platform == 'azure':
|
||||
files = [f for f in files if not r.match(f)]
|
||||
|
||||
except Exception as e:
|
||||
print(f"Could not filter file list: {e}")
|
||||
|
@ -23,7 +23,10 @@ def extend_patch(original_file_str, patch_str, num_lines) -> str:
|
||||
return patch_str
|
||||
|
||||
if type(original_file_str) == bytes:
|
||||
try:
|
||||
original_file_str = original_file_str.decode('utf-8')
|
||||
except UnicodeDecodeError:
|
||||
return ""
|
||||
|
||||
original_lines = original_file_str.splitlines()
|
||||
patch_lines = patch_str.splitlines()
|
||||
@ -133,7 +136,7 @@ def handle_patch_deletions(patch: str, original_file_content_str: str,
|
||||
str: The modified patch with deletion hunks omitted.
|
||||
|
||||
"""
|
||||
if not new_file_content_str and edit_type != EDIT_TYPE.ADDED:
|
||||
if not new_file_content_str and (edit_type == EDIT_TYPE.DELETED or edit_type == EDIT_TYPE.UNKNOWN):
|
||||
# logic for handling deleted files - don't show patch, just show that the file was deleted
|
||||
if get_settings().config.verbosity_level > 0:
|
||||
get_logger().info(f"Processing file: {file_name}, minimizing deletion file")
|
||||
|
@ -5,17 +5,20 @@ from pr_agent.config_loader import get_settings
|
||||
|
||||
|
||||
|
||||
|
||||
def filter_bad_extensions(files):
|
||||
# Bad Extensions, source: https://github.com/EleutherAI/github-downloader/blob/345e7c4cbb9e0dc8a0615fd995a08bf9d73b3fe6/download_repo_text.py # noqa: E501
|
||||
bad_extensions = get_settings().bad_extensions.default
|
||||
if get_settings().config.use_extra_bad_extensions:
|
||||
bad_extensions += get_settings().bad_extensions.extra
|
||||
return [f for f in files if f.filename is not None and is_valid_file(f.filename, bad_extensions)]
|
||||
|
||||
|
||||
def filter_bad_extensions(files):
|
||||
return [f for f in files if f.filename is not None and is_valid_file(f.filename)]
|
||||
|
||||
|
||||
def is_valid_file(filename):
|
||||
def is_valid_file(filename, bad_extensions=None):
|
||||
if not bad_extensions:
|
||||
bad_extensions = get_settings().bad_extensions.default
|
||||
if get_settings().config.use_extra_bad_extensions:
|
||||
bad_extensions += get_settings().bad_extensions.extra
|
||||
return filename.split('.')[-1] not in bad_extensions
|
||||
|
||||
|
||||
|
@ -21,29 +21,17 @@ MORE_MODIFIED_FILES_ = "Additional modified files (insufficient token budget to
|
||||
|
||||
ADDED_FILES_ = "Additional added files (insufficient token budget to process):\n"
|
||||
|
||||
OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD = 1000
|
||||
OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600
|
||||
OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD = 1500
|
||||
OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 1000
|
||||
|
||||
def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: str,
|
||||
add_line_numbers_to_hunks: bool = False, disable_extra_lines: bool = False) -> str:
|
||||
"""
|
||||
Returns a string with the diff of the pull request, applying diff minimization techniques if needed.
|
||||
|
||||
Args:
|
||||
git_provider (GitProvider): An object of the GitProvider class representing the Git provider used for the pull
|
||||
request.
|
||||
token_handler (TokenHandler): An object of the TokenHandler class used for handling tokens in the context of the
|
||||
pull request.
|
||||
model (str): The name of the model used for tokenization.
|
||||
add_line_numbers_to_hunks (bool, optional): A boolean indicating whether to add line numbers to the hunks in the
|
||||
diff. Defaults to False.
|
||||
disable_extra_lines (bool, optional): A boolean indicating whether to disable the extension of each patch with
|
||||
extra lines of context. Defaults to False.
|
||||
|
||||
Returns:
|
||||
str: A string with the diff of the pull request, applying diff minimization techniques if needed.
|
||||
"""
|
||||
|
||||
def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler,
|
||||
model: str,
|
||||
add_line_numbers_to_hunks: bool = False,
|
||||
disable_extra_lines: bool = False,
|
||||
large_pr_handling=False,
|
||||
return_remaining_files=False):
|
||||
if disable_extra_lines:
|
||||
PATCH_EXTRA_LINES = 0
|
||||
else:
|
||||
@ -87,37 +75,100 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s
|
||||
# if we are over the limit, start pruning
|
||||
get_logger().info(f"Tokens: {total_tokens}, total tokens over limit: {get_max_tokens(model)}, "
|
||||
f"pruning diff.")
|
||||
patches_compressed, modified_file_names, deleted_file_names, added_file_names, total_tokens_new = \
|
||||
pr_generate_compressed_diff(pr_languages, token_handler, model, add_line_numbers_to_hunks)
|
||||
patches_compressed_list, total_tokens_list, deleted_files_list, remaining_files_list, file_dict, files_in_patches_list = \
|
||||
pr_generate_compressed_diff(pr_languages, token_handler, model, add_line_numbers_to_hunks, large_pr_handling)
|
||||
|
||||
if large_pr_handling and len(patches_compressed_list) > 1:
|
||||
get_logger().info(f"Large PR handling mode, and found {len(patches_compressed_list)} patches with original diff.")
|
||||
return "" # return empty string, as we generate multiple patches with a different prompt
|
||||
|
||||
# return the first patch
|
||||
patches_compressed = patches_compressed_list[0]
|
||||
total_tokens_new = total_tokens_list[0]
|
||||
files_in_patch = files_in_patches_list[0]
|
||||
|
||||
# Insert additional information about added, modified, and deleted files if there is enough space
|
||||
max_tokens = get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD
|
||||
curr_token = total_tokens_new # == token_handler.count_tokens(final_diff)+token_handler.prompt_tokens
|
||||
final_diff = "\n".join(patches_compressed)
|
||||
delta_tokens = 10
|
||||
if added_file_names and (max_tokens - curr_token) > delta_tokens:
|
||||
added_list_str = ADDED_FILES_ + "\n".join(added_file_names)
|
||||
added_list_str = modified_list_str = deleted_list_str = ""
|
||||
unprocessed_files = []
|
||||
# generate the added, modified, and deleted files lists
|
||||
if (max_tokens - curr_token) > delta_tokens:
|
||||
for filename, file_values in file_dict.items():
|
||||
if filename in files_in_patch:
|
||||
continue
|
||||
if file_values['edit_type'] == EDIT_TYPE.ADDED:
|
||||
unprocessed_files.append(filename)
|
||||
if not added_list_str:
|
||||
added_list_str = ADDED_FILES_ + f"\n{filename}"
|
||||
else:
|
||||
added_list_str = added_list_str + f"\n{filename}"
|
||||
elif file_values['edit_type'] == EDIT_TYPE.MODIFIED or EDIT_TYPE.RENAMED:
|
||||
unprocessed_files.append(filename)
|
||||
if not modified_list_str:
|
||||
modified_list_str = MORE_MODIFIED_FILES_ + f"\n{filename}"
|
||||
else:
|
||||
modified_list_str = modified_list_str + f"\n{filename}"
|
||||
elif file_values['edit_type'] == EDIT_TYPE.DELETED:
|
||||
# unprocessed_files.append(filename) # not needed here, because the file was deleted, so no need to process it
|
||||
if not deleted_list_str:
|
||||
deleted_list_str = DELETED_FILES_ + f"\n{filename}"
|
||||
else:
|
||||
deleted_list_str = deleted_list_str + f"\n{filename}"
|
||||
|
||||
# prune the added, modified, and deleted files lists, and add them to the final diff
|
||||
added_list_str = clip_tokens(added_list_str, max_tokens - curr_token)
|
||||
if added_list_str:
|
||||
final_diff = final_diff + "\n\n" + added_list_str
|
||||
curr_token += token_handler.count_tokens(added_list_str) + 2
|
||||
if modified_file_names and (max_tokens - curr_token) > delta_tokens:
|
||||
modified_list_str = MORE_MODIFIED_FILES_ + "\n".join(modified_file_names)
|
||||
modified_list_str = clip_tokens(modified_list_str, max_tokens - curr_token)
|
||||
if modified_list_str:
|
||||
final_diff = final_diff + "\n\n" + modified_list_str
|
||||
curr_token += token_handler.count_tokens(modified_list_str) + 2
|
||||
if deleted_file_names and (max_tokens - curr_token) > delta_tokens:
|
||||
deleted_list_str = DELETED_FILES_ + "\n".join(deleted_file_names)
|
||||
deleted_list_str = clip_tokens(deleted_list_str, max_tokens - curr_token)
|
||||
if deleted_list_str:
|
||||
final_diff = final_diff + "\n\n" + deleted_list_str
|
||||
try:
|
||||
|
||||
get_logger().debug(f"After pruning, added_list_str: {added_list_str}, modified_list_str: {modified_list_str}, "
|
||||
f"deleted_list_str: {deleted_list_str}")
|
||||
if not return_remaining_files:
|
||||
return final_diff
|
||||
else:
|
||||
return final_diff, remaining_files_list
|
||||
|
||||
|
||||
def get_pr_diff_multiple_patchs(git_provider: GitProvider, token_handler: TokenHandler, model: str,
|
||||
add_line_numbers_to_hunks: bool = False, disable_extra_lines: bool = False):
|
||||
try:
|
||||
diff_files_original = git_provider.get_diff_files()
|
||||
except RateLimitExceededException as e:
|
||||
get_logger().error(f"Rate limit exceeded for git provider API. original message {e}")
|
||||
raise
|
||||
|
||||
diff_files = filter_ignored(diff_files_original)
|
||||
if diff_files != diff_files_original:
|
||||
try:
|
||||
get_logger().info(f"Filtered out {len(diff_files_original) - len(diff_files)} files")
|
||||
new_names = set([a.filename for a in diff_files])
|
||||
orig_names = set([a.filename for a in diff_files_original])
|
||||
get_logger().info(f"Filtered out files: {orig_names - new_names}")
|
||||
except Exception as e:
|
||||
pass
|
||||
return final_diff
|
||||
|
||||
# get pr languages
|
||||
pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files)
|
||||
if pr_languages:
|
||||
try:
|
||||
get_logger().info(f"PR main language: {pr_languages[0]['language']}")
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
patches_compressed_list, total_tokens_list, deleted_files_list, remaining_files_list, file_dict, files_in_patches_list = \
|
||||
pr_generate_compressed_diff(pr_languages, token_handler, model, add_line_numbers_to_hunks, large_pr_handling=True)
|
||||
|
||||
return patches_compressed_list, total_tokens_list, deleted_files_list, remaining_files_list, file_dict, files_in_patches_list
|
||||
|
||||
|
||||
def pr_generate_extended_diff(pr_languages: list,
|
||||
@ -146,6 +197,9 @@ def pr_generate_extended_diff(pr_languages: list,
|
||||
|
||||
# extend each patch with extra lines of context
|
||||
extended_patch = extend_patch(original_file_content_str, patch, num_lines=patch_extra_lines)
|
||||
if not extended_patch:
|
||||
get_logger().warning(f"Failed to extend patch for file: {file.filename}")
|
||||
continue
|
||||
full_extended_patch = f"\n\n## {file.filename}\n\n{extended_patch}\n"
|
||||
|
||||
if add_line_numbers_to_hunks:
|
||||
@ -161,41 +215,17 @@ def pr_generate_extended_diff(pr_languages: list,
|
||||
|
||||
|
||||
def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, model: str,
|
||||
convert_hunks_to_line_numbers: bool) -> Tuple[list, list, list, list, int]:
|
||||
"""
|
||||
Generate a compressed diff string for a pull request, using diff minimization techniques to reduce the number of
|
||||
tokens used.
|
||||
Args:
|
||||
top_langs (list): A list of dictionaries representing the languages used in the pull request and their
|
||||
corresponding files.
|
||||
token_handler (TokenHandler): An object of the TokenHandler class used for handling tokens in the context of the
|
||||
pull request.
|
||||
model (str): The model used for tokenization.
|
||||
convert_hunks_to_line_numbers (bool): A boolean indicating whether to convert hunks to line numbers in the diff.
|
||||
Returns:
|
||||
Tuple[list, list, list]: A tuple containing the following lists:
|
||||
- patches: A list of compressed diff patches for each file in the pull request.
|
||||
- modified_files_list: A list of file names that were skipped due to large patch size.
|
||||
- deleted_files_list: A list of file names that were deleted in the pull request.
|
||||
|
||||
Minimization techniques to reduce the number of tokens:
|
||||
0. Start from the largest diff patch to smaller ones
|
||||
1. Don't use extend context lines around diff
|
||||
2. Minimize deleted files
|
||||
3. Minimize deleted hunks
|
||||
4. Minimize all remaining files when you reach token limit
|
||||
"""
|
||||
|
||||
patches = []
|
||||
added_files_list = []
|
||||
modified_files_list = []
|
||||
convert_hunks_to_line_numbers: bool,
|
||||
large_pr_handling: bool) -> Tuple[list, list, list, list, dict, list]:
|
||||
deleted_files_list = []
|
||||
|
||||
# sort each one of the languages in top_langs by the number of tokens in the diff
|
||||
sorted_files = []
|
||||
for lang in top_langs:
|
||||
sorted_files.extend(sorted(lang['files'], key=lambda x: x.tokens, reverse=True))
|
||||
|
||||
total_tokens = token_handler.prompt_tokens
|
||||
# generate patches for each file, and count tokens
|
||||
file_dict = {}
|
||||
for file in sorted_files:
|
||||
original_file_content_str = file.base_file
|
||||
new_file_content_str = file.head_file
|
||||
@ -207,55 +237,87 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
|
||||
patch = handle_patch_deletions(patch, original_file_content_str,
|
||||
new_file_content_str, file.filename, file.edit_type)
|
||||
if patch is None:
|
||||
# if not deleted_files_list:
|
||||
# total_tokens += token_handler.count_tokens(DELETED_FILES_)
|
||||
if file.filename not in deleted_files_list:
|
||||
deleted_files_list.append(file.filename)
|
||||
# total_tokens += token_handler.count_tokens(file.filename) + 1
|
||||
continue
|
||||
|
||||
if convert_hunks_to_line_numbers:
|
||||
patch = convert_to_hunks_with_lines_numbers(patch, file)
|
||||
|
||||
new_patch_tokens = token_handler.count_tokens(patch)
|
||||
file_dict[file.filename] = {'patch': patch, 'tokens': new_patch_tokens, 'edit_type': file.edit_type}
|
||||
|
||||
max_tokens_model = get_max_tokens(model)
|
||||
|
||||
# first iteration
|
||||
files_in_patches_list = []
|
||||
remaining_files_list = [file.filename for file in sorted_files]
|
||||
patches_list =[]
|
||||
total_tokens_list = []
|
||||
total_tokens, patches, remaining_files_list, files_in_patch_list = generate_full_patch(convert_hunks_to_line_numbers, file_dict,
|
||||
max_tokens_model, remaining_files_list, token_handler)
|
||||
patches_list.append(patches)
|
||||
total_tokens_list.append(total_tokens)
|
||||
files_in_patches_list.append(files_in_patch_list)
|
||||
|
||||
# additional iterations (if needed)
|
||||
if large_pr_handling:
|
||||
NUMBER_OF_ALLOWED_ITERATIONS = get_settings().pr_description.max_ai_calls - 1 # one more call is to summarize
|
||||
for i in range(NUMBER_OF_ALLOWED_ITERATIONS-1):
|
||||
if remaining_files_list:
|
||||
total_tokens, patches, remaining_files_list, files_in_patch_list = generate_full_patch(convert_hunks_to_line_numbers,
|
||||
file_dict,
|
||||
max_tokens_model,
|
||||
remaining_files_list, token_handler)
|
||||
if patches:
|
||||
patches_list.append(patches)
|
||||
total_tokens_list.append(total_tokens)
|
||||
files_in_patches_list.append(files_in_patch_list)
|
||||
else:
|
||||
break
|
||||
|
||||
return patches_list, total_tokens_list, deleted_files_list, remaining_files_list, file_dict, files_in_patches_list
|
||||
|
||||
|
||||
def generate_full_patch(convert_hunks_to_line_numbers, file_dict, max_tokens_model,remaining_files_list_prev, token_handler):
|
||||
total_tokens = token_handler.prompt_tokens # initial tokens
|
||||
patches = []
|
||||
remaining_files_list_new = []
|
||||
files_in_patch_list = []
|
||||
for filename, data in file_dict.items():
|
||||
if filename not in remaining_files_list_prev:
|
||||
continue
|
||||
|
||||
patch = data['patch']
|
||||
new_patch_tokens = data['tokens']
|
||||
edit_type = data['edit_type']
|
||||
|
||||
# Hard Stop, no more tokens
|
||||
if total_tokens > get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD:
|
||||
get_logger().warning(f"File was fully skipped, no more tokens: {file.filename}.")
|
||||
if total_tokens > max_tokens_model - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD:
|
||||
get_logger().warning(f"File was fully skipped, no more tokens: {filename}.")
|
||||
continue
|
||||
|
||||
# If the patch is too large, just show the file name
|
||||
if total_tokens + new_patch_tokens > get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
|
||||
if total_tokens + new_patch_tokens > max_tokens_model - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
|
||||
# Current logic is to skip the patch if it's too large
|
||||
# TODO: Option for alternative logic to remove hunks from the patch to reduce the number of tokens
|
||||
# until we meet the requirements
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
get_logger().warning(f"Patch too large, minimizing it, {file.filename}")
|
||||
if file.edit_type == EDIT_TYPE.ADDED:
|
||||
# if not added_files_list:
|
||||
# total_tokens += token_handler.count_tokens(ADDED_FILES_)
|
||||
if file.filename not in added_files_list:
|
||||
added_files_list.append(file.filename)
|
||||
# total_tokens += token_handler.count_tokens(file.filename) + 1
|
||||
else:
|
||||
# if not modified_files_list:
|
||||
# total_tokens += token_handler.count_tokens(MORE_MODIFIED_FILES_)
|
||||
if file.filename not in modified_files_list:
|
||||
modified_files_list.append(file.filename)
|
||||
# total_tokens += token_handler.count_tokens(file.filename) + 1
|
||||
get_logger().warning(f"Patch too large, skipping it, {filename}")
|
||||
remaining_files_list_new.append(filename)
|
||||
continue
|
||||
|
||||
if patch:
|
||||
if not convert_hunks_to_line_numbers:
|
||||
patch_final = f"\n\n## file: '{file.filename.strip()}\n\n{patch.strip()}\n'"
|
||||
patch_final = f"\n\n## file: '{filename.strip()}\n\n{patch.strip()}\n'"
|
||||
else:
|
||||
patch_final = "\n\n" + patch.strip()
|
||||
patches.append(patch_final)
|
||||
total_tokens += token_handler.count_tokens(patch_final)
|
||||
files_in_patch_list.append(filename)
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
get_logger().info(f"Tokens: {total_tokens}, last filename: {file.filename}")
|
||||
|
||||
return patches, modified_files_list, deleted_files_list, added_files_list, total_tokens
|
||||
get_logger().info(f"Tokens: {total_tokens}, last filename: {filename}")
|
||||
return total_tokens, patches, remaining_files_list_new, files_in_patch_list
|
||||
|
||||
|
||||
async def retry_with_fallback_models(f: Callable, model_type: ModelType = ModelType.REGULAR):
|
||||
@ -374,7 +436,23 @@ def get_pr_multi_diffs(git_provider: GitProvider,
|
||||
patch = convert_to_hunks_with_lines_numbers(patch, file)
|
||||
new_patch_tokens = token_handler.count_tokens(patch)
|
||||
|
||||
if patch and (token_handler.prompt_tokens + new_patch_tokens) > get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
|
||||
if patch and (token_handler.prompt_tokens + new_patch_tokens) > get_max_tokens(
|
||||
model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
|
||||
if get_settings().config.get('large_patch_policy', 'skip') == 'skip':
|
||||
get_logger().warning(f"Patch too large, skipping: {file.filename}")
|
||||
continue
|
||||
elif get_settings().config.get('large_patch_policy') == 'clip':
|
||||
delta_tokens = get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD - token_handler.prompt_tokens
|
||||
patch_clipped = clip_tokens(patch, delta_tokens, delete_last_line=True, num_input_tokens=new_patch_tokens)
|
||||
new_patch_tokens = token_handler.count_tokens(patch_clipped)
|
||||
if patch_clipped and (token_handler.prompt_tokens + new_patch_tokens) > get_max_tokens(
|
||||
model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
|
||||
get_logger().warning(f"Patch too large, skipping: {file.filename}")
|
||||
continue
|
||||
else:
|
||||
get_logger().info(f"Clipped large patch for file: {file.filename}")
|
||||
patch = patch_clipped
|
||||
else:
|
||||
get_logger().warning(f"Patch too large, skipping: {file.filename}")
|
||||
continue
|
||||
|
||||
|
@ -5,6 +5,7 @@ import json
|
||||
import os
|
||||
import re
|
||||
import textwrap
|
||||
import time
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any, List, Tuple
|
||||
@ -22,6 +23,12 @@ class ModelType(str, Enum):
|
||||
REGULAR = "regular"
|
||||
TURBO = "turbo"
|
||||
|
||||
|
||||
class PRReviewHeader(str, Enum):
|
||||
REGULAR = "## PR Reviewer Guide"
|
||||
INCREMENTAL = "## Incremental PR Reviewer Guide"
|
||||
|
||||
|
||||
def get_setting(key: str) -> Any:
|
||||
try:
|
||||
key = key.upper()
|
||||
@ -30,7 +37,7 @@ def get_setting(key: str) -> Any:
|
||||
return global_settings.get(key, None)
|
||||
|
||||
|
||||
def emphasize_header(text: str) -> str:
|
||||
def emphasize_header(text: str, only_markdown=False) -> str:
|
||||
try:
|
||||
# Finding the position of the first occurrence of ": "
|
||||
colon_position = text.find(": ")
|
||||
@ -38,7 +45,10 @@ def emphasize_header(text: str) -> str:
|
||||
# Splitting the string and wrapping the first part in <strong> tags
|
||||
if colon_position != -1:
|
||||
# Everything before the colon (inclusive) is wrapped in <strong> tags
|
||||
transformed_string = "<strong>" + text[:colon_position + 1] + "</strong>" + text[colon_position + 1:]
|
||||
if only_markdown:
|
||||
transformed_string = f"**{text[:colon_position + 1]}**\n" + text[colon_position + 1:]
|
||||
else:
|
||||
transformed_string = "<strong>" + text[:colon_position + 1] + "</strong>" +'<br>' + text[colon_position + 1:]
|
||||
else:
|
||||
# If there's no ": ", return the original string
|
||||
transformed_string = text
|
||||
@ -60,8 +70,7 @@ def unique_strings(input_list: List[str]) -> List[str]:
|
||||
seen.add(item)
|
||||
return unique_list
|
||||
|
||||
|
||||
def convert_to_markdown(output_data: dict, gfm_supported: bool = True, incremental_review=None) -> str:
|
||||
def convert_to_markdown_v2(output_data: dict, gfm_supported: bool = True, incremental_review=None) -> str:
|
||||
"""
|
||||
Convert a dictionary of data into markdown format.
|
||||
Args:
|
||||
@ -73,9 +82,11 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
|
||||
emojis = {
|
||||
"Can be split": "🔀",
|
||||
"Possible issues": "⚡",
|
||||
"Key issues to review": "⚡",
|
||||
"Score": "🏅",
|
||||
"Relevant tests": "🧪",
|
||||
"Focused PR": "✨",
|
||||
"Relevant ticket": "🎫",
|
||||
"Security concerns": "🔒",
|
||||
"Insights from user's answers": "📝",
|
||||
"Code feedback": "🤖",
|
||||
@ -83,58 +94,117 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
|
||||
}
|
||||
markdown_text = ""
|
||||
if not incremental_review:
|
||||
markdown_text += f"## PR Review 🔍\n\n"
|
||||
markdown_text += f"{PRReviewHeader.REGULAR.value} 🔍\n\n"
|
||||
else:
|
||||
markdown_text += f"## Incremental PR Review 🔍 \n\n"
|
||||
markdown_text += f"{PRReviewHeader.INCREMENTAL.value} 🔍\n\n"
|
||||
markdown_text += f"⏮️ Review for commits since previous PR-Agent review {incremental_review}.\n\n"
|
||||
if gfm_supported:
|
||||
markdown_text += "<table>\n<tr>\n"
|
||||
# markdown_text += """<td> Feedback </td> <td></td></tr>"""
|
||||
|
||||
if not output_data or not output_data.get('review', {}):
|
||||
return ""
|
||||
|
||||
if gfm_supported:
|
||||
markdown_text += "<table>\n"
|
||||
|
||||
for key, value in output_data['review'].items():
|
||||
if value is None or value == '' or value == {} or value == []:
|
||||
if key.lower() != 'can_be_split':
|
||||
continue
|
||||
key_nice = key.replace('_', ' ').capitalize()
|
||||
emoji = emojis.get(key_nice, "")
|
||||
if gfm_supported:
|
||||
if 'Estimated effort to review' in key_nice:
|
||||
key_nice = 'Estimated effort to review [1-5]'
|
||||
if 'security concerns' in key_nice.lower():
|
||||
key_nice = 'Estimated effort to review'
|
||||
if value.isnumeric():
|
||||
value_int = int(value)
|
||||
else:
|
||||
try:
|
||||
value_int = int(value.split(',')[0])
|
||||
except ValueError:
|
||||
continue
|
||||
blue_bars = '🔵' * value_int
|
||||
white_bars = '⚪' * (5 - value_int)
|
||||
value = f"{value_int} {blue_bars}{white_bars}"
|
||||
if gfm_supported:
|
||||
markdown_text += f"<tr><td>"
|
||||
markdown_text += f"{emoji} <strong>{key_nice}</strong>: {value}"
|
||||
markdown_text += f"</td></tr>\n"
|
||||
else:
|
||||
markdown_text += f"### {emoji} {key_nice}: {value}\n\n"
|
||||
elif 'relevant tests' in key_nice.lower():
|
||||
value = value.strip().lower()
|
||||
if gfm_supported:
|
||||
markdown_text += f"<tr><td>"
|
||||
if is_value_no(value):
|
||||
markdown_text += f"{emoji} <strong>No relevant tests</strong>"
|
||||
else:
|
||||
markdown_text += f"{emoji} <strong>PR contains tests</strong>"
|
||||
markdown_text += f"</td></tr>\n"
|
||||
else:
|
||||
if gfm_supported:
|
||||
markdown_text += f"<tr><td>"
|
||||
if is_value_no(value):
|
||||
markdown_text += f"{emoji} <strong>No relevant tests</strong>"
|
||||
else:
|
||||
markdown_text += f"{emoji} <strong>PR contains tests</strong>"
|
||||
else:
|
||||
if is_value_no(value):
|
||||
markdown_text += f'### {emoji} No relevant tests\n\n'
|
||||
else:
|
||||
markdown_text += f"### PR contains tests\n\n"
|
||||
elif 'security concerns' in key_nice.lower():
|
||||
if gfm_supported:
|
||||
markdown_text += f"<tr><td>"
|
||||
if is_value_no(value):
|
||||
markdown_text += f"{emoji} <strong>No security concerns identified</strong>"
|
||||
else:
|
||||
markdown_text += f"{emoji} <strong>Security concerns</strong><br><br>\n\n"
|
||||
value = emphasize_header(value.strip())
|
||||
markdown_text += f"<tr><td> {emoji} <strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
|
||||
markdown_text += f"{value}"
|
||||
markdown_text += f"</td></tr>\n"
|
||||
else:
|
||||
if is_value_no(value):
|
||||
markdown_text += f'### {emoji} No security concerns identified\n\n'
|
||||
else:
|
||||
markdown_text += f"### {emoji} Security concerns\n\n"
|
||||
value = emphasize_header(value.strip())
|
||||
markdown_text += f"{value}\n\n"
|
||||
elif 'can be split' in key_nice.lower():
|
||||
if gfm_supported:
|
||||
markdown_text += f"<tr><td>"
|
||||
markdown_text += process_can_be_split(emoji, value)
|
||||
elif 'possible issues' in key_nice.lower():
|
||||
markdown_text += f"</td></tr>\n"
|
||||
elif 'key issues to review' in key_nice.lower():
|
||||
value = value.strip()
|
||||
if is_value_no(value):
|
||||
if gfm_supported:
|
||||
markdown_text += f"<tr><td>"
|
||||
markdown_text += f"{emoji} <strong>No key issues to review</strong>"
|
||||
markdown_text += f"</td></tr>\n"
|
||||
else:
|
||||
markdown_text += f"### {emoji} No key issues to review\n\n"
|
||||
else:
|
||||
issues = value.split('\n- ')
|
||||
for i, _ in enumerate(issues):
|
||||
issues[i] = issues[i].strip().strip('-').strip()
|
||||
issues = unique_strings(issues) # remove duplicates
|
||||
number_of_issues = len(issues)
|
||||
if number_of_issues > 1:
|
||||
markdown_text += f"<tr><td rowspan={number_of_issues}> {emoji} <strong>{key_nice}</strong></td>\n"
|
||||
if gfm_supported:
|
||||
markdown_text += f"<tr><td>"
|
||||
markdown_text += f"{emoji} <strong>{key_nice}</strong><br><br>\n\n"
|
||||
else:
|
||||
markdown_text += f"### {emoji} Key issues to review:\n\n"
|
||||
for i, issue in enumerate(issues):
|
||||
if not issue:
|
||||
continue
|
||||
issue = emphasize_header(issue)
|
||||
if i == 0:
|
||||
markdown_text += f"<td>\n\n{issue}</td></tr>\n"
|
||||
issue = emphasize_header(issue, only_markdown=True)
|
||||
markdown_text += f"{issue}\n\n"
|
||||
if gfm_supported:
|
||||
markdown_text += f"</td></tr>\n"
|
||||
else:
|
||||
markdown_text += f"<tr>\n<td>\n\n{issue}</td></tr>\n"
|
||||
if gfm_supported:
|
||||
markdown_text += f"<tr><td>"
|
||||
markdown_text += f"{emoji} <strong>{key_nice}</strong>: {value}"
|
||||
markdown_text += f"</td></tr>\n"
|
||||
else:
|
||||
value = emphasize_header(value.strip('-').strip())
|
||||
markdown_text += f"<tr><td> {emoji} <strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
|
||||
else:
|
||||
markdown_text += f"<tr><td> {emoji} <strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
|
||||
else:
|
||||
if len(value.split()) > 1:
|
||||
markdown_text += f"{emoji} **{key_nice}:**\n\n {value}\n\n"
|
||||
else:
|
||||
markdown_text += f"{emoji} **{key_nice}:** {value}\n\n"
|
||||
markdown_text += f"### {emoji} {key_nice}: {value}\n\n"
|
||||
|
||||
if gfm_supported:
|
||||
markdown_text += "</table>\n"
|
||||
|
||||
@ -144,7 +214,7 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
|
||||
markdown_text += f"<details><summary> <strong>Code feedback:</strong></summary>\n\n"
|
||||
markdown_text += "<hr>"
|
||||
else:
|
||||
markdown_text += f"\n\n** Code feedback:**\n\n"
|
||||
markdown_text += f"\n\n### Code feedback:\n\n"
|
||||
for i, value in enumerate(output_data['code_feedback']):
|
||||
if value is None or value == '' or value == {} or value == []:
|
||||
continue
|
||||
@ -153,41 +223,61 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
|
||||
markdown_text= markdown_text[:-4]
|
||||
if gfm_supported:
|
||||
markdown_text += f"</details>"
|
||||
#print(markdown_text)
|
||||
|
||||
|
||||
return markdown_text
|
||||
|
||||
|
||||
def process_can_be_split(emoji, value):
|
||||
try:
|
||||
# key_nice = "Can this PR be split?"
|
||||
key_nice = "Multiple PR themes"
|
||||
markdown_text = ""
|
||||
if not value or isinstance(value, list) and len(value) == 1:
|
||||
value = "No"
|
||||
markdown_text += f"<tr><td> {emoji} <strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
|
||||
# markdown_text += f"<tr><td> {emoji} <strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
|
||||
# markdown_text += f"### {emoji} No multiple PR themes\n\n"
|
||||
markdown_text += f"{emoji} <strong>No multiple PR themes</strong>\n\n"
|
||||
else:
|
||||
number_of_splits = len(value)
|
||||
markdown_text += f"<tr><td rowspan={number_of_splits}> {emoji} <strong>{key_nice}</strong></td>\n"
|
||||
markdown_text += f"{emoji} <strong>{key_nice}</strong><br><br>\n\n"
|
||||
for i, split in enumerate(value):
|
||||
title = split.get('title', '')
|
||||
relevant_files = split.get('relevant_files', [])
|
||||
if i == 0:
|
||||
markdown_text += f"<td><details><summary>\nSub-PR theme: <strong>{title}</strong></summary>\n\n"
|
||||
markdown_text += f"<hr>\n"
|
||||
markdown_text += f"Relevant files:\n"
|
||||
markdown_text += f"<ul>\n"
|
||||
markdown_text += f"<details><summary>\nSub-PR theme: <b>{title}</b></summary>\n\n"
|
||||
markdown_text += f"___\n\nRelevant files:\n\n"
|
||||
for file in relevant_files:
|
||||
markdown_text += f"<li>{file}</li>\n"
|
||||
markdown_text += f"</ul>\n\n</details></td></tr>\n"
|
||||
else:
|
||||
markdown_text += f"<tr>\n<td><details><summary>\nSub-PR theme: <strong>{title}</strong></summary>\n\n"
|
||||
markdown_text += f"<hr>\n"
|
||||
markdown_text += f"Relevant files:\n"
|
||||
markdown_text += f"<ul>\n"
|
||||
for file in relevant_files:
|
||||
markdown_text += f"<li>{file}</li>\n"
|
||||
markdown_text += f"</ul>\n\n</details></td></tr>\n"
|
||||
markdown_text += f"- {file}\n"
|
||||
markdown_text += f"___\n\n"
|
||||
markdown_text += f"</details>\n\n"
|
||||
|
||||
# markdown_text += f"#### Sub-PR theme: {title}\n\n"
|
||||
# markdown_text += f"Relevant files:\n\n"
|
||||
# for file in relevant_files:
|
||||
# markdown_text += f"- {file}\n"
|
||||
# markdown_text += "\n"
|
||||
# number_of_splits = len(value)
|
||||
# markdown_text += f"<tr><td rowspan={number_of_splits}> {emoji} <strong>{key_nice}</strong></td>\n"
|
||||
# for i, split in enumerate(value):
|
||||
# title = split.get('title', '')
|
||||
# relevant_files = split.get('relevant_files', [])
|
||||
# if i == 0:
|
||||
# markdown_text += f"<td><details><summary>\nSub-PR theme:<br><strong>{title}</strong></summary>\n\n"
|
||||
# markdown_text += f"<hr>\n"
|
||||
# markdown_text += f"Relevant files:\n"
|
||||
# markdown_text += f"<ul>\n"
|
||||
# for file in relevant_files:
|
||||
# markdown_text += f"<li>{file}</li>\n"
|
||||
# markdown_text += f"</ul>\n\n</details></td></tr>\n"
|
||||
# else:
|
||||
# markdown_text += f"<tr>\n<td><details><summary>\nSub-PR theme:<br><strong>{title}</strong></summary>\n\n"
|
||||
# markdown_text += f"<hr>\n"
|
||||
# markdown_text += f"Relevant files:\n"
|
||||
# markdown_text += f"<ul>\n"
|
||||
# for file in relevant_files:
|
||||
# markdown_text += f"<li>{file}</li>\n"
|
||||
# markdown_text += f"</ul>\n\n</details></td></tr>\n"
|
||||
except Exception as e:
|
||||
get_logger().exception(f"Failed to process can be split: {e}")
|
||||
return ""
|
||||
return markdown_text
|
||||
|
||||
|
||||
@ -356,7 +446,7 @@ def convert_str_to_datetime(date_str):
|
||||
return datetime.strptime(date_str, datetime_format)
|
||||
|
||||
|
||||
def load_large_diff(filename, new_file_content_str: str, original_file_content_str: str) -> str:
|
||||
def load_large_diff(filename, new_file_content_str: str, original_file_content_str: str, show_warning: bool = True) -> str:
|
||||
"""
|
||||
Generate a patch for a modified file by comparing the original content of the file with the new content provided as
|
||||
input.
|
||||
@ -375,7 +465,7 @@ def load_large_diff(filename, new_file_content_str: str, original_file_content_s
|
||||
try:
|
||||
diff = difflib.unified_diff(original_file_content_str.splitlines(keepends=True),
|
||||
new_file_content_str.splitlines(keepends=True))
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
if get_settings().config.verbosity_level >= 2 and show_warning:
|
||||
get_logger().warning(f"File was modified, but no patch was found. Manually creating patch: {filename}.")
|
||||
patch = ''.join(diff)
|
||||
except Exception:
|
||||
@ -429,25 +519,28 @@ def _fix_key_value(key: str, value: str):
|
||||
return key, value
|
||||
|
||||
|
||||
def load_yaml(response_text: str, keys_fix_yaml: List[str] = []) -> dict:
|
||||
def load_yaml(response_text: str, keys_fix_yaml: List[str] = [], first_key="", last_key="") -> dict:
|
||||
response_text = response_text.removeprefix('```yaml').rstrip('`')
|
||||
try:
|
||||
data = yaml.safe_load(response_text)
|
||||
except Exception as e:
|
||||
get_logger().error(f"Failed to parse AI prediction: {e}")
|
||||
data = try_fix_yaml(response_text, keys_fix_yaml=keys_fix_yaml)
|
||||
data = try_fix_yaml(response_text, keys_fix_yaml=keys_fix_yaml, first_key=first_key, last_key=last_key)
|
||||
return data
|
||||
|
||||
|
||||
def try_fix_yaml(response_text: str, keys_fix_yaml: List[str] = []) -> dict:
|
||||
def try_fix_yaml(response_text: str,
|
||||
keys_fix_yaml: List[str] = [],
|
||||
first_key="",
|
||||
last_key="",) -> dict:
|
||||
response_text_lines = response_text.split('\n')
|
||||
|
||||
keys = ['relevant line:', 'suggestion content:', 'relevant file:', 'existing code:', 'improved code:']
|
||||
keys = keys + keys_fix_yaml
|
||||
keys_yaml = ['relevant line:', 'suggestion content:', 'relevant file:', 'existing code:', 'improved code:']
|
||||
keys_yaml = keys_yaml + keys_fix_yaml
|
||||
# first fallback - try to convert 'relevant line: ...' to 'relevant line: |-\n ...'
|
||||
response_text_lines_copy = response_text_lines.copy()
|
||||
for i in range(0, len(response_text_lines_copy)):
|
||||
for key in keys:
|
||||
for key in keys_yaml:
|
||||
if key in response_text_lines_copy[i] and not '|-' in response_text_lines_copy[i]:
|
||||
response_text_lines_copy[i] = response_text_lines_copy[i].replace(f'{key}',
|
||||
f'{key} |-\n ')
|
||||
@ -458,18 +551,19 @@ def try_fix_yaml(response_text: str, keys_fix_yaml: List[str] = []) -> dict:
|
||||
except:
|
||||
get_logger().info(f"Failed to parse AI prediction after adding |-\n")
|
||||
|
||||
# second fallback - try to extract only range from first ```yaml to ````
|
||||
# second fallback - try to extract only range from first ```yaml to ```
|
||||
snippet_pattern = r'```(yaml)?[\s\S]*?```'
|
||||
snippet = re.search(snippet_pattern, '\n'.join(response_text_lines_copy))
|
||||
if snippet:
|
||||
snippet_text = snippet.group()
|
||||
try:
|
||||
data = yaml.safe_load(snippet_text.removeprefix('```yaml').rstrip('`'))
|
||||
get_logger().info(f"Successfully parsed AI prediction after extracting yaml snippet")
|
||||
get_logger().info(f"Successfully parsed AI prediction after extracting yaml snippet with second fallback")
|
||||
return data
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
# third fallback - try to remove leading and trailing curly brackets
|
||||
response_text_copy = response_text.strip().rstrip().removeprefix('{').removesuffix('}').rstrip(':\n')
|
||||
try:
|
||||
@ -479,7 +573,27 @@ def try_fix_yaml(response_text: str, keys_fix_yaml: List[str] = []) -> dict:
|
||||
except:
|
||||
pass
|
||||
|
||||
# fourth fallback - try to remove last lines
|
||||
# fourth fallback - try to extract yaml snippet by 'first_key' and 'last_key'
|
||||
# note that 'last_key' can be in practice a key that is not the last key in the yaml snippet.
|
||||
# it just needs to be some inner key, so we can look for newlines after it
|
||||
if first_key and last_key:
|
||||
index_start = response_text.find(f"\n{first_key}:")
|
||||
if index_start == -1:
|
||||
index_start = response_text.find(f"{first_key}:")
|
||||
index_last_code = response_text.rfind(f"{last_key}:")
|
||||
index_end = response_text.find("\n\n", index_last_code) # look for newlines after last_key
|
||||
if index_end == -1:
|
||||
index_end = len(response_text)
|
||||
response_text_copy = response_text[index_start:index_end].strip().strip('```yaml').strip('`').strip()
|
||||
try:
|
||||
data = yaml.safe_load(response_text_copy)
|
||||
get_logger().info(f"Successfully parsed AI prediction after extracting yaml snippet")
|
||||
return data
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
# fifth fallback - try to remove last lines
|
||||
data = {}
|
||||
for i in range(1, len(response_text_lines)):
|
||||
response_text_lines_tmp = '\n'.join(response_text_lines[:-i])
|
||||
@ -495,7 +609,7 @@ def set_custom_labels(variables, git_provider=None):
|
||||
if not get_settings().config.enable_custom_labels:
|
||||
return
|
||||
|
||||
labels = get_settings().custom_labels
|
||||
labels = get_settings().get('custom_labels', {})
|
||||
if not labels:
|
||||
# set default labels
|
||||
labels = ['Bug fix', 'Tests', 'Bug fix with tests', 'Enhancement', 'Documentation', 'Other']
|
||||
@ -552,7 +666,7 @@ def get_max_tokens(model):
|
||||
return max_tokens_model
|
||||
|
||||
|
||||
def clip_tokens(text: str, max_tokens: int, add_three_dots=True) -> str:
|
||||
def clip_tokens(text: str, max_tokens: int, add_three_dots=True, num_input_tokens=None, delete_last_line=False) -> str:
|
||||
"""
|
||||
Clip the number of tokens in a string to a maximum number of tokens.
|
||||
|
||||
@ -567,16 +681,30 @@ def clip_tokens(text: str, max_tokens: int, add_three_dots=True) -> str:
|
||||
return text
|
||||
|
||||
try:
|
||||
if num_input_tokens is None:
|
||||
encoder = TokenEncoder.get_token_encoder()
|
||||
num_input_tokens = len(encoder.encode(text))
|
||||
if num_input_tokens <= max_tokens:
|
||||
return text
|
||||
if max_tokens < 0:
|
||||
return ""
|
||||
|
||||
# calculate the number of characters to keep
|
||||
num_chars = len(text)
|
||||
chars_per_token = num_chars / num_input_tokens
|
||||
num_output_chars = int(chars_per_token * max_tokens)
|
||||
factor = 0.9 # reduce by 10% to be safe
|
||||
num_output_chars = int(factor * chars_per_token * max_tokens)
|
||||
|
||||
# clip the text
|
||||
if num_output_chars > 0:
|
||||
clipped_text = text[:num_output_chars]
|
||||
if delete_last_line:
|
||||
clipped_text = clipped_text.rsplit('\n', 1)[0]
|
||||
if add_three_dots:
|
||||
clipped_text += "\n...(truncated)"
|
||||
else: # if the text is empty
|
||||
clipped_text = ""
|
||||
|
||||
return clipped_text
|
||||
except Exception as e:
|
||||
get_logger().warning(f"Failed to clip tokens: {e}")
|
||||
@ -663,6 +791,57 @@ def find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo],
|
||||
break
|
||||
return position, absolute_position
|
||||
|
||||
def validate_and_await_rate_limit(rate_limit_status=None, git_provider=None, get_rate_limit_status_func=None):
    """
    Check a rate-limit status mapping and sleep until reset for every exhausted resource.

    Args:
        rate_limit_status: mapping with a 'resources' entry whose values expose
            'remaining' and 'reset'. When falsy, it is fetched from ``git_provider``
            if one is given, otherwise from ``get_rate_limit_status_func``.
        git_provider: optional provider; its ``github_client.get_rate_limit().raw_data``
            is used as the 'resources' mapping.
        get_rate_limit_status_func: optional zero-argument callable returning a status
            mapping; used whenever no ``git_provider`` is given.

    Returns:
        bool: True if at least one resource had ``remaining == 0``, False otherwise.
    """

    def _fetch_status():
        # The provider takes precedence over the callable, both for the initial
        # fetch and for the refresh after a wait.
        if git_provider:
            return {'resources': git_provider.github_client.get_rate_limit().raw_data}
        return get_rate_limit_status_func()

    if not rate_limit_status:
        rate_limit_status = _fetch_status()

    # validate that the rate limit is not exceeded
    hit_limit = False
    for resource_name, resource in rate_limit_status['resources'].items():
        if resource['remaining'] != 0:
            continue

        print(f"key: {resource_name}, value: {resource}")
        hit_limit = True
        # 'reset' is compared against the current epoch timestamp
        seconds_to_reset = resource['reset'] - datetime.now().timestamp()
        print(f"Rate limit exceeded. Sleeping for {seconds_to_reset / 3600.0} hours")
        if seconds_to_reset > 0:
            time.sleep(seconds_to_reset + 1)

        # Re-query the status after waiting. The loop keeps iterating over the
        # mapping obtained before this rebinding.
        rate_limit_status = _fetch_status()

    return hit_limit
|
||||
|
||||
|
||||
def get_largest_component(pr_url):
    """
    Find the component with the most added lines in a PR.

    Runs ``PRAnalyzer`` on ``pr_url`` with ``config.publish_output`` temporarily
    disabled, then scans the returned ``{file: {component: info}}`` mapping for the
    entry with the largest ``info['num_plus_lines']``.

    Args:
        pr_url: URL of the pull request handed to ``PRAnalyzer``.

    Returns:
        tuple: ``(component_name, file)`` of the largest changed component, or
        ``(None, None)`` when no component with a positive ``num_plus_lines`` is found.
    """
    from pr_agent.tools.pr_analyzer import PRAnalyzer

    publish_output = get_settings().config.publish_output
    get_settings().config.publish_output = False  # disable publish output
    try:
        analyzer = PRAnalyzer(pr_url)
        methods_dict_files = analyzer.run_sync()
    finally:
        # Always restore the caller's setting, even if the analysis raises.
        get_settings().config.publish_output = publish_output

    max_lines_changed = 0
    file_b = ""
    component_name_b = ""
    for file in methods_dict_files:
        for method in methods_dict_files[file]:
            try:
                num_plus_lines = methods_dict_files[file][method]['num_plus_lines']
                if num_plus_lines > max_lines_changed:
                    max_lines_changed = num_plus_lines
                    file_b = file
                    component_name_b = method
            except Exception:
                # Best effort: skip entries that lack a comparable 'num_plus_lines'.
                pass

    if component_name_b:
        get_logger().info(f"Using the largest changed component: '{component_name_b}'")
        return component_name_b, file_b
    else:
        return None, None
|
||||
|
||||
def github_action_output(output_data: dict, key_name: str):
|
||||
try:
|
||||
if not get_settings().get('github_action_config.enable_output', False):
|
||||
@ -674,3 +853,34 @@ def github_action_output(output_data: dict, key_name: str):
|
||||
except Exception as e:
|
||||
get_logger().error(f"Failed to write to GitHub Action output: {e}")
|
||||
return
|
||||
|
||||
|
||||
def show_relevant_configurations(relevant_section: str) -> str:
    """
    Render the active settings as a collapsible markdown/HTML block.

    Two fenced ``yaml`` listings are produced: one for the ``config`` section of the
    current settings and one for ``relevant_section``. Keys listed in
    ``forbidden_keys`` are omitted from both listings.

    Args:
        relevant_section: name of the settings section to list after ``config``;
            a missing section is rendered as an empty listing.

    Returns:
        str: the markdown text, wrapped in a ``<details>`` element.
    """
    forbidden_keys = ['ai_disclaimer', 'ai_disclaimer_title', 'ANALYTICS_FOLDER', 'secret_provider',
                      'trial_prefix_message', 'no_eligible_message', 'identity_provider', 'ALLOWED_REPOS', 'APP_NAME']

    markdown_text = ""
    markdown_text += "\n<hr>\n<details> <summary><strong>🛠️ Relevant configurations:</strong></summary> \n\n"
    markdown_text += "<br>These are the relevant [configurations](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml) for this tool:\n\n"
    # Bold markers wrap the whole "[config]" heading, matching the section heading below.
    markdown_text += "**[config]**\n```yaml\n\n"
    for key, value in get_settings().config.items():
        if key in forbidden_keys:
            continue
        markdown_text += f"{key}: {value}\n"
    markdown_text += "\n```\n"
    markdown_text += f"\n**[{relevant_section}]**\n```yaml\n\n"
    for key, value in get_settings().get(relevant_section, {}).items():
        if key in forbidden_keys:
            continue
        markdown_text += f"{key}: {value}\n"
    markdown_text += "\n```"
    markdown_text += "\n</details>\n"
    return markdown_text
|
||||
|
||||
def is_value_no(value):
    """
    Tell whether ``value`` represents a negative / absent answer.

    Returns True for ``None`` and for any value whose string form, after stripping
    surrounding whitespace and lower-casing, is 'no', 'none' or 'false'.
    Returns False for everything else.
    """
    if value is None:
        return True
    normalized = str(value).strip().lower()
    return normalized in ('no', 'none', 'false')
|
||||
|
@ -9,6 +9,7 @@ from pr_agent.log import setup_logger
|
||||
log_level = os.environ.get("LOG_LEVEL", "INFO")
|
||||
setup_logger(log_level)
|
||||
|
||||
|
||||
def set_parser():
|
||||
parser = argparse.ArgumentParser(description='AI based pull request analyzer', usage=
|
||||
"""\
|
||||
@ -50,6 +51,7 @@ def set_parser():
|
||||
parser.add_argument('rest', nargs=argparse.REMAINDER, default=[])
|
||||
return parser
|
||||
|
||||
|
||||
def run_command(pr_url, command):
|
||||
# Preparing the command
|
||||
run_command_str = f"--pr_url={pr_url} {command.lstrip('/')}"
|
||||
@ -58,6 +60,7 @@ def run_command(pr_url, command):
|
||||
# Run the command. Feedback will appear in GitHub PR comments
|
||||
run(args=args)
|
||||
|
||||
|
||||
def run(inargs=None, args=None):
|
||||
parser = set_parser()
|
||||
if not args:
|
||||
|
@ -21,6 +21,7 @@ global_settings = Dynaconf(
|
||||
"settings/pr_line_questions_prompts.toml",
|
||||
"settings/pr_description_prompts.toml",
|
||||
"settings/pr_code_suggestions_prompts.toml",
|
||||
"settings/pr_code_suggestions_reflect_prompts.toml",
|
||||
"settings/pr_sort_code_suggestions_prompts.toml",
|
||||
"settings/pr_information_from_user_prompts.toml",
|
||||
"settings/pr_update_changelog_prompts.toml",
|
||||
@ -33,6 +34,15 @@ global_settings = Dynaconf(
|
||||
|
||||
|
||||
def get_settings():
|
||||
"""
|
||||
Retrieves the current settings.
|
||||
|
||||
This function attempts to fetch the settings from the starlette_context's context object. If it fails,
|
||||
it defaults to the global settings defined outside of this function.
|
||||
|
||||
Returns:
|
||||
Dynaconf: The current settings object, either from the context or the global default.
|
||||
"""
|
||||
try:
|
||||
return context["settings"]
|
||||
except Exception:
|
||||
@ -40,7 +50,7 @@ def get_settings():
|
||||
|
||||
|
||||
# Add local configuration from pyproject.toml of the project being reviewed
|
||||
def _find_repository_root() -> Path:
|
||||
def _find_repository_root() -> Optional[Path]:
|
||||
"""
|
||||
Identify project root directory by recursively searching for the .git directory in the parent directories.
|
||||
"""
|
||||
@ -60,7 +70,7 @@ def _find_pyproject() -> Optional[Path]:
|
||||
"""
|
||||
repo_root = _find_repository_root()
|
||||
if repo_root:
|
||||
pyproject = _find_repository_root() / "pyproject.toml"
|
||||
pyproject = repo_root / "pyproject.toml"
|
||||
return pyproject if pyproject.is_file() else None
|
||||
return None
|
||||
|
||||
|
@ -2,12 +2,13 @@ from pr_agent.config_loader import get_settings
|
||||
from pr_agent.git_providers.bitbucket_provider import BitbucketProvider
|
||||
from pr_agent.git_providers.bitbucket_server_provider import BitbucketServerProvider
|
||||
from pr_agent.git_providers.codecommit_provider import CodeCommitProvider
|
||||
from pr_agent.git_providers.git_provider import GitProvider
|
||||
from pr_agent.git_providers.github_provider import GithubProvider
|
||||
from pr_agent.git_providers.gitlab_provider import GitLabProvider
|
||||
from pr_agent.git_providers.local_git_provider import LocalGitProvider
|
||||
from pr_agent.git_providers.azuredevops_provider import AzureDevopsProvider
|
||||
from pr_agent.git_providers.gerrit_provider import GerritProvider
|
||||
|
||||
from starlette_context import context
|
||||
|
||||
_GIT_PROVIDERS = {
|
||||
'github': GithubProvider,
|
||||
@ -20,6 +21,7 @@ _GIT_PROVIDERS = {
|
||||
'gerrit': GerritProvider,
|
||||
}
|
||||
|
||||
|
||||
def get_git_provider():
|
||||
try:
|
||||
provider_id = get_settings().config.git_provider
|
||||
@ -28,3 +30,33 @@ def get_git_provider():
|
||||
if provider_id not in _GIT_PROVIDERS:
|
||||
raise ValueError(f"Unknown git provider: {provider_id}")
|
||||
return _GIT_PROVIDERS[provider_id]
|
||||
|
||||
|
||||
def get_git_provider_with_context(pr_url) -> GitProvider:
    """
    Get a GitProvider instance for the given PR URL. If the GitProvider instance is already in the context, return it.

    When a starlette context is active, the provider created here is stored as
    ``context["git_provider"] = {pr_url: provider}`` and looked up under the same
    ``pr_url`` key on later calls. Outside a context (CLI), a new provider is
    created on every call.

    Args:
        pr_url: URL of the pull request; passed to the provider constructor and
            used as the cache key.

    Returns:
        GitProvider: the cached or newly created provider.

    Raises:
        ValueError: if the configured provider id is unknown or the provider
            cannot be constructed (the original error is chained as the cause).
    """

    is_context_env = None
    try:
        is_context_env = context.get("settings", None)
    except Exception:
        pass  # we are not in a context environment (CLI)

    # Look the provider up under the same key it is stored with below (the URL
    # itself); a lookup under the literal string "pr_url" would never match.
    if is_context_env:
        cached_provider = context.get("git_provider", {}).get(pr_url)
        if cached_provider:
            # possibly check if the git_provider is still valid, or if some reset is needed
            # ...
            return cached_provider

    try:
        provider_id = get_settings().config.git_provider
        if provider_id not in _GIT_PROVIDERS:
            raise ValueError(f"Unknown git provider: {provider_id}")
        git_provider = _GIT_PROVIDERS[provider_id](pr_url)
        if is_context_env:
            context["git_provider"] = {pr_url: git_provider}
        return git_provider
    except Exception as e:
        raise ValueError(f"Failed to get git provider for {pr_url}") from e
|
||||
|
@ -2,6 +2,7 @@ import os
|
||||
from typing import Optional, Tuple
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from ..algo.file_filter import filter_ignored
|
||||
from ..log import get_logger
|
||||
from ..algo.language_handler import is_valid_file
|
||||
from ..algo.utils import clip_tokens, find_line_number_of_relevant_line_in_file, load_large_diff
|
||||
@ -26,6 +27,7 @@ try:
|
||||
CommentThread,
|
||||
GitVersionDescriptor,
|
||||
GitPullRequest,
|
||||
GitPullRequestIterationChanges,
|
||||
)
|
||||
except ImportError:
|
||||
AZURE_DEVOPS_AVAILABLE = False
|
||||
@ -230,32 +232,73 @@ class AzureDevopsProvider(GitProvider):
|
||||
base_sha = self.pr.last_merge_target_commit
|
||||
head_sha = self.pr.last_merge_source_commit
|
||||
|
||||
commits = self.azure_devops_client.get_pull_request_commits(
|
||||
project=self.workspace_slug,
|
||||
# Get PR iterations
|
||||
iterations = self.azure_devops_client.get_pull_request_iterations(
|
||||
repository_id=self.repo_slug,
|
||||
pull_request_id=self.pr_num,
|
||||
project=self.workspace_slug
|
||||
)
|
||||
changes = None
|
||||
if iterations:
|
||||
iteration_id = iterations[-1].id # Get the last iteration (most recent changes)
|
||||
|
||||
# Get changes for the iteration
|
||||
changes = self.azure_devops_client.get_pull_request_iteration_changes(
|
||||
repository_id=self.repo_slug,
|
||||
pull_request_id=self.pr_num,
|
||||
iteration_id=iteration_id,
|
||||
project=self.workspace_slug
|
||||
)
|
||||
diff_files = []
|
||||
diffs = []
|
||||
diff_types = {}
|
||||
if changes:
|
||||
for change in changes.change_entries:
|
||||
item = change.additional_properties.get('item', {})
|
||||
path = item.get('path', None)
|
||||
if path:
|
||||
diffs.append(path)
|
||||
diff_types[path] = change.additional_properties.get('changeType', 'Unknown')
|
||||
|
||||
for c in commits:
|
||||
changes_obj = self.azure_devops_client.get_changes(
|
||||
project=self.workspace_slug,
|
||||
repository_id=self.repo_slug,
|
||||
commit_id=c.commit_id,
|
||||
)
|
||||
for i in changes_obj.changes:
|
||||
if i["item"]["gitObjectType"] == "tree":
|
||||
continue
|
||||
diffs.append(i["item"]["path"])
|
||||
diff_types[i["item"]["path"]] = i["changeType"]
|
||||
# wrong implementation - gets all the files that were changed in any commit in the PR
|
||||
# commits = self.azure_devops_client.get_pull_request_commits(
|
||||
# project=self.workspace_slug,
|
||||
# repository_id=self.repo_slug,
|
||||
# pull_request_id=self.pr_num,
|
||||
# )
|
||||
#
|
||||
# diff_files = []
|
||||
# diffs = []
|
||||
# diff_types = {}
|
||||
|
||||
diffs = list(set(diffs))
|
||||
# for c in commits:
|
||||
# changes_obj = self.azure_devops_client.get_changes(
|
||||
# project=self.workspace_slug,
|
||||
# repository_id=self.repo_slug,
|
||||
# commit_id=c.commit_id,
|
||||
# )
|
||||
# for i in changes_obj.changes:
|
||||
# if i["item"]["gitObjectType"] == "tree":
|
||||
# continue
|
||||
# diffs.append(i["item"]["path"])
|
||||
# diff_types[i["item"]["path"]] = i["changeType"]
|
||||
#
|
||||
# diffs = list(set(diffs))
|
||||
|
||||
diffs_original = diffs
|
||||
diffs = filter_ignored(diffs_original, 'azure')
|
||||
if diffs_original != diffs:
|
||||
try:
|
||||
get_logger().info(f"Filtered out [ignore] files for pull request:", extra=
|
||||
{"files": diffs_original, # diffs is just a list of names
|
||||
"filtered_files": diffs})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
invalid_files_names = []
|
||||
for file in diffs:
|
||||
if not is_valid_file(file):
|
||||
invalid_files_names.append(file)
|
||||
continue
|
||||
|
||||
version = GitVersionDescriptor(
|
||||
@ -273,12 +316,13 @@ class AzureDevopsProvider(GitProvider):
|
||||
|
||||
new_file_content_str = new_file_content_str.content
|
||||
except Exception as error:
|
||||
get_logger().error(
|
||||
"Failed to retrieve new file content of %s at version %s. Error: %s",
|
||||
file,
|
||||
version,
|
||||
str(error),
|
||||
)
|
||||
get_logger().error(f"Failed to retrieve new file content of {file} at version {version}. Error: {str(error)}")
|
||||
# get_logger().error(
|
||||
# "Failed to retrieve new file content of %s at version %s. Error: %s",
|
||||
# file,
|
||||
# version,
|
||||
# str(error),
|
||||
# )
|
||||
new_file_content_str = ""
|
||||
|
||||
edit_type = EDIT_TYPE.MODIFIED
|
||||
@ -303,17 +347,17 @@ class AzureDevopsProvider(GitProvider):
|
||||
)
|
||||
original_file_content_str = original_file_content_str.content
|
||||
except Exception as error:
|
||||
get_logger().error(
|
||||
"Failed to retrieve original file content of %s at version %s. Error: %s",
|
||||
file,
|
||||
version,
|
||||
str(error),
|
||||
)
|
||||
get_logger().error(f"Failed to retrieve original file content of {file} at version {version}. Error: {str(error)}")
|
||||
original_file_content_str = ""
|
||||
|
||||
patch = load_large_diff(
|
||||
file, new_file_content_str, original_file_content_str
|
||||
)
|
||||
file, new_file_content_str, original_file_content_str, show_warning=False
|
||||
).rstrip()
|
||||
|
||||
# count number of lines added and removed
|
||||
patch_lines = patch.splitlines(keepends=True)
|
||||
num_plus_lines = len([line for line in patch_lines if line.startswith('+')])
|
||||
num_minus_lines = len([line for line in patch_lines if line.startswith('-')])
|
||||
|
||||
diff_files.append(
|
||||
FilePatchInfo(
|
||||
@ -322,8 +366,12 @@ class AzureDevopsProvider(GitProvider):
|
||||
patch=patch,
|
||||
filename=file,
|
||||
edit_type=edit_type,
|
||||
num_plus_lines=num_plus_lines,
|
||||
num_minus_lines=num_minus_lines,
|
||||
)
|
||||
)
|
||||
get_logger().info(f"Invalid files: {invalid_files_names}")
|
||||
|
||||
self.diff_files = diff_files
|
||||
return diff_files
|
||||
except Exception as e:
|
||||
@ -404,7 +452,7 @@ class AzureDevopsProvider(GitProvider):
|
||||
return dict(body=body, path=path, position=position, absolute_position=absolute_position) if subject_type == "LINE" else {}
|
||||
|
||||
def publish_inline_comments(self, comments: list[dict], disable_fallback: bool = False):
|
||||
overall_sucess = True
|
||||
overall_success = True
|
||||
for comment in comments:
|
||||
try:
|
||||
self.publish_comment(comment["body"],
|
||||
@ -426,8 +474,8 @@ class AzureDevopsProvider(GitProvider):
|
||||
except Exception as e:
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
get_logger().error(f"Failed to publish code suggestion, error: {e}")
|
||||
overall_sucess = False
|
||||
return overall_sucess
|
||||
overall_success = False
|
||||
return overall_success
|
||||
|
||||
def get_title(self):
|
||||
return self.pr.title
|
||||
|
@ -7,6 +7,8 @@ from atlassian.bitbucket import Cloud
|
||||
from starlette_context import context
|
||||
|
||||
from pr_agent.algo.types import FilePatchInfo, EDIT_TYPE
|
||||
from ..algo.file_filter import filter_ignored
|
||||
from ..algo.language_handler import is_valid_file
|
||||
from ..algo.utils import find_line_number_of_relevant_line_in_file
|
||||
from ..config_loader import get_settings
|
||||
from ..log import get_logger
|
||||
@ -122,13 +124,30 @@ class BitbucketProvider(GitProvider):
|
||||
if self.diff_files:
|
||||
return self.diff_files
|
||||
|
||||
diffs = self.pr.diffstat()
|
||||
diffs_original = list(self.pr.diffstat())
|
||||
diffs = filter_ignored(diffs_original, 'bitbucket')
|
||||
if diffs != diffs_original:
|
||||
try:
|
||||
names_original = [d.new.path for d in diffs_original]
|
||||
names_filtered = [d.new.path for d in diffs]
|
||||
get_logger().info(f"Filtered out [ignore] files for PR", extra={
|
||||
'original_files': names_original,
|
||||
'filtered_files': names_filtered
|
||||
})
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
diff_split = [
|
||||
"diff --git%s" % x for x in self.pr.diff().split("diff --git") if x.strip()
|
||||
]
|
||||
|
||||
invalid_files_names = []
|
||||
diff_files = []
|
||||
for index, diff in enumerate(diffs):
|
||||
if not is_valid_file(diff.new.path):
|
||||
invalid_files_names.append(diff.new.path)
|
||||
continue
|
||||
|
||||
original_file_content_str = self._get_pr_file_content(
|
||||
diff.old.get_data("links")
|
||||
)
|
||||
@ -150,6 +169,9 @@ class BitbucketProvider(GitProvider):
|
||||
file_patch_canonic_structure.edit_type = EDIT_TYPE.RENAMED
|
||||
diff_files.append(file_patch_canonic_structure)
|
||||
|
||||
if invalid_files_names:
|
||||
get_logger().info(f"Invalid file names: {invalid_files_names}")
|
||||
|
||||
|
||||
self.diff_files = diff_files
|
||||
return diff_files
|
||||
@ -172,7 +194,7 @@ class BitbucketProvider(GitProvider):
|
||||
latest_commit_url = self.get_latest_commit_url()
|
||||
comment_url = self.get_comment_url(comment)
|
||||
if update_header:
|
||||
updated_header = f"{initial_header}\n\n### ({name.capitalize()} updated until commit {latest_commit_url})\n"
|
||||
updated_header = f"{initial_header}\n\n#### ({name.capitalize()} updated until commit {latest_commit_url})\n"
|
||||
pr_comment_updated = pr_comment.replace(initial_header, updated_header)
|
||||
else:
|
||||
pr_comment_updated = pr_comment
|
||||
@ -213,7 +235,7 @@ class BitbucketProvider(GitProvider):
|
||||
except Exception as e:
|
||||
get_logger().exception(f"Failed to remove comment, error: {e}")
|
||||
|
||||
# funtion to create_inline_comment
|
||||
# function to create_inline_comment
|
||||
def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, absolute_position: int = None):
|
||||
position, absolute_position = find_line_number_of_relevant_line_in_file(self.get_diff_files(),
|
||||
relevant_file.strip('`'),
|
||||
|
@ -1,13 +1,14 @@
|
||||
import json
|
||||
from typing import Optional, Tuple
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import quote_plus, urlparse
|
||||
|
||||
import requests
|
||||
from atlassian.bitbucket import Bitbucket
|
||||
from starlette_context import context
|
||||
|
||||
from .git_provider import GitProvider
|
||||
from pr_agent.algo.types import FilePatchInfo
|
||||
from ..algo.types import EDIT_TYPE, FilePatchInfo
|
||||
from ..algo.language_handler import is_valid_file
|
||||
from ..algo.utils import load_large_diff, find_line_number_of_relevant_line_in_file
|
||||
from ..config_loader import get_settings
|
||||
from ..log import get_logger
|
||||
@ -59,6 +60,9 @@ class BitbucketServerProvider(GitProvider):
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
def get_pr_id(self):
    """Return the pull-request number stored on this provider (``self.pr_num``)."""
    return self.pr_num
|
||||
|
||||
def publish_code_suggestions(self, code_suggestions: list) -> bool:
|
||||
"""
|
||||
Publishes code suggestions as comments on the PR.
|
||||
@ -140,14 +144,8 @@ class BitbucketServerProvider(GitProvider):
|
||||
if self.diff_files:
|
||||
return self.diff_files
|
||||
|
||||
commits_in_pr = self.bitbucket_client.get_pull_requests_commits(
|
||||
self.workspace_slug,
|
||||
self.repo_slug,
|
||||
self.pr_num
|
||||
)
|
||||
|
||||
commit_list = list(commits_in_pr)
|
||||
base_sha, head_sha = commit_list[0]['parents'][0]['id'], commit_list[-1]['id']
|
||||
base_sha = self.pr.toRef['latestCommit']
|
||||
head_sha = self.pr.fromRef['latestCommit']
|
||||
|
||||
diff_files = []
|
||||
original_file_content_str = ""
|
||||
@ -156,6 +154,10 @@ class BitbucketServerProvider(GitProvider):
|
||||
changes = self.bitbucket_client.get_pull_requests_changes(self.workspace_slug, self.repo_slug, self.pr_num)
|
||||
for change in changes:
|
||||
file_path = change['path']['toString']
|
||||
if not is_valid_file(file_path.split("/")[-1]):
|
||||
get_logger().info(f"Skipping a non-code file: {file_path}")
|
||||
continue
|
||||
|
||||
match change['type']:
|
||||
case 'ADD':
|
||||
edit_type = EDIT_TYPE.ADDED
|
||||
@ -209,7 +211,7 @@ class BitbucketServerProvider(GitProvider):
|
||||
def remove_comment(self, comment):
|
||||
pass
|
||||
|
||||
# funtion to create_inline_comment
|
||||
# function to create_inline_comment
|
||||
def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str,
|
||||
absolute_position: int = None):
|
||||
|
||||
@ -241,8 +243,11 @@ class BitbucketServerProvider(GitProvider):
|
||||
}
|
||||
}
|
||||
|
||||
response = requests.post(url=self._get_pr_comments_url(), json=payload, headers=self.headers)
|
||||
return response
|
||||
try:
|
||||
requests.post(url=self._get_pr_comments_url(), json=payload, headers=self.headers).raise_for_status()
|
||||
except Exception as e:
|
||||
get_logger().error(f"Failed to publish inline comment to '{file}' at line {from_line}, error: {e}")
|
||||
raise e
|
||||
|
||||
def generate_link_to_relevant_line_number(self, suggestion) -> str:
|
||||
try:
|
||||
@ -255,18 +260,37 @@ class BitbucketServerProvider(GitProvider):
|
||||
position, absolute_position = find_line_number_of_relevant_line_in_file \
|
||||
(diff_files, relevant_file, relevant_line_str)
|
||||
|
||||
if absolute_position != -1:
|
||||
if self.pr:
|
||||
link = f"{self.pr_url}/diff#{quote_plus(relevant_file)}?t={absolute_position}"
|
||||
return link
|
||||
else:
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
get_logger().info(f"Failed adding line link to '{relevant_file}' since PR not set")
|
||||
else:
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
get_logger().info(f"Failed adding line link to '{relevant_file}' since position not found")
|
||||
|
||||
if absolute_position != -1 and self.pr_url:
|
||||
link = f"{self.pr_url}/#L{relevant_file}T{absolute_position}"
|
||||
link = f"{self.pr_url}/diff#{quote_plus(relevant_file)}?t={absolute_position}"
|
||||
return link
|
||||
except Exception as e:
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
get_logger().info(f"Failed adding line link, error: {e}")
|
||||
get_logger().info(f"Failed adding line link to '{relevant_file}', error: {e}")
|
||||
|
||||
return ""
|
||||
|
||||
def publish_inline_comments(self, comments: list[dict]):
    """Publish every entry of ``comments`` as a single-line inline comment.

    Each entry is a dict carrying ``body`` and ``path`` plus one line
    locator. The locator is the first key present, in priority order:
    ``position``, ``start_line`` (multi-line comment) or ``line``
    (single-line comment). Entries with no locator are logged as errors
    and skipped.
    """
    # note that bitbucket does not seem to support range - only a comment on a
    # single line, so a multi-line comment is anchored at its 'start_line':
    # https://community.developer.atlassian.com/t/api-post-endpoint-for-inline-pull-request-comments/60452
    line_keys = ('position', 'start_line', 'line')
    for comment in comments:
        line_key = next((key for key in line_keys if key in comment), None)
        if line_key is None:
            get_logger().error(f"Could not publish inline comment: {comment}")
            continue
        self.publish_inline_comment(comment['body'], comment[line_key], comment['path'])
|
||||
|
||||
def get_title(self):
|
||||
return self.pr.title
|
||||
@ -278,7 +302,10 @@ class BitbucketServerProvider(GitProvider):
|
||||
return self.pr.fromRef['displayId']
|
||||
|
||||
def get_pr_description_full(self):
    """Return ``self.pr.description``, or ``None`` when the PR object has no such attribute."""
    # getattr with a default covers a PR object that lacks 'description'
    # without a separate hasattr() probe.
    return getattr(self.pr, "description", None)
|
||||
|
||||
def get_user_id(self):
|
||||
return 0
|
||||
@ -305,7 +332,7 @@ class BitbucketServerProvider(GitProvider):
|
||||
path_parts = parsed_url.path.strip("/").split("/")
|
||||
if len(path_parts) < 6 or path_parts[4] != "pull-requests":
|
||||
raise ValueError(
|
||||
"The provided URL does not appear to be a Bitbucket PR URL"
|
||||
f"The provided URL '{pr_url}' does not appear to be a Bitbucket PR URL"
|
||||
)
|
||||
|
||||
workspace_slug = path_parts[1]
|
||||
@ -313,7 +340,7 @@ class BitbucketServerProvider(GitProvider):
|
||||
try:
|
||||
pr_number = int(path_parts[5])
|
||||
except ValueError as e:
|
||||
raise ValueError("Unable to convert PR number to integer") from e
|
||||
raise ValueError(f"Unable to convert PR number '{path_parts[5]}' to integer") from e
|
||||
|
||||
return workspace_slug, repo_slug, pr_number
|
||||
|
||||
@ -334,13 +361,18 @@ class BitbucketServerProvider(GitProvider):
|
||||
raise NotImplementedError("Get commit messages function not implemented yet.")
|
||||
# bitbucket does not support labels
|
||||
def publish_description(self, pr_title: str, description: str):
|
||||
payload = json.dumps({
|
||||
payload = {
|
||||
"version": self.pr.version,
|
||||
"description": description,
|
||||
"title": pr_title
|
||||
})
|
||||
"title": pr_title,
|
||||
"reviewers": self.pr.reviewers # needs to be sent otherwise gets wiped
|
||||
}
|
||||
try:
|
||||
self.bitbucket_client.update_pull_request(self.workspace_slug, self.repo_slug, str(self.pr_num), payload)
|
||||
except Exception as e:
|
||||
get_logger().error(f"Failed to update pull request, error: {e}")
|
||||
raise e
|
||||
|
||||
response = requests.put(url=self.bitbucket_pull_request_api_url, headers=self.headers, data=payload)
|
||||
return response
|
||||
|
||||
# bitbucket does not support labels
|
||||
def publish_labels(self, pr_types: list):
|
||||
|
@ -13,10 +13,17 @@ class GitProvider(ABC):
|
||||
def is_supported(self, capability: str) -> bool:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_files(self) -> list:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_diff_files(self) -> list[FilePatchInfo]:
|
||||
pass
|
||||
|
||||
def get_incremental_commits(self, is_incremental):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def publish_description(self, pr_title: str, pr_body: str):
|
||||
pass
|
||||
@ -131,6 +138,34 @@ class GitProvider(ABC):
|
||||
final_update_message=True):
|
||||
self.publish_comment(pr_comment)
|
||||
|
||||
def publish_persistent_comment_full(self, pr_comment: str,
                                    initial_header: str,
                                    update_header: bool = True,
                                    name='review',
                                    final_update_message=True):
    """Update an existing "persistent" comment in place, or publish a new one.

    Scans the comments returned by ``self.get_issue_comments()`` for the
    first one whose body starts with ``initial_header``. If one is found it
    is rewritten through ``self.edit_comment`` and the method returns;
    otherwise ``pr_comment`` is published as a new comment.

    Args:
        pr_comment: Full text of the comment to publish.
        initial_header: Prefix that identifies the persistent comment.
        update_header: When true, ``initial_header`` inside ``pr_comment`` is
            extended with an "updated until commit <url>" sub-header.
        name: Label used in the sub-header, log line and update notice.
        final_update_message: When true, a short extra comment linking to the
            edited comment is published after the edit.

    Any exception raised while looking up or editing the comment is logged
    and the method falls back to publishing ``pr_comment`` as a new comment.
    """
    try:
        prev_comments = list(self.get_issue_comments())
        for comment in prev_comments:
            if not comment.body.startswith(initial_header):
                continue
            latest_commit_url = self.get_latest_commit_url()
            comment_url = self.get_comment_url(comment)
            if update_header:
                updated_header = f"{initial_header}\n\n#### ({name.capitalize()} updated until commit {latest_commit_url})\n"
                pr_comment_updated = pr_comment.replace(initial_header, updated_header)
            else:
                pr_comment_updated = pr_comment
            get_logger().info(f"Persistent mode - updating comment {comment_url} to latest {name} message")
            self.edit_comment(comment, pr_comment_updated)
            if final_update_message:
                self.publish_comment(
                    f"**[Persistent {name}]({comment_url})** updated to latest commit {latest_commit_url}")
            return
    except Exception as e:
        # Best effort: a failed in-place update must not prevent publishing.
        get_logger().exception(f"Failed to update persistent review, error: {e}")
    self.publish_comment(pr_comment)
|
||||
|
||||
@abstractmethod
|
||||
def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
|
||||
pass
|
||||
|
@ -8,8 +8,9 @@ from github import AppAuthentication, Auth, Github, GithubException
|
||||
from retry import retry
|
||||
from starlette_context import context
|
||||
|
||||
from ..algo.file_filter import filter_ignored
|
||||
from ..algo.language_handler import is_valid_file
|
||||
from ..algo.utils import load_large_diff, clip_tokens, find_line_number_of_relevant_line_in_file
|
||||
from ..algo.utils import PRReviewHeader, load_large_diff, clip_tokens, find_line_number_of_relevant_line_in_file
|
||||
from ..config_loader import get_settings
|
||||
from ..log import get_logger
|
||||
from ..servers.utils import RateLimitExceeded
|
||||
@ -18,7 +19,7 @@ from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo
|
||||
|
||||
|
||||
class GithubProvider(GitProvider):
|
||||
def __init__(self, pr_url: Optional[str] = None, incremental=IncrementalPR(False)):
|
||||
def __init__(self, pr_url: Optional[str] = None):
|
||||
self.repo_obj = None
|
||||
try:
|
||||
self.installation_id = context.get("installation_id", None)
|
||||
@ -33,18 +34,21 @@ class GithubProvider(GitProvider):
|
||||
self.github_user_id = None
|
||||
self.diff_files = None
|
||||
self.git_files = None
|
||||
self.incremental = incremental
|
||||
self.incremental = IncrementalPR(False)
|
||||
if pr_url and 'pull' in pr_url:
|
||||
self.set_pr(pr_url)
|
||||
self.pr_commits = list(self.pr.get_commits())
|
||||
if self.incremental.is_incremental:
|
||||
self.unreviewed_files_set = dict()
|
||||
self.get_incremental_commits()
|
||||
self.last_commit_id = self.pr_commits[-1]
|
||||
self.pr_url = self.get_pr_url() # pr_url for github actions can be as api.github.com, so we need to get the url from the pr object
|
||||
else:
|
||||
self.pr_commits = None
|
||||
|
||||
def get_incremental_commits(self, incremental=None):
    """Store the incremental-review settings and, if enabled, load the incremental commits.

    Args:
        incremental: Object exposing ``is_incremental``. When omitted, a new
            ``IncrementalPR(False)`` is created for this call.

    When ``incremental.is_incremental`` is true, ``self.unreviewed_files_set``
    is reset to an empty dict and ``self._get_incremental_commits()`` is run.
    """
    # A default written as ``incremental=IncrementalPR(False)`` is evaluated
    # once, at definition time, so every provider would store the very same
    # instance; build a fresh one per call instead.
    if incremental is None:
        incremental = IncrementalPR(False)
    self.incremental = incremental
    if self.incremental.is_incremental:
        self.unreviewed_files_set = dict()
        self._get_incremental_commits()
|
||||
|
||||
def is_supported(self, capability: str) -> bool:
|
||||
return True
|
||||
|
||||
@ -55,7 +59,7 @@ class GithubProvider(GitProvider):
|
||||
self.repo, self.pr_num = self._parse_pr_url(pr_url)
|
||||
self.pr = self._get_pr()
|
||||
|
||||
def get_incremental_commits(self):
|
||||
def _get_incremental_commits(self):
|
||||
if not self.pr_commits:
|
||||
self.pr_commits = list(self.pr.get_commits())
|
||||
|
||||
@ -92,9 +96,9 @@ class GithubProvider(GitProvider):
|
||||
self.comments = list(self.pr.get_issue_comments())
|
||||
prefixes = []
|
||||
if full:
|
||||
prefixes.append("## PR Review")
|
||||
prefixes.append(PRReviewHeader.REGULAR.value)
|
||||
if incremental:
|
||||
prefixes.append("## Incremental PR Review")
|
||||
prefixes.append(PRReviewHeader.INCREMENTAL.value)
|
||||
for index in range(len(self.comments) - 1, -1, -1):
|
||||
if any(self.comments[index].body.startswith(prefix) for prefix in prefixes):
|
||||
return self.comments[index]
|
||||
@ -106,18 +110,21 @@ class GithubProvider(GitProvider):
|
||||
git_files = context.get("git_files", None)
|
||||
if git_files:
|
||||
return git_files
|
||||
self.git_files = self.pr.get_files()
|
||||
self.git_files = list(self.pr.get_files()) # 'list' to handle pagination
|
||||
context["git_files"] = self.git_files
|
||||
return self.git_files
|
||||
except Exception:
|
||||
if not self.git_files:
|
||||
self.git_files = self.pr.get_files()
|
||||
self.git_files = list(self.pr.get_files())
|
||||
return self.git_files
|
||||
|
||||
def get_num_of_files(self):
|
||||
if self.git_files:
|
||||
if hasattr(self.git_files, "totalCount"):
|
||||
return self.git_files.totalCount
|
||||
else:
|
||||
try:
|
||||
return len(self.git_files)
|
||||
except Exception as e:
|
||||
return -1
|
||||
|
||||
@retry(exceptions=RateLimitExceeded,
|
||||
@ -142,11 +149,24 @@ class GithubProvider(GitProvider):
|
||||
if self.diff_files:
|
||||
return self.diff_files
|
||||
|
||||
files = self.get_files()
|
||||
diff_files = []
|
||||
# filter files using [ignore] patterns
|
||||
files_original = self.get_files()
|
||||
files = filter_ignored(files_original)
|
||||
if files_original != files:
|
||||
try:
|
||||
names_original = [file.filename for file in files_original]
|
||||
names_new = [file.filename for file in files]
|
||||
get_logger().info(f"Filtered out [ignore] files for pull request:", extra=
|
||||
{"files": names_original,
|
||||
"filtered_files": names_new})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
diff_files = []
|
||||
invalid_files_names = []
|
||||
for file in files:
|
||||
if not is_valid_file(file.filename):
|
||||
invalid_files_names.append(file.filename)
|
||||
continue
|
||||
|
||||
new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha) # communication with GitHub
|
||||
@ -182,6 +202,8 @@ class GithubProvider(GitProvider):
|
||||
num_plus_lines=num_plus_lines,
|
||||
num_minus_lines=num_minus_lines,)
|
||||
diff_files.append(file_patch_canonical_structure)
|
||||
if invalid_files_names:
|
||||
get_logger().info(f"Filtered out files with invalid extensions: {invalid_files_names}")
|
||||
|
||||
self.diff_files = diff_files
|
||||
try:
|
||||
@ -209,24 +231,7 @@ class GithubProvider(GitProvider):
|
||||
update_header: bool = True,
|
||||
name='review',
|
||||
final_update_message=True):
|
||||
prev_comments = list(self.pr.get_issue_comments())
|
||||
for comment in prev_comments:
|
||||
body = comment.body
|
||||
if body.startswith(initial_header):
|
||||
latest_commit_url = self.get_latest_commit_url()
|
||||
comment_url = self.get_comment_url(comment)
|
||||
if update_header:
|
||||
updated_header = f"{initial_header}\n\n### ({name.capitalize()} updated until commit {latest_commit_url})\n"
|
||||
pr_comment_updated = pr_comment.replace(initial_header, updated_header)
|
||||
else:
|
||||
pr_comment_updated = pr_comment
|
||||
get_logger().info(f"Persistent mode- updating comment {comment_url} to latest review message")
|
||||
response = comment.edit(pr_comment_updated)
|
||||
if final_update_message:
|
||||
self.publish_comment(
|
||||
f"**[Persistent {name}]({comment_url})** updated to latest commit {latest_commit_url}")
|
||||
return
|
||||
self.publish_comment(pr_comment)
|
||||
self.publish_persistent_comment_full(pr_comment, initial_header, update_header, name, final_update_message)
|
||||
|
||||
def publish_comment(self, pr_comment: str, is_temporary: bool = False):
|
||||
if is_temporary and not get_settings().config.publish_output_progress:
|
||||
|
@ -6,6 +6,7 @@ from urllib.parse import urlparse
|
||||
import gitlab
|
||||
from gitlab import GitlabGetError
|
||||
|
||||
from ..algo.file_filter import filter_ignored
|
||||
from ..algo.language_handler import is_valid_file
|
||||
from ..algo.utils import load_large_diff, clip_tokens, find_line_number_of_relevant_line_in_file
|
||||
from ..config_loader import get_settings
|
||||
@ -84,13 +85,29 @@ class GitLabProvider(GitProvider):
|
||||
if self.diff_files:
|
||||
return self.diff_files
|
||||
|
||||
diffs = self.mr.changes()['changes']
|
||||
# filter files using [ignore] patterns
|
||||
diffs_original = self.mr.changes()['changes']
|
||||
diffs = filter_ignored(diffs_original, 'gitlab')
|
||||
if diffs != diffs_original:
|
||||
try:
|
||||
names_original = [diff['new_path'] for diff in diffs_original]
|
||||
names_filtered = [diff['new_path'] for diff in diffs]
|
||||
get_logger().info(f"Filtered out [ignore] files for merge request {self.id_mr}", extra={
|
||||
'original_files': names_original,
|
||||
'filtered_files': names_filtered
|
||||
})
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
diff_files = []
|
||||
invalid_files_names = []
|
||||
for diff in diffs:
|
||||
if is_valid_file(diff['new_path']):
|
||||
if not is_valid_file(diff['new_path']):
|
||||
invalid_files_names.append(diff['new_path'])
|
||||
continue
|
||||
|
||||
original_file_content_str = self.get_pr_file_content(diff['old_path'], self.mr.diff_refs['base_sha'])
|
||||
new_file_content_str = self.get_pr_file_content(diff['new_path'], self.mr.diff_refs['head_sha'])
|
||||
|
||||
try:
|
||||
if isinstance(original_file_content_str, bytes):
|
||||
original_file_content_str = bytes.decode(original_file_content_str, 'utf-8')
|
||||
@ -126,11 +143,13 @@ class GitLabProvider(GitProvider):
|
||||
old_filename=None if diff['old_path'] == diff['new_path'] else diff['old_path'],
|
||||
num_plus_lines=num_plus_lines,
|
||||
num_minus_lines=num_minus_lines, ))
|
||||
if invalid_files_names:
|
||||
get_logger().info(f"Filtered out files with invalid extensions: {invalid_files_names}")
|
||||
|
||||
self.diff_files = diff_files
|
||||
return diff_files
|
||||
|
||||
def get_files(self):
|
||||
def get_files(self) -> list:
|
||||
if not self.git_files:
|
||||
self.git_files = [change['new_path'] for change in self.mr.changes()['changes']]
|
||||
return self.git_files
|
||||
@ -154,26 +173,7 @@ class GitLabProvider(GitProvider):
|
||||
update_header: bool = True,
|
||||
name='review',
|
||||
final_update_message=True):
|
||||
try:
|
||||
for comment in self.mr.notes.list(get_all=True)[::-1]:
|
||||
if comment.body.startswith(initial_header):
|
||||
latest_commit_url = self.get_latest_commit_url()
|
||||
comment_url = self.get_comment_url(comment)
|
||||
if update_header:
|
||||
updated_header = f"{initial_header}\n\n### ({name.capitalize()} updated until commit {latest_commit_url})\n"
|
||||
pr_comment_updated = pr_comment.replace(initial_header, updated_header)
|
||||
else:
|
||||
pr_comment_updated = pr_comment
|
||||
get_logger().info(f"Persistent mode - updating comment {comment_url} to latest {name} message")
|
||||
response = self.mr.notes.update(comment.id, {'body': pr_comment_updated})
|
||||
if final_update_message:
|
||||
self.publish_comment(
|
||||
f"**[Persistent {name}]({comment_url})** updated to latest commit {latest_commit_url}")
|
||||
return
|
||||
except Exception as e:
|
||||
get_logger().exception(f"Failed to update persistent review, error: {e}")
|
||||
pass
|
||||
self.publish_comment(pr_comment)
|
||||
self.publish_persistent_comment_full(pr_comment, initial_header, update_header, name, final_update_message)
|
||||
|
||||
def publish_comment(self, mr_comment: str, is_temporary: bool = False):
|
||||
comment = self.mr.notes.create({'body': mr_comment})
|
||||
@ -184,6 +184,11 @@ class GitLabProvider(GitProvider):
|
||||
def edit_comment(self, comment, body: str):
|
||||
self.mr.notes.update(comment.id,{'body': body} )
|
||||
|
||||
def edit_comment_from_comment_id(self, comment_id: int, body: str):
    """Fetch the merge-request note ``comment_id``, set its body to ``body`` and save it."""
    comment = self.mr.notes.get(comment_id)
    comment.body = body
    comment.save()
|
||||
|
||||
def reply_to_comment_from_comment_id(self, comment_id: int, body: str):
|
||||
discussion = self.mr.discussions.get(comment_id)
|
||||
discussion.notes.create({'body': body})
|
||||
@ -200,6 +205,10 @@ class GitLabProvider(GitProvider):
|
||||
def create_inline_comments(self, comments: list[dict]):
|
||||
raise NotImplementedError("Gitlab provider does not support publishing inline comments yet")
|
||||
|
||||
def get_comment_body_from_comment_id(self, comment_id: int):
    """Look up the merge-request note ``comment_id`` via ``self.mr.notes.get``.

    NOTE(review): despite the name, this returns the note object itself,
    not its ``body`` text - confirm what callers expect before changing it.
    """
    comment = self.mr.notes.get(comment_id)
    return comment
|
||||
|
||||
def send_inline_comment(self,body: str,edit_type: str,found: bool,relevant_file: str,relevant_line_in_file: int,
|
||||
source_line_no: int, target_file: str,target_line_no: int) -> None:
|
||||
if not found:
|
||||
@ -362,7 +371,7 @@ class GitLabProvider(GitProvider):
|
||||
return self.mr.description
|
||||
|
||||
def get_issue_comments(self):
|
||||
raise NotImplementedError("GitLab provider does not support issue comments yet")
|
||||
return self.mr.notes.list(get_all=True)[::-1]
|
||||
|
||||
def get_repo_settings(self):
|
||||
try:
|
||||
|
@ -5,12 +5,13 @@ import tempfile
|
||||
from dynaconf import Dynaconf
|
||||
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.git_providers import get_git_provider
|
||||
from pr_agent.git_providers import get_git_provider, get_git_provider_with_context
|
||||
from pr_agent.log import get_logger
|
||||
from starlette_context import context
|
||||
|
||||
|
||||
def apply_repo_settings(pr_url):
|
||||
git_provider = get_git_provider_with_context(pr_url)
|
||||
if get_settings().config.use_repo_settings_file:
|
||||
repo_settings_file = None
|
||||
try:
|
||||
@ -20,7 +21,6 @@ def apply_repo_settings(pr_url):
|
||||
repo_settings = None
|
||||
pass
|
||||
if repo_settings is None: # None is different from "", which is a valid value
|
||||
git_provider = get_git_provider()(pr_url)
|
||||
repo_settings = git_provider.get_repo_settings()
|
||||
try:
|
||||
context["repo_settings"] = repo_settings
|
||||
|
@ -26,8 +26,9 @@ from pr_agent.git_providers.utils import apply_repo_settings
|
||||
from pr_agent.log import get_logger
|
||||
from fastapi import Request, Depends
|
||||
from fastapi.security import HTTPBasic, HTTPBasicCredentials
|
||||
from pr_agent.log import get_logger
|
||||
from pr_agent.log import LoggingFormat, get_logger, setup_logger
|
||||
|
||||
setup_logger(fmt=LoggingFormat.JSON, level="DEBUG")
|
||||
security = HTTPBasic()
|
||||
router = APIRouter()
|
||||
available_commands_rgx = re.compile(r"^\/(" + "|".join(command2class.keys()) + r")\s*")
|
||||
@ -40,8 +41,15 @@ def handle_request(
|
||||
):
|
||||
log_context["action"] = body
|
||||
log_context["api_url"] = url
|
||||
|
||||
async def inner():
|
||||
try:
|
||||
with get_logger().contextualize(**log_context):
|
||||
background_tasks.add_task(PRAgent().handle_request, url, body)
|
||||
await PRAgent().handle_request(url, body)
|
||||
except Exception as e:
|
||||
get_logger().error(f"Failed to handle webhook: {e}")
|
||||
|
||||
background_tasks.add_task(inner)
|
||||
|
||||
|
||||
# currently only basic auth is supported with azure webhooks
|
||||
@ -123,7 +131,7 @@ async def handle_webhook(background_tasks: BackgroundTasks, request: Request):
|
||||
content=json.dumps({"message": "Internal server error"}),
|
||||
)
|
||||
return JSONResponse(
|
||||
status_code=status.HTTP_202_ACCEPTED, content=jsonable_encoder({"message": "webhook triggerd successfully"})
|
||||
status_code=status.HTTP_202_ACCEPTED, content=jsonable_encoder({"message": "webhook triggered successfully"})
|
||||
)
|
||||
|
||||
@router.get("/")
|
||||
|
@ -13,9 +13,10 @@ from starlette_context.middleware import RawContextMiddleware
|
||||
|
||||
from pr_agent.agent.pr_agent import PRAgent
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.log import get_logger
|
||||
from pr_agent.log import LoggingFormat, get_logger, setup_logger
|
||||
from pr_agent.servers.utils import verify_signature
|
||||
|
||||
setup_logger(fmt=LoggingFormat.JSON, level="DEBUG")
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@ -24,8 +25,15 @@ def handle_request(
|
||||
):
|
||||
log_context["action"] = body
|
||||
log_context["api_url"] = url
|
||||
|
||||
async def inner():
|
||||
try:
|
||||
with get_logger().contextualize(**log_context):
|
||||
background_tasks.add_task(PRAgent().handle_request, url, body)
|
||||
await PRAgent().handle_request(url, body)
|
||||
except Exception as e:
|
||||
get_logger().error(f"Failed to handle webhook: {e}")
|
||||
|
||||
background_tasks.add_task(inner)
|
||||
|
||||
|
||||
@router.post("/")
|
||||
|
@ -99,6 +99,7 @@ async def run_action():
|
||||
|
||||
# invoke by default all three tools
|
||||
if auto_describe is None or is_true(auto_describe):
|
||||
get_settings().pr_description.final_update_message = False # No final update message when auto_describe is enabled
|
||||
await PRDescription(pr_url).run()
|
||||
if auto_review is None or is_true(auto_review):
|
||||
await PRReviewer(pr_url).run()
|
||||
@ -138,8 +139,11 @@ async def run_action():
|
||||
comment_id = event_payload.get("comment", {}).get("id")
|
||||
provider = get_git_provider()(pr_url=url)
|
||||
if is_pr:
|
||||
await PRAgent().handle_request(url, body,
|
||||
notify=lambda: provider.add_eyes_reaction(comment_id, disable_eyes=disable_eyes))
|
||||
await PRAgent().handle_request(
|
||||
url, body, notify=lambda: provider.add_eyes_reaction(
|
||||
comment_id, disable_eyes=disable_eyes
|
||||
)
|
||||
)
|
||||
else:
|
||||
await PRAgent().handle_request(url, body)
|
||||
|
||||
|
@ -7,6 +7,7 @@ from typing import Any, Dict, Tuple
|
||||
|
||||
import uvicorn
|
||||
from fastapi import APIRouter, FastAPI, HTTPException, Request, Response
|
||||
from starlette.background import BackgroundTasks
|
||||
from starlette.middleware import Middleware
|
||||
from starlette_context import context
|
||||
from starlette_context.middleware import RawContextMiddleware
|
||||
@ -14,7 +15,7 @@ from starlette_context.middleware import RawContextMiddleware
|
||||
from pr_agent.agent.pr_agent import PRAgent
|
||||
from pr_agent.algo.utils import update_settings_from_args
|
||||
from pr_agent.config_loader import get_settings, global_settings
|
||||
from pr_agent.git_providers import get_git_provider
|
||||
from pr_agent.git_providers import get_git_provider, get_git_provider_with_context
|
||||
from pr_agent.git_providers.git_provider import IncrementalPR
|
||||
from pr_agent.git_providers.utils import apply_repo_settings
|
||||
from pr_agent.identity_providers import get_identity_provider
|
||||
@ -34,7 +35,7 @@ router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/api/v1/github_webhooks")
|
||||
async def handle_github_webhooks(request: Request, response: Response):
|
||||
async def handle_github_webhooks(background_tasks: BackgroundTasks, request: Request, response: Response):
|
||||
"""
|
||||
Receives and processes incoming GitHub webhook requests.
|
||||
Verifies the request signature, parses the request body, and passes it to the handle_request function for further
|
||||
@ -47,9 +48,9 @@ async def handle_github_webhooks(request: Request, response: Response):
|
||||
installation_id = body.get("installation", {}).get("id")
|
||||
context["installation_id"] = installation_id
|
||||
context["settings"] = copy.deepcopy(global_settings)
|
||||
|
||||
response = await handle_request(body, event=request.headers.get("X-GitHub-Event", None))
|
||||
return response or {}
|
||||
context["git_provider"] = {}
|
||||
background_tasks.add_task(handle_request, body, event=request.headers.get("X-GitHub-Event", None))
|
||||
return {}
|
||||
|
||||
|
||||
@router.post("/api/v1/marketplace_webhooks")
|
||||
@ -110,7 +111,7 @@ async def handle_comments_on_pr(body: Dict[str, Any],
|
||||
return {}
|
||||
log_context["api_url"] = api_url
|
||||
comment_id = body.get("comment", {}).get("id")
|
||||
provider = get_git_provider()(pr_url=api_url)
|
||||
provider = get_git_provider_with_context(pr_url=api_url)
|
||||
with get_logger().contextualize(**log_context):
|
||||
if get_identity_provider().verify_eligibility("github", sender_id, api_url) is not Eligibility.NOT_ELIGIBLE:
|
||||
get_logger().info(f"Processing comment on PR {api_url=}, comment_body={comment_body}")
|
||||
@ -127,8 +128,16 @@ async def handle_new_pr_opened(body: Dict[str, Any],
|
||||
log_context: Dict[str, Any],
|
||||
agent: PRAgent):
|
||||
title = body.get("pull_request", {}).get("title", "")
|
||||
get_settings().config.is_auto_command = True
|
||||
|
||||
|
||||
pull_request, api_url = _check_pull_request_event(action, body, log_context)
|
||||
if not (pull_request and api_url):
|
||||
get_logger().info(f"Invalid PR event: {action=} {api_url=}")
|
||||
return {}
|
||||
if action in get_settings().github_app.handle_pr_actions: # ['opened', 'reopened', 'ready_for_review']
|
||||
# logic to ignore PRs with specific titles (e.g. "[Auto] ...")
|
||||
apply_repo_settings(api_url)
|
||||
ignore_pr_title_re = get_settings().get("GITHUB_APP.IGNORE_PR_TITLE", [])
|
||||
if not isinstance(ignore_pr_title_re, list):
|
||||
ignore_pr_title_re = [ignore_pr_title_re]
|
||||
@ -136,11 +145,6 @@ async def handle_new_pr_opened(body: Dict[str, Any],
|
||||
get_logger().info(f"Ignoring PR with title '{title}' due to github_app.ignore_pr_title setting")
|
||||
return {}
|
||||
|
||||
pull_request, api_url = _check_pull_request_event(action, body, log_context)
|
||||
if not (pull_request and api_url):
|
||||
get_logger().info(f"Invalid PR event: {action=} {api_url=}")
|
||||
return {}
|
||||
if action in get_settings().github_app.handle_pr_actions: # ['opened', 'reopened', 'ready_for_review']
|
||||
if get_identity_provider().verify_eligibility("github", sender_id, api_url) is not Eligibility.NOT_ELIGIBLE:
|
||||
await _perform_auto_commands_github("pr_commands", agent, body, api_url, log_context)
|
||||
else:
|
||||
@ -199,11 +203,6 @@ async def handle_push_trigger_for_new_commits(body: Dict[str, Any],
|
||||
get_logger().info(f"Finished waiting to process push trigger for {api_url=} - continue with flow")
|
||||
|
||||
try:
|
||||
if get_settings().github_app.push_trigger_wait_for_initial_review and not get_git_provider()(api_url,
|
||||
incremental=IncrementalPR(
|
||||
True)).previous_review:
|
||||
get_logger().info(f"Skipping incremental review because there was no initial review for {api_url=} yet")
|
||||
return {}
|
||||
if get_identity_provider().verify_eligibility("github", sender_id, api_url) is not Eligibility.NOT_ELIGIBLE:
|
||||
get_logger().info(f"Performing incremental review for {api_url=} because of {event=} and {action=}")
|
||||
await _perform_auto_commands_github("push_commands", agent, body, api_url, log_context)
|
||||
@ -267,17 +266,22 @@ async def handle_request(body: Dict[str, Any], event: str):
|
||||
get_logger().info(f"Ignoring PR from '{sender=}' because it is a bot")
|
||||
return {}
|
||||
|
||||
if 'check_run' in body: # handle failed checks
|
||||
# get_logger().debug(f'Request body', artifact=body, event=event) # added inside handle_checks
|
||||
pass
|
||||
# handle comments on PRs
|
||||
if action == 'created':
|
||||
elif action == 'created':
|
||||
get_logger().debug(f'Request body', artifact=body, event=event)
|
||||
await handle_comments_on_pr(body, event, sender, sender_id, action, log_context, agent)
|
||||
# handle new PRs
|
||||
elif event == 'pull_request' and action != 'synchronize' and action != 'closed':
|
||||
get_logger().debug(f'Request body', artifact=body, event=event)
|
||||
await handle_new_pr_opened(body, event, sender, sender_id, action, log_context, agent)
|
||||
elif event == "issue_comment" and 'edited' in action:
|
||||
pass # handle_checkbox_clicked
|
||||
# handle pull_request event with synchronize action - "push trigger" for new commits
|
||||
elif event == 'pull_request' and action == 'synchronize':
|
||||
get_logger().debug(f'Request body', artifact=body, event=event)
|
||||
# get_logger().debug(f'Request body', artifact=body, event=event) # added inside handle_push_trigger_for_new_commits
|
||||
await handle_push_trigger_for_new_commits(body, event, sender,sender_id, action, log_context, agent)
|
||||
elif event == 'pull_request' and action == 'closed':
|
||||
if get_settings().get("CONFIG.ANALYTICS_FOLDER", ""):
|
||||
@ -325,7 +329,6 @@ async def _perform_auto_commands_github(commands_conf: str, agent: PRAgent, body
|
||||
apply_repo_settings(api_url)
|
||||
commands = get_settings().get(f"github_app.{commands_conf}")
|
||||
if not commands:
|
||||
with get_logger().contextualize(**log_context):
|
||||
get_logger().info(f"New PR, but no auto commands configured")
|
||||
return
|
||||
for command in commands:
|
||||
@ -334,7 +337,6 @@ async def _perform_auto_commands_github(commands_conf: str, agent: PRAgent, body
|
||||
args = split_command[1:]
|
||||
other_args = update_settings_from_args(args)
|
||||
new_command = ' '.join([command] + other_args)
|
||||
with get_logger().contextualize(**log_context):
|
||||
get_logger().info(f"{commands_conf}. Performing auto command '{new_command}', for {api_url=}")
|
||||
await agent.handle_request(api_url, new_command)
|
||||
|
||||
@ -356,5 +358,6 @@ app.include_router(router)
|
||||
def start():
    """Serve ``app`` with uvicorn on 0.0.0.0, using the ``PORT`` env var (default 3000)."""
    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", "3000")))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
start()
|
||||
|
@ -1,5 +1,6 @@
|
||||
import copy
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
import uvicorn
|
||||
from fastapi import APIRouter, FastAPI, Request, status
|
||||
@ -23,15 +24,40 @@ router = APIRouter()
|
||||
secret_provider = get_secret_provider() if get_settings().get("CONFIG.SECRET_PROVIDER") else None
|
||||
|
||||
|
||||
def handle_request(background_tasks: BackgroundTasks, url: str, body: str, log_context: dict):
|
||||
async def get_mr_url_from_commit_sha(commit_sha, gitlab_token, project_id):
|
||||
try:
|
||||
import requests
|
||||
headers = {
|
||||
'Private-Token': f'{gitlab_token}'
|
||||
}
|
||||
# API endpoint to find MRs containing the commit
|
||||
gitlab_url = get_settings().get("GITLAB.URL", 'https://gitlab.com')
|
||||
response = requests.get(
|
||||
f'{gitlab_url}/api/v4/projects/{project_id}/repository/commits/{commit_sha}/merge_requests',
|
||||
headers=headers
|
||||
)
|
||||
merge_requests = response.json()
|
||||
if merge_requests and response.status_code == 200:
|
||||
pr_url = merge_requests[0]['web_url']
|
||||
return pr_url
|
||||
else:
|
||||
get_logger().info(f"No merge requests found for commit: {commit_sha}")
|
||||
return None
|
||||
except Exception as e:
|
||||
get_logger().error(f"Failed to get MR url from commit sha: {e}")
|
||||
return None
|
||||
|
||||
async def handle_request(api_url: str, body: str, log_context: dict, sender_id: str):
|
||||
log_context["action"] = body
|
||||
log_context["event"] = "pull_request" if body == "/review" else "comment"
|
||||
log_context["api_url"] = url
|
||||
log_context["api_url"] = api_url
|
||||
|
||||
with get_logger().contextualize(**log_context):
|
||||
background_tasks.add_task(PRAgent().handle_request, url, body)
|
||||
await PRAgent().handle_request(api_url, body)
|
||||
|
||||
|
||||
async def _perform_commands_gitlab(commands_conf: str, agent: PRAgent, api_url: str, log_context: dict):
|
||||
async def _perform_commands_gitlab(commands_conf: str, agent: PRAgent, api_url: str,
|
||||
log_context: dict):
|
||||
apply_repo_settings(api_url)
|
||||
commands = get_settings().get(f"gitlab.{commands_conf}", {})
|
||||
for command in commands:
|
||||
@ -48,19 +74,22 @@ async def _perform_commands_gitlab(commands_conf: str, agent: PRAgent, api_url:
|
||||
get_logger().error(f"Failed to perform command {command}: {e}")
|
||||
|
||||
|
||||
|
||||
@router.post("/webhook")
|
||||
async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request):
|
||||
start_time = datetime.now()
|
||||
request_json = await request.json()
|
||||
|
||||
async def inner(data: dict):
|
||||
log_context = {"server_type": "gitlab_app"}
|
||||
get_logger().debug("Received a GitLab webhook")
|
||||
|
||||
# Check if the request is authorized
|
||||
if request.headers.get("X-Gitlab-Token") and secret_provider:
|
||||
request_token = request.headers.get("X-Gitlab-Token")
|
||||
secret = secret_provider.get_secret(request_token)
|
||||
try:
|
||||
secret_dict = json.loads(secret)
|
||||
gitlab_token = secret_dict["gitlab_token"]
|
||||
log_context["sender"] = secret_dict.get("token_name", secret_dict.get("id", "unknown"))
|
||||
log_context["token_id"] = secret_dict.get("token_name", secret_dict.get("id", "unknown"))
|
||||
context["settings"] = copy.deepcopy(global_settings)
|
||||
context["settings"].gitlab.personal_access_token = gitlab_token
|
||||
except Exception as e:
|
||||
@ -69,24 +98,32 @@ async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request):
|
||||
elif get_settings().get("GITLAB.SHARED_SECRET"):
|
||||
secret = get_settings().get("GITLAB.SHARED_SECRET")
|
||||
if not request.headers.get("X-Gitlab-Token") == secret:
|
||||
get_logger().error(f"Failed to validate secret")
|
||||
get_logger().error("Failed to validate secret")
|
||||
return JSONResponse(status_code=status.HTTP_401_UNAUTHORIZED, content=jsonable_encoder({"message": "unauthorized"}))
|
||||
else:
|
||||
get_logger().error(f"Failed to validate secret")
|
||||
get_logger().error("Failed to validate secret")
|
||||
return JSONResponse(status_code=status.HTTP_401_UNAUTHORIZED, content=jsonable_encoder({"message": "unauthorized"}))
|
||||
gitlab_token = get_settings().get("GITLAB.PERSONAL_ACCESS_TOKEN", None)
|
||||
if not gitlab_token:
|
||||
get_logger().error(f"No gitlab token found")
|
||||
get_logger().error("No gitlab token found")
|
||||
return JSONResponse(status_code=status.HTTP_401_UNAUTHORIZED, content=jsonable_encoder({"message": "unauthorized"}))
|
||||
|
||||
data = await request.json()
|
||||
get_logger().info("GitLab data", artifact=data)
|
||||
sender = data.get("user", {}).get("username", "unknown")
|
||||
sender_id = data.get("user", {}).get("id", "unknown")
|
||||
|
||||
# logic to ignore bot users (unlike Github, no direct flag for bot users in gitlab)
|
||||
sender_name = data.get("user", {}).get("name", "unknown").lower()
|
||||
if 'codium' in sender_name or 'bot_' in sender_name or 'bot-' in sender_name or '_bot' in sender_name or '-bot' in sender_name:
|
||||
get_logger().info(f"Skipping bot user: {sender_name}")
|
||||
return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"}))
|
||||
|
||||
log_context["sender"] = sender
|
||||
if data.get('object_kind') == 'merge_request' and data['object_attributes'].get('action') in ['open', 'reopen']:
|
||||
url = data['object_attributes'].get('url')
|
||||
get_logger().info(f"New merge request: {url}")
|
||||
await _perform_commands_gitlab("pr_commands", PRAgent(), url, log_context)
|
||||
elif data.get('object_kind') == 'note' and data['event_type'] == 'note': # comment on MR
|
||||
elif data.get('object_kind') == 'note' and data.get('event_type') == 'note': # comment on MR
|
||||
if 'merge_request' in data:
|
||||
mr = data['merge_request']
|
||||
url = mr.get('url')
|
||||
@ -95,8 +132,34 @@ async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request):
|
||||
if data.get('object_attributes', {}).get('type') == 'DiffNote' and '/ask' in body: # /ask_line
|
||||
body = handle_ask_line(body, data)
|
||||
|
||||
handle_request(background_tasks, url, body, log_context)
|
||||
await handle_request(url, body, log_context, sender_id)
|
||||
elif data.get('object_kind') == 'push' and data.get('event_name') == 'push':
|
||||
try:
|
||||
project_id = data['project_id']
|
||||
commit_sha = data['checkout_sha']
|
||||
url = await get_mr_url_from_commit_sha(commit_sha, gitlab_token, project_id)
|
||||
if not url:
|
||||
get_logger().info(f"No MR found for commit: {commit_sha}")
|
||||
return JSONResponse(status_code=status.HTTP_200_OK,
|
||||
content=jsonable_encoder({"message": "success"}))
|
||||
|
||||
# we need first to apply_repo_settings
|
||||
apply_repo_settings(url)
|
||||
commands_on_push = get_settings().get(f"gitlab.push_commands", {})
|
||||
handle_push_trigger = get_settings().get(f"gitlab.handle_push_trigger", False)
|
||||
if not commands_on_push or not handle_push_trigger:
|
||||
get_logger().info("Push event, but no push commands found or push trigger is disabled")
|
||||
return JSONResponse(status_code=status.HTTP_200_OK,
|
||||
content=jsonable_encoder({"message": "success"}))
|
||||
|
||||
get_logger().debug(f'A push event has been received: {url}')
|
||||
await _perform_commands_gitlab("push_commands", PRAgent(), url, log_context)
|
||||
except Exception as e:
|
||||
get_logger().error(f"Failed to handle push event: {e}")
|
||||
|
||||
background_tasks.add_task(inner, request_json)
|
||||
end_time = datetime.now()
|
||||
get_logger().info(f"Processing time: {end_time - start_time}", request=request_json)
|
||||
return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"}))
|
||||
|
||||
|
||||
@ -113,7 +176,7 @@ def handle_ask_line(body, data):
|
||||
path = data['object_attributes']['position']['new_path']
|
||||
side = 'RIGHT' # if line_range_['start']['type'] == 'new' else 'LEFT'
|
||||
comment_id = data['object_attributes']["discussion_id"]
|
||||
get_logger().info(f"Handling line comment")
|
||||
get_logger().info("Handling line comment")
|
||||
body = f"/ask_line --line_start={start_line} --line_end={end_line} --side={side} --file_name={path} --comment_id={comment_id} {question}"
|
||||
except Exception as e:
|
||||
get_logger().error(f"Failed to handle ask line comment: {e}")
|
||||
@ -124,7 +187,6 @@ def handle_ask_line(body, data):
|
||||
async def root():
|
||||
return {"status": "ok"}
|
||||
|
||||
def start():
|
||||
gitlab_url = get_settings().get("GITLAB.URL", None)
|
||||
if not gitlab_url:
|
||||
raise ValueError("GITLAB.URL is not set")
|
||||
@ -133,6 +195,8 @@ def start():
|
||||
app = FastAPI(middleware=middleware)
|
||||
app.include_router(router)
|
||||
|
||||
|
||||
def start():
|
||||
uvicorn.run(app, host="0.0.0.0", port=3000)
|
||||
|
||||
|
||||
|
191
pr_agent/servers/gunicorn_config.py
Normal file
191
pr_agent/servers/gunicorn_config.py
Normal file
@ -0,0 +1,191 @@
|
||||
import multiprocessing
|
||||
import os
|
||||
|
||||
# from prometheus_client import multiprocess
|
||||
|
||||
# Sample Gunicorn configuration file.
|
||||
|
||||
#
|
||||
# Server socket
|
||||
#
|
||||
# bind - The socket to bind.
|
||||
#
|
||||
# A string of the form: 'HOST', 'HOST:PORT', 'unix:PATH'.
|
||||
# An IP is a valid HOST.
|
||||
#
|
||||
# backlog - The number of pending connections. This refers
|
||||
# to the number of clients that can be waiting to be
|
||||
# served. Exceeding this number results in the client
|
||||
# getting an error when attempting to connect. It should
|
||||
# only affect servers under significant load.
|
||||
#
|
||||
# Must be a positive integer. Generally set in the 64-2048
|
||||
# range.
|
||||
#
|
||||
|
||||
# bind = '0.0.0.0:5000'
|
||||
bind = '0.0.0.0:3000'
|
||||
backlog = 2048
|
||||
|
||||
#
|
||||
# Worker processes
|
||||
#
|
||||
# workers - The number of worker processes that this server
|
||||
# should keep alive for handling requests.
|
||||
#
|
||||
# A positive integer generally in the 2-4 x $(NUM_CORES)
|
||||
# range. You'll want to vary this a bit to find the best
|
||||
# for your particular application's work load.
|
||||
#
|
||||
# worker_class - The type of workers to use. The default
|
||||
# sync class should handle most 'normal' types of work
|
||||
# loads. You'll want to read
|
||||
# http://docs.gunicorn.org/en/latest/design.html#choosing-a-worker-type
|
||||
# for information on when you might want to choose one
|
||||
# of the other worker classes.
|
||||
#
|
||||
# A string referring to a Python path to a subclass of
|
||||
# gunicorn.workers.base.Worker. The default provided values
|
||||
# can be seen at
|
||||
# http://docs.gunicorn.org/en/latest/settings.html#worker-class
|
||||
#
|
||||
# worker_connections - For the eventlet and gevent worker classes
|
||||
# this limits the maximum number of simultaneous clients that
|
||||
# a single process can handle.
|
||||
#
|
||||
# A positive integer generally set to around 1000.
|
||||
#
|
||||
# timeout - If a worker does not notify the master process in this
|
||||
# number of seconds it is killed and a new worker is spawned
|
||||
# to replace it.
|
||||
#
|
||||
# Generally set to thirty seconds. Only set this noticeably
|
||||
# higher if you're sure of the repercussions for sync workers.
|
||||
# For the non sync workers it just means that the worker
|
||||
# process is still communicating and is not tied to the length
|
||||
# of time required to handle a single request.
|
||||
#
|
||||
# keepalive - The number of seconds to wait for the next request
|
||||
# on a Keep-Alive HTTP connection.
|
||||
#
|
||||
# A positive integer. Generally set in the 1-5 seconds range.
|
||||
#
|
||||
|
||||
if os.getenv('GUNICORN_WORKERS', None):
|
||||
workers = int(os.getenv('GUNICORN_WORKERS'))
|
||||
else:
|
||||
cores = multiprocessing.cpu_count()
|
||||
workers = cores * 2 + 1
|
||||
worker_connections = 1000
|
||||
timeout = 240
|
||||
keepalive = 2
|
||||
|
||||
#
|
||||
# spew - Install a trace function that spews every line of Python
|
||||
# that is executed when running the server. This is the
|
||||
# nuclear option.
|
||||
#
|
||||
# True or False
|
||||
#
|
||||
|
||||
spew = False
|
||||
|
||||
#
|
||||
# Server mechanics
|
||||
#
|
||||
# daemon - Detach the main Gunicorn process from the controlling
|
||||
# terminal with a standard fork/fork sequence.
|
||||
#
|
||||
# True or False
|
||||
#
|
||||
# raw_env - Pass environment variables to the execution environment.
|
||||
#
|
||||
# pidfile - The path to a pid file to write
|
||||
#
|
||||
# A path string or None to not write a pid file.
|
||||
#
|
||||
# user - Switch worker processes to run as this user.
|
||||
#
|
||||
# A valid user id (as an integer) or the name of a user that
|
||||
# can be retrieved with a call to pwd.getpwnam(value) or None
|
||||
# to not change the worker process user.
|
||||
#
|
||||
# group - Switch worker process to run as this group.
|
||||
#
|
||||
# A valid group id (as an integer) or the name of a group that
|
||||
# can be retrieved with a call to grp.getgrnam(value) or None
|
||||
# to not change the worker processes group.
|
||||
#
|
||||
# umask - A mask for file permissions written by Gunicorn. Note that
|
||||
# this affects unix socket permissions.
|
||||
#
|
||||
# A valid value for the os.umask(mode) call or a string
|
||||
# compatible with int(value, 0) (0 means Python guesses
|
||||
# the base, so values like "0", "0xFF", "0022" are valid
|
||||
# for decimal, hex, and octal representations)
|
||||
#
|
||||
# tmp_upload_dir - A directory to store temporary request data when
|
||||
# requests are read. This will most likely be disappearing soon.
|
||||
#
|
||||
# A path to a directory where the process owner can write. Or
|
||||
# None to signal that Python should choose one on its own.
|
||||
#
|
||||
|
||||
daemon = False
|
||||
raw_env = []
|
||||
pidfile = None
|
||||
umask = 0
|
||||
user = None
|
||||
group = None
|
||||
tmp_upload_dir = None
|
||||
|
||||
#
|
||||
# Logging
|
||||
#
|
||||
# logfile - The path to a log file to write to.
|
||||
#
|
||||
# A path string. "-" means log to stdout.
|
||||
#
|
||||
# loglevel - The granularity of log output
|
||||
#
|
||||
# A string of "debug", "info", "warning", "error", "critical"
|
||||
#
|
||||
|
||||
errorlog = '-'
|
||||
loglevel = 'info'
|
||||
accesslog = None
|
||||
access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"'
|
||||
|
||||
#
|
||||
# Process naming
|
||||
#
|
||||
# proc_name - A base to use with setproctitle to change the way
|
||||
# that Gunicorn processes are reported in the system process
|
||||
# table. This affects things like 'ps' and 'top'. If you're
|
||||
# going to be running more than one instance of Gunicorn you'll
|
||||
# probably want to set a name to tell them apart. This requires
|
||||
# that you install the setproctitle module.
|
||||
#
|
||||
# A string or None to choose a default of something like 'gunicorn'.
|
||||
#
|
||||
|
||||
proc_name = None
|
||||
|
||||
|
||||
#
|
||||
# Server hooks
|
||||
#
|
||||
# post_fork - Called just after a worker has been forked.
|
||||
#
|
||||
# A callable that takes a server and worker instance
|
||||
# as arguments.
|
||||
#
|
||||
# pre_fork - Called just prior to forking the worker subprocess.
|
||||
#
|
||||
# A callable that accepts the same arguments as post_fork
|
||||
#
|
||||
# pre_exec - Called just prior to forking off a secondary
|
||||
# master process during things like config reloading.
|
||||
#
|
||||
# A callable that takes a server instance as the sole argument.
|
||||
#
|
@ -1,6 +1,6 @@
|
||||
[config]
|
||||
model="gpt-4-turbo-2024-04-09"
|
||||
model_turbo="gpt-4-turbo-2024-04-09"
|
||||
model_turbo="gpt-4o"
|
||||
fallback_models=["gpt-4-0125-preview"]
|
||||
git_provider="github"
|
||||
publish_output=true
|
||||
@ -15,10 +15,13 @@ max_description_tokens = 500
|
||||
max_commits_tokens = 500
|
||||
max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities.
|
||||
patch_extra_lines = 1
|
||||
secret_provider="google_cloud_storage"
|
||||
secret_provider=""
|
||||
cli_mode=false
|
||||
ai_disclaimer_title="" # Pro feature, title for a collapsible disclaimer to AI outputs
|
||||
ai_disclaimer="" # Pro feature, full text for the AI disclaimer
|
||||
output_relevant_configurations=false
|
||||
large_patch_policy = "clip" # "clip", "skip"
|
||||
is_auto_command=false
|
||||
|
||||
[pr_reviewer] # /review #
|
||||
# enable/disable features
|
||||
@ -26,11 +29,12 @@ require_score_review=false
|
||||
require_tests_review=true
|
||||
require_estimate_effort_to_review=true
|
||||
require_can_be_split_review=false
|
||||
require_security_review=true
|
||||
# soc2
|
||||
require_soc2_ticket=false
|
||||
soc2_ticket_prompt="Does the PR description include a link to ticket in a project management system (e.g., Jira, Asana, Trello, etc.) ?"
|
||||
# general options
|
||||
num_code_suggestions=4
|
||||
num_code_suggestions=0
|
||||
inline_code_comments = false
|
||||
ask_and_reflect=false
|
||||
#automatic_review=true
|
||||
@ -70,7 +74,11 @@ inline_file_summary=false # false, true, 'table'
|
||||
# markers
|
||||
use_description_markers=false
|
||||
include_generated_by_header=true
|
||||
|
||||
# large pr mode 💎
|
||||
enable_large_pr_handling=true
|
||||
max_ai_calls=4
|
||||
async_ai_calls=true
|
||||
mention_extra_files=true
|
||||
#custom_labels = ['Bug fix', 'Tests', 'Bug fix with tests', 'Enhancement', 'Documentation', 'Other']
|
||||
|
||||
[pr_questions] # /ask #
|
||||
@ -78,30 +86,58 @@ enable_help_text=false
|
||||
|
||||
|
||||
[pr_code_suggestions] # /improve #
|
||||
max_context_tokens=8000
|
||||
max_context_tokens=10000
|
||||
num_code_suggestions=4
|
||||
commitable_code_suggestions = false
|
||||
extra_instructions = ""
|
||||
rank_suggestions = false
|
||||
enable_help_text=false
|
||||
persistent_comment=false
|
||||
persistent_comment=true
|
||||
max_history_len=4
|
||||
# enable to apply suggestion 💎
|
||||
apply_suggestions_checkbox=true
|
||||
# suggestions scoring
|
||||
self_reflect_on_suggestions=true
|
||||
suggestions_score_threshold=0 # [0-10]. highly recommend not to set this value above 8, since above it may clip highly relevant suggestions
|
||||
# params for '/improve --extended' mode
|
||||
auto_extended_mode=true
|
||||
num_code_suggestions_per_chunk=5
|
||||
num_code_suggestions_per_chunk=4
|
||||
max_number_of_calls = 3
|
||||
parallel_calls = true
|
||||
rank_extended_suggestions = false
|
||||
final_clip_factor = 0.8
|
||||
# self-review checkbox
|
||||
demand_code_suggestions_self_review=false # add a checkbox for the author to self-review the code suggestions
|
||||
code_suggestions_self_review_text= "**Author self-review**: I have reviewed the PR code suggestions, and addressed the relevant ones."
|
||||
approve_pr_on_self_review=false # Pro feature. if true, the PR will be auto-approved after the author clicks on the self-review checkbox
|
||||
# Suggestion impact
|
||||
publish_post_process_suggestion_impact=true
|
||||
|
||||
[pr_custom_prompt] # /custom_prompt #
|
||||
prompt = """\
|
||||
The code suggestions should focus only on the following:
|
||||
- ...
|
||||
- ...
|
||||
...
|
||||
"""
|
||||
suggestions_score_threshold=0
|
||||
num_code_suggestions_per_chunk=4
|
||||
self_reflect_on_custom_suggestions=true
|
||||
enable_help_text=false
|
||||
|
||||
|
||||
[pr_add_docs] # /add_docs #
|
||||
extra_instructions = ""
|
||||
docs_style = "Sphinx Style" # "Google Style with Args, Returns, Attributes...etc", "Numpy Style", "Sphinx Style", "PEP257", "reStructuredText"
|
||||
docs_style = "Sphinx" # "Google Style with Args, Returns, Attributes...etc", "Numpy Style", "Sphinx Style", "PEP257", "reStructuredText"
|
||||
file = "" # in case there are several components with the same name, you can specify the relevant file
|
||||
class_name = "" # in case there are several methods with the same name in the same file, you can specify the relevant class name
|
||||
|
||||
[pr_update_changelog] # /update_changelog #
|
||||
push_changelog_changes=false
|
||||
extra_instructions = ""
|
||||
|
||||
[pr_analyze] # /analyze #
|
||||
enable_help_text=true
|
||||
|
||||
[pr_test] # /test #
|
||||
extra_instructions = ""
|
||||
@ -116,13 +152,14 @@ enable_help_text=false
|
||||
num_code_suggestions=4
|
||||
extra_instructions = ""
|
||||
file = "" # in case there are several components with the same name, you can specify the relevant file
|
||||
class_name = ""
|
||||
class_name = "" # in case there are several methods with the same name in the same file, you can specify the relevant class name
|
||||
|
||||
[checks] # /checks (pro feature) #
|
||||
enable_auto_checks_feedback=true
|
||||
excluded_checks_list=["lint"] # list of checks to exclude, for example: ["check1", "check2"]
|
||||
persistent_comment=true
|
||||
enable_help_text=true
|
||||
final_update_message = false
|
||||
|
||||
[pr_help] # /help #
|
||||
|
||||
@ -135,20 +172,21 @@ ratelimit_retries = 5
|
||||
base_url = "https://api.github.com"
|
||||
publish_inline_comments_fallback_with_verification = true
|
||||
try_fix_invalid_inline_comments = true
|
||||
app_name = "pr-agent"
|
||||
|
||||
[github_action_config]
|
||||
# auto_review = true # set as env var in .github/workflows/pr-agent.yaml
|
||||
# auto_describe = true # set as env var in .github/workflows/pr-agent.yaml
|
||||
# auto_improve = true # set as env var in .github/workflows/pr-agent.yaml
|
||||
# enable_output = true # set as env var in .github/workflows/pr-agent.yaml
|
||||
|
||||
[github_app]
|
||||
# these toggles allow running the github app from custom deployments
|
||||
bot_user = "github-actions[bot]"
|
||||
override_deployment_type = true
|
||||
# settings for "pull_request" event
|
||||
handle_pr_actions = ['opened', 'reopened', 'ready_for_review']
|
||||
pr_commands = [
|
||||
"/describe",
|
||||
"/describe --pr_description.final_update_message=false",
|
||||
"/review --pr_reviewer.num_code_suggestions=0",
|
||||
"/improve",
|
||||
]
|
||||
@ -167,17 +205,29 @@ ignore_pr_title = []
|
||||
ignore_bot_pr = true
|
||||
|
||||
[gitlab]
|
||||
url = "https://gitlab.com" # URL to the gitlab service
|
||||
# URL to the gitlab service
|
||||
url = "https://gitlab.com"
|
||||
# Polling (either project id or namespace/project_name) syntax can be used
|
||||
projects_to_monitor = ['org_name/repo_name']
|
||||
# Polling trigger
|
||||
magic_word = "AutoReview"
|
||||
# Polling interval
|
||||
polling_interval_seconds = 30
|
||||
pr_commands = [
|
||||
"/describe",
|
||||
"/review --pr_reviewer.num_code_suggestions=0",
|
||||
"/improve",
|
||||
]
|
||||
handle_push_trigger = false
|
||||
push_commands = [
|
||||
"/describe",
|
||||
"/review --pr_reviewer.num_code_suggestions=0",
|
||||
]
|
||||
|
||||
[bitbucket_app]
|
||||
pr_commands = [
|
||||
"/review --pr_reviewer.num_code_suggestions=0",
|
||||
"/improve --pr_code_suggestions.commitable_code_suggestions=true",
|
||||
"/improve --pr_code_suggestions.commitable_code_suggestions=true --pr_code_suggestions.suggestions_score_threshold=7",
|
||||
]
|
||||
|
||||
|
||||
@ -211,6 +261,14 @@ force_update_dataset = false
|
||||
max_issues_to_scan = 500
|
||||
vectordb = "pinecone"
|
||||
|
||||
[pr_find_similar_component]
|
||||
class_name = ""
|
||||
file = ""
|
||||
search_from_org = false
|
||||
allow_fallback_less_words = true
|
||||
number_of_keywords = 5
|
||||
number_of_results = 5
|
||||
|
||||
[pinecone]
|
||||
# fill and place in .secrets.toml
|
||||
#api_key = ...
|
||||
|
@ -8,4 +8,5 @@ glob = [
|
||||
regex = [
|
||||
# Ignore files and directories matching these regex patterns.
|
||||
# See https://learnbyexample.github.io/python-regex-cheatsheet/
|
||||
# for example: regex = ['.*\.toml$']
|
||||
]
|
||||
|
@ -44,6 +44,7 @@ default = [
|
||||
'ss',
|
||||
'svg',
|
||||
'tar',
|
||||
'tgz',
|
||||
'tsv',
|
||||
'ttf',
|
||||
'war',
|
||||
|
@ -1,8 +1,9 @@
|
||||
[pr_code_suggestions_prompt]
|
||||
system="""You are PR-Reviewer, a language model that specializes in suggesting code improvements for a Pull Request (PR).
|
||||
Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR diff (lines starting with '+').
|
||||
system="""You are PR-Reviewer, a language model that specializes in suggesting ways to improve the code of a Pull Request (PR).
|
||||
Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR diff.
|
||||
|
||||
Example for the PR Diff format:
|
||||
|
||||
The format we will use to present the PR code diff:
|
||||
======
|
||||
## file: 'src/file1.py'
|
||||
|
||||
@ -26,22 +27,27 @@ __old hunk__
|
||||
## file: 'src/file2.py'
|
||||
...
|
||||
======
|
||||
- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed.
|
||||
- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.
|
||||
- Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \
|
||||
Suggestions should always focus on ways to improve the new code lines introduced in the PR, meaning lines in the '__new hunk__' sections that begin with a '+' symbol (after the line numbers). The '__old hunk__' sections code is for context and reference only.
|
||||
|
||||
|
||||
Specific instructions:
|
||||
Specific instructions for generating code suggestions:
|
||||
- Provide up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful.
|
||||
- The suggestions should refer only to code from the '__new hunk__' sections, and focus on new lines of code (lines starting with '+').
|
||||
- Prioritize suggestions that address major problems, issues and bugs in the PR code. As a second priority, suggestions should focus on enhancement, best practice, performance, maintainability, and other aspects.
|
||||
- The suggestions should focus on improving the new code introduced in the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers).
|
||||
- Prioritize suggestions that address possible issues, major problems, and bugs in the PR code.
|
||||
- Don't suggest to add docstring, type hints, or comments, or to remove unused imports.
|
||||
- Suggestions should not repeat code already present in the '__new hunk__' sections.
|
||||
- Provide the exact line numbers range (inclusive) for each suggestion.
|
||||
- Provide the exact line numbers range (inclusive) for each suggestion. Use the line numbers from the '__new hunk__' sections.
|
||||
- When quoting variables or names from the code, use backticks (`) instead of single quote (').
|
||||
- Take into account that you are reviewing a PR code diff, and that the entire codebase is not available for you as context. Hence, avoid suggestions that might conflict with unseen parts of the codebase.
|
||||
|
||||
|
||||
{%- if extra_instructions %}
|
||||
|
||||
|
||||
Extra instructions from the user:
|
||||
Extra instructions from the user, that should be taken into account with high priority:
|
||||
======
|
||||
{{ extra_instructions }}
|
||||
======
|
||||
@ -54,17 +60,12 @@ class CodeSuggestion(BaseModel):
|
||||
relevant_file: str = Field(description="the relevant file full path")
|
||||
language: str = Field(description="the code language of the relevant file")
|
||||
suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR")
|
||||
{%- if not commitable_code_suggestions_mode %}
|
||||
existing_code: str = Field(description="a short code snippet from a '__new hunk__' section to illustrate the relevant existing code. Don't show the line numbers.")
|
||||
improved_code: str = Field(description="a short code snippet to illustrate the improved code, after applying the suggestion.")
|
||||
existing_code: str = Field(description="a short code snippet, demonstrating the relevant code lines from a '__new hunk__' section. It must be without line numbers. Use abbreviations if needed")
|
||||
improved_code: str = Field(description="a new code snippet, that can be used to replace the relevant 'existing_code' lines in '__new hunk__' code after applying the suggestion")
|
||||
one_sentence_summary: str = Field(description="a short summary of the suggestion action, in a single sentence. Focus on the 'what'. Be general, and avoid method or variable names.")
|
||||
{%- else %}
|
||||
existing_code: str = Field(description="a code snippet, demonstrating the relevant code lines from a '__new hunk__' section. It must be contiguous, correctly formatted and indented, and without line numbers")
|
||||
improved_code: str = Field(description="a new code snippet, that can be used to replace the relevant lines in '__new hunk__' code. Replacement suggestions should be complete, correctly formatted and indented, and without line numbers")
|
||||
{%- endif %}
|
||||
relevant_lines_start: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion starts (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above")
|
||||
relevant_lines_end: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion ends (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above")
|
||||
label: str = Field(description="a single label for the suggestion, to help the user understand the suggestion type. For example: 'security', 'bug', 'performance', 'enhancement', 'possible issue', 'best practice', 'maintainability', etc. Other labels are also allowed")
|
||||
label: str = Field(description="a single label for the suggestion, to help the user understand the suggestion type. For example: 'security', 'possible bug', 'possible issue', 'performance', 'enhancement', 'best practice', 'maintainability', etc. Other labels are also allowed")
|
||||
|
||||
class PRCodeSuggestions(BaseModel):
|
||||
code_suggestions: List[CodeSuggestion]
|
||||
@ -80,7 +81,6 @@ code_suggestions:
|
||||
python
|
||||
suggestion_content: |
|
||||
...
|
||||
{%- if not commitable_code_suggestions_mode %}
|
||||
existing_code: |
|
||||
...
|
||||
improved_code: |
|
||||
@ -89,14 +89,6 @@ code_suggestions:
|
||||
...
|
||||
relevant_lines_start: 12
|
||||
relevant_lines_end: 13
|
||||
{%- else %}
|
||||
existing_code: |
|
||||
...
|
||||
relevant_lines_start: 12
|
||||
relevant_lines_end: 13
|
||||
improved_code: |
|
||||
...
|
||||
{%- endif %}
|
||||
label: |
|
||||
...
|
||||
```
|
||||
@ -119,3 +111,102 @@ The PR Diff:
|
||||
Response (should be a valid YAML, and nothing else):
|
||||
```yaml
|
||||
"""
|
||||
|
||||
|
||||
[pr_code_suggestions_prompt_claude]
|
||||
system="""You are PR-Reviewer, a language model that specializes in suggesting ways to improve the code of a Pull Request (PR).
|
||||
Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR diff.
|
||||
|
||||
|
||||
The format we will use to present the PR code diff:
|
||||
======
|
||||
## file: 'src/file1.py'
|
||||
|
||||
@@ ... @@ def func1():
|
||||
__new hunk__
|
||||
12 code line1 that remained unchanged in the PR
|
||||
13 +new hunk code line2 added in the PR
|
||||
14 code line3 that remained unchanged in the PR
|
||||
__old hunk__
|
||||
code line1 that remained unchanged in the PR
|
||||
-old hunk code line2 that was removed in the PR
|
||||
code line3 that remained unchanged in the PR
|
||||
|
||||
@@ ... @@ def func2():
|
||||
__new hunk__
|
||||
...
|
||||
__old hunk__
|
||||
...
|
||||
|
||||
|
||||
## file: 'src/file2.py'
|
||||
...
|
||||
======
|
||||
- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed.
|
||||
- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.
|
||||
- Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \
|
||||
Suggestions should always focus on ways to improve the new code lines introduced in the PR, meaning lines in the '__new hunk__' sections that begin with a '+' symbol (after the line numbers). The '__old hunk__' sections code is for context and reference only.
|
||||
|
||||
|
||||
Specific instructions for generating code suggestions:
|
||||
- Provide up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful.
|
||||
- The suggestions should focus on improving the new code introduced in the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers).
|
||||
- Prioritize suggestions that address possible issues, major problems, and bugs in the PR code.
|
||||
- Don't suggest to add docstring, type hints, or comments, or to remove unused imports.
|
||||
- Provide the exact line numbers range (inclusive) for each suggestion. Use the line numbers from the '__new hunk__' sections.
|
||||
- When quoting variables or names from the code, use backticks (`) instead of single quote (').
|
||||
- Take into account that you are receiving as an input only a PR code diff. The entire codebase is not available for you as context. Hence, avoid suggestions that might conflict with unseen parts of the codebase, like imports, global variables, etc.
|
||||
|
||||
|
||||
{%- if extra_instructions %}
|
||||
|
||||
|
||||
Extra instructions from the user, that should be taken into account with high priority:
|
||||
======
|
||||
{{ extra_instructions }}
|
||||
======
|
||||
{%- endif %}
|
||||
|
||||
|
||||
The output must be a YAML object equivalent to type $PRCodeSuggestions, according to the following Pydantic definitions:
|
||||
=====
|
||||
class CodeSuggestion(BaseModel):
|
||||
relevant_file: str = Field(description="the relevant file full path")
|
||||
language: str = Field(description="the code language of the relevant file")
|
||||
suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR. Don't present here actual code snippets, just the suggestion. Be short and concise ")
|
||||
existing_code: str = Field(description="a short code snippet, demonstrating the relevant code lines from a '__new hunk__' section. It must be without line numbers. Use abbreviations ("...") if needed")
|
||||
improved_code: str = Field(description="a new code snippet, that can be used to replace the relevant 'existing_code' lines in '__new hunk__' code after applying the suggestion")
|
||||
one_sentence_summary: str = Field(description="a short summary of the suggestion action, in a single sentence. Focus on the 'what'. Be general, and avoid method or variable names.")
|
||||
relevant_lines_start: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion starts (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above")
|
||||
relevant_lines_end: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion ends (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above")
|
||||
label: str = Field(description="a single label for the suggestion, to help understand the suggestion type. For example: 'security', 'possible bug', 'possible issue', 'performance', 'enhancement', 'best practice', 'maintainability', etc. Other labels are also allowed")
|
||||
|
||||
class PRCodeSuggestions(BaseModel):
|
||||
code_suggestions: List[CodeSuggestion]
|
||||
=====
|
||||
|
||||
|
||||
Example output:
|
||||
```yaml
|
||||
code_suggestions:
|
||||
- relevant_file: |
|
||||
src/file1.py
|
||||
language: |
|
||||
python
|
||||
suggestion_content: |
|
||||
...
|
||||
existing_code: |
|
||||
...
|
||||
improved_code: |
|
||||
...
|
||||
one_sentence_summary: |
|
||||
...
|
||||
relevant_lines_start: 12
|
||||
relevant_lines_end: 13
|
||||
label: |
|
||||
...
|
||||
```
|
||||
|
||||
|
||||
Each YAML output MUST be after a newline, indented, with block scalar indicator ('|').
|
||||
"""
|
89
pr_agent/settings/pr_code_suggestions_reflect_prompts.toml
Normal file
89
pr_agent/settings/pr_code_suggestions_reflect_prompts.toml
Normal file
@ -0,0 +1,89 @@
|
||||
[pr_code_suggestions_reflect_prompt]
|
||||
system="""You are a language model that specializes in reviewing and evaluating suggestions for a Pull Request (PR) code.
|
||||
|
||||
Your input is a PR code, and a list of code suggestions that were generated for the PR.
|
||||
Your goal is to inspect, review and score the suggestions.
|
||||
Be aware - the suggestions may not always be correct or accurate, and you should evaluate them in relation to the actual PR code diff presented. Sometimes the suggestion may ignore parts of the actual code diff, and in that case, you should give it a score of 0.
|
||||
|
||||
Specific instructions:
|
||||
- Carefully review both the suggestion content, and the related PR code diff. Mistakes in the suggestions can occur. Make sure the suggestions are correct, and properly derived from the PR code diff.
|
||||
- In addition to the exact code lines mentioned in each suggestion, review the code around them, to ensure that the suggestions are contextually accurate.
|
||||
- Also check that the 'existing_code' and 'improved_code' fields correctly reflect the suggested changes.
|
||||
- Make sure the suggestions focus on new code introduced in the PR, and not on existing code that was not changed.
|
||||
- High scores (8 to 10) should be given to correct suggestions that address major bugs and issues, or security concerns. Lower scores (3 to 7) should be for correct suggestions addressing minor issues, code style, code readability, maintainability, etc. Don't give high scores to suggestions that are not crucial, and bring only small improvement or optimization.
|
||||
- Order the feedback the same way the suggestions are ordered in the input.
|
||||
|
||||
|
||||
The format that is used to present the PR code diff is as follows:
|
||||
======
|
||||
## file: 'src/file1.py'
|
||||
|
||||
@@ ... @@ def func1():
|
||||
__new hunk__
|
||||
12 code line1 that remained unchanged in the PR
|
||||
13 +new hunk code line2 added in the PR
|
||||
14 code line3 that remained unchanged in the PR
|
||||
__old hunk__
|
||||
code line1 that remained unchanged in the PR
|
||||
-old hunk code line2 that was removed in the PR
|
||||
code line3 that remained unchanged in the PR
|
||||
|
||||
@@ ... @@ def func2():
|
||||
__new hunk__
|
||||
...
|
||||
__old hunk__
|
||||
...
|
||||
|
||||
|
||||
## file: 'src/file2.py'
|
||||
...
|
||||
======
|
||||
- In this format, we separated each hunk of code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code that was removed.
|
||||
- Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code.
|
||||
- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.
|
||||
|
||||
|
||||
The output must be a YAML object equivalent to type $PRCodeSuggestionsFeedback, according to the following Pydantic definitions:
|
||||
=====
|
||||
class CodeSuggestionFeedback(BaseModel):
|
||||
suggestion_summary: str = Field(description="repeated from the input")
|
||||
relevant_file: str = Field(description="repeated from the input")
|
||||
suggestion_score: int = Field(description="The actual output - the score of the suggestion, from 0 to 10. Give 0 if the suggestion is plain wrong. Otherwise, give a score from 1 to 10 (inclusive), where 1 is the lowest and 10 is the highest.")
|
||||
why: str = Field(description="Short and concise explanation of why the suggestion received the score (one to two sentences).")
|
||||
|
||||
class PRCodeSuggestionsFeedback(BaseModel):
|
||||
code_suggestions: List[CodeSuggestionFeedback]
|
||||
=====
|
||||
|
||||
|
||||
Example output:
|
||||
```yaml
|
||||
code_suggestions:
|
||||
- suggestion_summary: |
|
||||
Use a more descriptive variable name here
|
||||
relevant_file: "src/file1.py"
|
||||
suggestion_score: 6
|
||||
why: |
|
||||
The variable name 't' is not descriptive enough
|
||||
- ...
|
||||
```
|
||||
|
||||
|
||||
Each YAML output MUST be after a newline, indented, with block scalar indicator ('|').
|
||||
"""
|
||||
|
||||
user="""You are given a Pull Request (PR) code diff:
|
||||
======
|
||||
{{ diff|trim }}
|
||||
======
|
||||
|
||||
|
||||
And here is a list of corresponding {{ num_code_suggestions }} code suggestions to improve this Pull Request code:
|
||||
======
|
||||
{{ suggestion_str|trim }}
|
||||
======
|
||||
|
||||
|
||||
Response (should be a valid YAML, and nothing else):
|
||||
```yaml
|
||||
"""
|
@ -37,7 +37,7 @@ class PRType(str, Enum):
|
||||
|
||||
{%- if enable_semantic_files_types %}
|
||||
|
||||
Class FileDescription(BaseModel):
|
||||
class FileDescription(BaseModel):
|
||||
filename: str = Field(description="the relevant file full path")
|
||||
language: str = Field(description="the relevant file language")
|
||||
changes_summary: str = Field(description="concise summary of the changes in the relevant file, in bullet points (1-4 bullet points).")
|
||||
@ -45,7 +45,7 @@ Class FileDescription(BaseModel):
|
||||
label: str = Field(description="a single semantic label that represents a type of code changes that occurred in the File. Possible values (partial list): 'bug fix', 'tests', 'enhancement', 'documentation', 'error handling', 'configuration changes', 'dependencies', 'formatting', 'miscellaneous', ...")
|
||||
{%- endif %}
|
||||
|
||||
Class PRDescription(BaseModel):
|
||||
class PRDescription(BaseModel):
|
||||
type: List[PRType] = Field(description="one or more types that describe the PR content. Return the label member value (e.g. 'Bug fix', not 'bug_fix')")
|
||||
{%- if enable_semantic_files_types %}
|
||||
pr_files[List[FileDescription]] = Field(max_items=15, description="a list of the files in the PR, and their changes summary.")
|
||||
|
68
pr_agent/settings/pr_evaluate_prompt_response.toml
Normal file
68
pr_agent/settings/pr_evaluate_prompt_response.toml
Normal file
@ -0,0 +1,68 @@
|
||||
[pr_evaluate_prompt]
|
||||
prompt="""\
|
||||
You are the PR-task-evaluator, a language model that compares and ranks the quality of two responses provided in response to a lengthy task regarding a Pull Request (PR) code diff.
|
||||
|
||||
|
||||
The task to be evaluated is:
|
||||
|
||||
***** Start of Task *****
|
||||
{{pr_task|trim}}
|
||||
|
||||
***** End of Task *****
|
||||
|
||||
|
||||
|
||||
Response 1 to the task is:
|
||||
|
||||
***** Start of Response 1 *****
|
||||
|
||||
{{pr_response1|trim}}
|
||||
|
||||
***** End of Response 1 *****
|
||||
|
||||
|
||||
|
||||
Response 2 to the task is:
|
||||
|
||||
***** Start of Response 2 *****
|
||||
|
||||
{{pr_response2|trim}}
|
||||
|
||||
***** End of Response 2 *****
|
||||
|
||||
|
||||
|
||||
Guidelines to evaluate the responses:
|
||||
- Thoroughly read the 'Task' part. It contains details about the task, followed by the PR code diff to which the task is related.
|
||||
- Thoroughly read 'Response1' and 'Response2' parts. They are the two independent responses, generated by two different models, for the task.
|
||||
|
||||
After that, rank each response. Criteria to rank each response:
|
||||
- How well does the response follow the specific task instructions and requirements?
|
||||
- How well does the response analyze and understand the PR code diff?
|
||||
- How well will a person perceive it as a good response that correctly addresses the task?
|
||||
- How well does the response prioritize key feedback, related to the task instructions, that a human reader seeing that feedback would also consider as important?
|
||||
- Don't necessarily rank higher a response that is longer. A shorter response might be better if it is more concise, and still addresses the task better.
|
||||
|
||||
|
||||
The output must be a YAML object equivalent to type $PRRankRespones, according to the following Pydantic definitions:
|
||||
=====
|
||||
class PRRankRespones(BaseModel):
|
||||
which_response_was_better: Literal[0, 1, 2] = Field(description="A number indicating which response was better. 0 means both responses are equally good.")
|
||||
why: str = Field(description="In a short and concise manner, explain why the chosen response is better than the other. Be specific and give examples if relevant.")
|
||||
score_response1: int = Field(description="A score between 1 and 10, indicating the quality of the response1, based on the criterions mentioned in the prompt.")
|
||||
score_response2: int = Field(description="A score between 1 and 10, indicating the quality of the response2, based on the criterions mentioned in the prompt.")
|
||||
=====
|
||||
|
||||
|
||||
Example output:
|
||||
```yaml
|
||||
which_response_was_better: "X"
|
||||
why: "Response X is better because it is more practical, and addresses the task requirements better since ..."
|
||||
score_response1: ...
|
||||
score_response2: ...
|
||||
```
|
||||
|
||||
|
||||
Response (should be a valid YAML, and nothing else):
|
||||
```yaml
|
||||
"""
|
@ -57,7 +57,7 @@ class SubPR(BaseModel):
|
||||
|
||||
class Review(BaseModel):
|
||||
{%- if require_estimate_effort_to_review %}
|
||||
estimated_effort_to_review_[1-5]: str = Field(description="Estimate, on a scale of 1-5 (inclusive), the time and effort required to review this PR by an experienced and knowledgeable developer. 1 means short and easy review , 5 means long and hard review. Take into account the size, complexity, quality, and the needed changes of the PR code diff. Explain your answer in a short and concise manner.")
|
||||
estimated_effort_to_review_[1-5]: int = Field(description="Estimate, on a scale of 1-5 (inclusive), the time and effort required to review this PR by an experienced and knowledgeable developer. 1 means short and easy review , 5 means long and hard review. Take into account the size, complexity, quality, and the needed changes of the PR code diff.")
|
||||
{%- endif %}
|
||||
{%- if require_score %}
|
||||
score: str = Field(description="Rate this PR on a scale of 0-100 (inclusive), where 0 means the worst possible PR code, and 100 means PR code of the highest quality, without any bugs or performance issues, that is ready to be merged immediately and run in production at scale.")
|
||||
@ -68,10 +68,12 @@ class Review(BaseModel):
|
||||
{%- if question_str %}
|
||||
insights_from_user_answers: str = Field(description="shortly summarize the insights you gained from the user's answers to the questions")
|
||||
{%- endif %}
|
||||
possible_issues: str = Field(description="Does this PR code introduce clear issues, bugs, or major performance concerns? If there are no apparent issues, respond with 'No'. If there are any issues, describe them briefly. Use bullet points if more than one issue. Be specific, and provide examples if possible. Start each bullet point with a short specific header, such as: "- Possible Bug: ...", etc.")
|
||||
key_issues_to_review: str = Field(description="Does this PR code introduce issues, bugs, or major performance concerns, which the PR reviewer should further investigate ? If there are no apparent issues, respond with 'None'. If there are any issues, describe them briefly. Use bullet points if more than one issue. Be specific, and provide examples if possible. Start each bullet point with a short specific header, such as: "- Possible Bug: ...", etc.")
|
||||
{%- if require_security_review %}
|
||||
security_concerns: str = Field(description="does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible")
|
||||
{%- endif %}
|
||||
{%- if require_can_be_split_review %}
|
||||
can_be_split: List[SubPR] = Field(min_items=0, max_items=3, description="Can this PR, which contains {{ num_pr_files }} changed files in total, be divided into smaller sub-PRs with distinct tasks that can be reviewed and merged independently, regardless of the order ? Make sure that the sub-PRs are indeed independent, with no code dependencies between them, and that each sub-PR represent a meaningfull independent task. Output an empty list if the PR code does not needd to be split.")
|
||||
can_be_split: List[SubPR] = Field(min_items=0, max_items=3, description="Can this PR, which contains {{ num_pr_files }} changed files in total, be divided into smaller sub-PRs with distinct tasks that can be reviewed and merged independently, regardless of the order ? Make sure that the sub-PRs are indeed independent, with no code dependencies between them, and that each sub-PR represent a meaningful independent task. Output an empty list if the PR code does not need to be split.")
|
||||
{%- endif %}
|
||||
{%- if num_code_suggestions > 0 %}
|
||||
|
||||
@ -99,19 +101,19 @@ Example output:
|
||||
review:
|
||||
{%- if require_estimate_effort_to_review %}
|
||||
estimated_effort_to_review_[1-5]: |
|
||||
3, because ...
|
||||
3
|
||||
{%- endif %}
|
||||
{%- if require_score %}
|
||||
score: 89
|
||||
{%- endif %}
|
||||
relevant_tests: |
|
||||
No
|
||||
possible_issues: |
|
||||
No
|
||||
key_issues_to_review: |
|
||||
...
|
||||
security_concerns: |
|
||||
No
|
||||
{%- if require_can_be_split_review %}
|
||||
can_be_split: |
|
||||
can_be_split:
|
||||
- relevant_files:
|
||||
- ...
|
||||
- ...
|
||||
|
@ -71,10 +71,6 @@ class PRAddDocs:
|
||||
async def _prepare_prediction(self, model: str):
|
||||
get_logger().info('Getting PR diff...')
|
||||
|
||||
# Disable adding docs to scripts and other non-relevant text files
|
||||
from pr_agent.algo.language_handler import bad_extensions
|
||||
bad_extensions += get_settings().docs_blacklist_extensions.docs_blacklist
|
||||
|
||||
self.patches_diff = get_pr_diff(self.git_provider,
|
||||
self.token_handler,
|
||||
model,
|
||||
|
@ -9,20 +9,22 @@ from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
|
||||
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
|
||||
from pr_agent.algo.pr_processing import get_pr_diff, get_pr_multi_diffs, retry_with_fallback_models
|
||||
from pr_agent.algo.token_handler import TokenHandler
|
||||
from pr_agent.algo.utils import load_yaml, replace_code_tags, ModelType
|
||||
from pr_agent.algo.utils import load_yaml, replace_code_tags, ModelType, show_relevant_configurations
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.git_providers import get_git_provider
|
||||
from pr_agent.git_providers import get_git_provider, get_git_provider_with_context, GithubProvider, GitLabProvider
|
||||
from pr_agent.git_providers.git_provider import get_main_pr_language
|
||||
from pr_agent.log import get_logger
|
||||
from pr_agent.servers.help import HelpMessage
|
||||
from pr_agent.tools.pr_description import insert_br_after_x_chars
|
||||
import difflib
|
||||
import re
|
||||
|
||||
|
||||
class PRCodeSuggestions:
|
||||
def __init__(self, pr_url: str, cli_mode=False, args: list = None,
|
||||
ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
|
||||
|
||||
self.git_provider = get_git_provider()(pr_url)
|
||||
self.git_provider = get_git_provider_with_context(pr_url)
|
||||
self.main_language = get_main_pr_language(
|
||||
self.git_provider.get_languages(), self.git_provider.get_files()
|
||||
)
|
||||
@ -34,7 +36,6 @@ class PRCodeSuggestions:
|
||||
get_logger().info(f"Setting max_model_tokens to {MAX_CONTEXT_TOKENS_IMPROVE} for PR improve")
|
||||
get_settings().config.max_model_tokens = MAX_CONTEXT_TOKENS_IMPROVE
|
||||
|
||||
|
||||
# extended mode
|
||||
try:
|
||||
self.is_extended = self._get_is_extended(args or [])
|
||||
@ -57,13 +58,18 @@ class PRCodeSuggestions:
|
||||
"language": self.main_language,
|
||||
"diff": "", # empty diff for initial calculation
|
||||
"num_code_suggestions": num_code_suggestions,
|
||||
"commitable_code_suggestions_mode": get_settings().pr_code_suggestions.commitable_code_suggestions,
|
||||
"extra_instructions": get_settings().pr_code_suggestions.extra_instructions,
|
||||
"commit_messages_str": self.git_provider.get_commit_messages(),
|
||||
}
|
||||
if 'claude' in get_settings().config.model:
|
||||
# prompt for Claude, with minor adjustments
|
||||
self.pr_code_suggestions_prompt_system = get_settings().pr_code_suggestions_prompt_claude.system
|
||||
else:
|
||||
self.pr_code_suggestions_prompt_system = get_settings().pr_code_suggestions_prompt.system
|
||||
|
||||
self.token_handler = TokenHandler(self.git_provider.pr,
|
||||
self.vars,
|
||||
get_settings().pr_code_suggestions_prompt.system,
|
||||
self.pr_code_suggestions_prompt_system,
|
||||
get_settings().pr_code_suggestions_prompt.user)
|
||||
|
||||
self.progress = f"## Generating PR code suggestions\n\n"
|
||||
@ -76,19 +82,21 @@ class PRCodeSuggestions:
|
||||
relevant_configs = {'pr_code_suggestions': dict(get_settings().pr_code_suggestions),
|
||||
'config': dict(get_settings().config)}
|
||||
get_logger().debug("Relevant configs", artifacts=relevant_configs)
|
||||
if get_settings().config.publish_output and get_settings().config.publish_output_progress:
|
||||
if (get_settings().config.publish_output and get_settings().config.publish_output_progress and
|
||||
not get_settings().config.get('is_auto_command', False)):
|
||||
if self.git_provider.is_supported("gfm_markdown"):
|
||||
self.progress_response = self.git_provider.publish_comment(self.progress)
|
||||
else:
|
||||
self.git_provider.publish_comment("Preparing suggestions...", is_temporary=True)
|
||||
|
||||
if not self.is_extended:
|
||||
await retry_with_fallback_models(self._prepare_prediction, ModelType.TURBO)
|
||||
data = self._prepare_pr_code_suggestions()
|
||||
data = await retry_with_fallback_models(self._prepare_prediction)
|
||||
else:
|
||||
data = await retry_with_fallback_models(self._prepare_prediction_extended, ModelType.TURBO)
|
||||
data = await retry_with_fallback_models(self._prepare_prediction_extended)
|
||||
if not data:
|
||||
data = {"code_suggestions": []}
|
||||
|
||||
if (not data) or (not 'code_suggestions' in data) or (not data['code_suggestions']):
|
||||
if data is None or 'code_suggestions' not in data or not data['code_suggestions']:
|
||||
get_logger().error('No code suggestions found for PR.')
|
||||
pr_body = "## PR Code Suggestions ✨\n\nNo code suggestions found for PR."
|
||||
get_logger().debug(f"PR output", artifact=pr_body)
|
||||
@ -105,29 +113,40 @@ class PRCodeSuggestions:
|
||||
|
||||
if get_settings().config.publish_output:
|
||||
self.git_provider.remove_initial_comment()
|
||||
if (not get_settings().pr_code_suggestions.commitable_code_suggestions) and self.git_provider.is_supported("gfm_markdown"):
|
||||
if ((not get_settings().pr_code_suggestions.commitable_code_suggestions) and
|
||||
self.git_provider.is_supported("gfm_markdown")):
|
||||
|
||||
# generate summarized suggestions
|
||||
pr_body = self.generate_summarized_suggestions(data)
|
||||
get_logger().debug(f"PR output", artifact=pr_body)
|
||||
|
||||
# require self-review
|
||||
if get_settings().pr_code_suggestions.demand_code_suggestions_self_review:
|
||||
text = get_settings().pr_code_suggestions.code_suggestions_self_review_text
|
||||
pr_body += f"\n\n- [ ] {text}"
|
||||
if get_settings().pr_code_suggestions.approve_pr_on_self_review:
|
||||
pr_body += ' <!-- approve pr self-review -->'
|
||||
|
||||
# add usage guide
|
||||
if get_settings().pr_code_suggestions.enable_help_text:
|
||||
pr_body += "<hr>\n\n<details> <summary><strong>💡 Tool usage guide:</strong></summary><hr> \n\n"
|
||||
pr_body += HelpMessage.get_improve_usage_guide()
|
||||
pr_body += "\n</details>\n"
|
||||
|
||||
# Output the relevant configurations if enabled
|
||||
if get_settings().get('config', {}).get('output_relevant_configurations', False):
|
||||
pr_body += show_relevant_configurations(relevant_section='pr_code_suggestions')
|
||||
|
||||
if get_settings().pr_code_suggestions.persistent_comment:
|
||||
final_update_message = False
|
||||
self.git_provider.publish_persistent_comment(pr_body,
|
||||
self.publish_persistent_comment_with_history(pr_body,
|
||||
initial_header="## PR Code Suggestions ✨",
|
||||
update_header=True,
|
||||
name="suggestions",
|
||||
final_update_message=final_update_message, )
|
||||
if self.progress_response:
|
||||
self.progress_response.delete()
|
||||
final_update_message=final_update_message,
|
||||
max_previous_comments=get_settings().pr_code_suggestions.max_history_len,
|
||||
progress_response=self.progress_response)
|
||||
else:
|
||||
|
||||
if self.progress_response:
|
||||
self.git_provider.edit_comment(self.progress_response, body=pr_body)
|
||||
else:
|
||||
@ -148,7 +167,109 @@ class PRCodeSuggestions:
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
async def _prepare_prediction(self, model: str):
|
||||
def publish_persistent_comment_with_history(self, pr_comment: str,
                                            initial_header: str,
                                            update_header: bool = True,
                                            name='review',
                                            final_update_message=True,
                                            max_previous_comments=4,
                                            progress_response=None):
    """Publish `pr_comment`, folding earlier suggestions into a history section.

    Looks for an existing issue comment whose body starts with
    `initial_header`. When one is found, its latest suggestions table is
    wrapped in a collapsed <details> entry and placed under a
    "Previous suggestions" section of the new comment; otherwise a fresh
    comment is published.

    Args:
        pr_comment: the new comment body (may start with `initial_header`).
        initial_header: header that identifies the persistent comment.
        update_header: accepted for interface compatibility; not read here.
        name: label used in the <summary> of each history entry.
        final_update_message: accepted for interface compatibility; not read here.
        max_previous_comments: number of history entries to keep; a value
            <= 0 disables the history lookup entirely.
        progress_response: optional, already published progress comment. When
            given, the result is written into it (and the old persistent
            comment is deleted) instead of editing the old comment.
    """
    history_header = "#### Previous suggestions\n"
    # Short (7 chars) id taken from the last path segment of the latest commit URL.
    last_commit_num = self.git_provider.get_latest_commit_url().split('/')[-1][:7]
    latest_suggestion_header = f"Latest suggestions up to {last_commit_num}"
    # Hidden marker that extract_link() later reads back to label history entries.
    latest_commit_html_comment = f"<!-- {last_commit_num} -->"
    entry_title = name.capitalize()

    if max_previous_comments > 0:
        try:
            prev_comments = list(self.git_provider.get_issue_comments())
            for comment in prev_comments:
                if comment.body.startswith(initial_header):
                    prev_suggestions = comment.body
                    comment_url = self.git_provider.get_comment_url(comment)

                    if history_header.strip() not in comment.body:
                        # No history section yet: the whole previous table becomes the first entry.
                        table_index = comment.body.find("<table>")
                        if table_index == -1:
                            # Nothing to archive - just overwrite the old comment.
                            self.git_provider.edit_comment(comment, pr_comment)
                            continue
                        # The commit marker lives in the text preceding the table.
                        up_to_commit_txt = self.extract_link(comment.body[:table_index])
                        prev_suggestion_table = comment.body[table_index:comment.body.rfind("</table>") + len("</table>")]

                        tick = "✅ " if "✅" in prev_suggestion_table else ""
                        # surround with details tag
                        prev_suggestion_table = f"<details><summary>{tick}{entry_title}{up_to_commit_txt}</summary>\n<br>{prev_suggestion_table}\n\n</details>"

                        new_suggestion_table = pr_comment.replace(initial_header, "").strip()

                        pr_comment_updated = f"{initial_header}\n{latest_commit_html_comment}\n\n"
                        pr_comment_updated += f"{latest_suggestion_header}\n{new_suggestion_table}\n\n___\n\n"
                        pr_comment_updated += f"{history_header}{prev_suggestion_table}\n"
                    else:
                        # Split into the "latest" part and the already archived history.
                        sections = prev_suggestions.split(history_header.strip())
                        latest_table = sections[0].strip()
                        prev_suggestion_table = sections[1].replace(history_header, "").strip()

                        table_ind = latest_table.find("<table>")
                        up_to_commit_txt = self.extract_link(latest_table[:table_ind])

                        latest_table = latest_table[table_ind:latest_table.rfind("</table>") + len("</table>")]
                        # enforce max_previous_comments
                        count = prev_suggestions.count(f"\n<details><summary>{entry_title}")
                        count += prev_suggestions.count(f"\n<details><summary>✅ {entry_title}")
                        if count >= max_previous_comments:
                            # Remove the oldest (last) entry. It may or may not carry the
                            # tick prefix, so look for both forms and cut at whichever
                            # starts later; leave the history untouched if neither is found.
                            oldest_start = max(
                                prev_suggestion_table.rfind(f"<details><summary>{entry_title} up to commit"),
                                prev_suggestion_table.rfind(f"<details><summary>✅ {entry_title} up to commit"),
                            )
                            if oldest_start != -1:
                                prev_suggestion_table = prev_suggestion_table[:oldest_start]

                        tick = "✅ " if "✅" in latest_table else ""
                        # Add to the prev_suggestions section
                        last_prev_table = f"\n<details><summary>{tick}{entry_title}{up_to_commit_txt}</summary>\n<br>{latest_table}\n\n</details>"
                        prev_suggestion_table = last_prev_table + "\n" + prev_suggestion_table

                        new_suggestion_table = pr_comment.replace(initial_header, "").strip()

                        pr_comment_updated = f"{initial_header}\n"
                        pr_comment_updated += f"{latest_commit_html_comment}\n\n"
                        pr_comment_updated += f"{latest_suggestion_header}\n\n{new_suggestion_table}\n\n"
                        pr_comment_updated += "___\n\n"
                        pr_comment_updated += f"{history_header}\n"
                        pr_comment_updated += f"{prev_suggestion_table}\n"

                    get_logger().info(f"Persistent mode - updating comment {comment_url} to latest {name} message")
                    if progress_response:  # publish to 'progress_response' comment, because it refreshes immediately
                        self.git_provider.edit_comment(progress_response, pr_comment_updated)
                        comment.delete()
                    else:
                        self.git_provider.edit_comment(comment, pr_comment_updated)
                    return
        except Exception as e:
            # Best effort: on any failure fall through and publish a fresh comment.
            get_logger().exception(f"Failed to update persistent review, error: {e}")

    # if we are here, we did not find a previous comment to update
    body = pr_comment.replace(initial_header, "").strip()
    pr_comment = f"{initial_header}\n\n{latest_commit_html_comment}\n\n{body}\n\n"
    if progress_response:
        self.git_provider.edit_comment(progress_response, pr_comment)
    else:
        self.git_provider.publish_comment(pr_comment)
|
||||
|
||||
def extract_link(self, s):
    """Return " up to commit <text>" built from the first HTML comment in `s`.

    The text is whatever sits between "<!--" and "-->" in the first
    single-line HTML comment, stripped of surrounding whitespace. An empty
    string is returned when `s` contains no such comment.
    """
    found = re.search(r"<!--.*?-->", s)
    if not found:
        return ""

    marker = found.group(0)
    # Drop the 4-char opener and the 3-char closer of the HTML comment.
    inner = marker[4:-3]
    return f" up to commit {inner.strip()}"
|
||||
|
||||
async def _prepare_prediction(self, model: str) -> dict:
|
||||
self.patches_diff = get_pr_diff(self.git_provider,
|
||||
self.token_handler,
|
||||
model,
|
||||
@ -162,16 +283,47 @@ class PRCodeSuggestions:
|
||||
get_logger().error(f"Error getting PR diff")
|
||||
self.prediction = None
|
||||
|
||||
async def _get_prediction(self, model: str, patches_diff: str):
|
||||
data = self.prediction
|
||||
return data
|
||||
|
||||
async def _get_prediction(self, model: str, patches_diff: str) -> dict:
|
||||
variables = copy.deepcopy(self.vars)
|
||||
variables["diff"] = patches_diff # update diff
|
||||
environment = Environment(undefined=StrictUndefined)
|
||||
system_prompt = environment.from_string(get_settings().pr_code_suggestions_prompt.system).render(variables)
|
||||
system_prompt = environment.from_string(self.pr_code_suggestions_prompt_system).render(variables)
|
||||
user_prompt = environment.from_string(get_settings().pr_code_suggestions_prompt.user).render(variables)
|
||||
response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
|
||||
system=system_prompt, user=user_prompt)
|
||||
|
||||
return response
|
||||
# load suggestions from the AI response
|
||||
data = self._prepare_pr_code_suggestions(response)
|
||||
|
||||
# self-reflect on suggestions
|
||||
if get_settings().pr_code_suggestions.self_reflect_on_suggestions:
|
||||
model = get_settings().config.model_turbo # use turbo model for self-reflection, since it is an easier task
|
||||
response_reflect = await self.self_reflect_on_suggestions(data["code_suggestions"], patches_diff,
|
||||
model=model)
|
||||
if response_reflect:
|
||||
response_reflect_yaml = load_yaml(response_reflect)
|
||||
code_suggestions_feedback = response_reflect_yaml["code_suggestions"]
|
||||
if len(code_suggestions_feedback) == len(data["code_suggestions"]):
|
||||
for i, suggestion in enumerate(data["code_suggestions"]):
|
||||
try:
|
||||
suggestion["score"] = code_suggestions_feedback[i]["suggestion_score"]
|
||||
suggestion["score_why"] = code_suggestions_feedback[i]["why"]
|
||||
except Exception as e: #
|
||||
get_logger().error(f"Error processing suggestion score {i}",
|
||||
artifact={"suggestion": suggestion,
|
||||
"code_suggestions_feedback": code_suggestions_feedback[i]})
|
||||
suggestion["score"] = 7
|
||||
suggestion["score_why"] = ""
|
||||
else:
|
||||
# get_logger().error(f"Could not self-reflect on suggestions. using default score 7")
|
||||
for i, suggestion in enumerate(data["code_suggestions"]):
|
||||
suggestion["score"] = 7
|
||||
suggestion["score_why"] = ""
|
||||
|
||||
return data
|
||||
|
||||
@staticmethod
|
||||
def _truncate_if_needed(suggestion):
|
||||
@ -185,40 +337,52 @@ class PRCodeSuggestions:
|
||||
f"characters to {max_code_suggestion_length} characters")
|
||||
return suggestion
|
||||
|
||||
def _prepare_pr_code_suggestions(self) -> Dict:
|
||||
review = self.prediction.strip()
|
||||
data = load_yaml(review,
|
||||
keys_fix_yaml=["relevant_file", "suggestion_content", "existing_code", "improved_code"])
|
||||
def _prepare_pr_code_suggestions(self, predictions: str) -> Dict:
|
||||
data = load_yaml(predictions.strip(),
|
||||
keys_fix_yaml=["relevant_file", "suggestion_content", "existing_code", "improved_code"],
|
||||
first_key="code_suggestions",last_key="label")
|
||||
if isinstance(data, list):
|
||||
data = {'code_suggestions': data}
|
||||
|
||||
# remove invalid suggestions
|
||||
# remove or edit invalid suggestions
|
||||
suggestion_list = []
|
||||
one_sentence_summary_list = []
|
||||
for i, suggestion in enumerate(data['code_suggestions']):
|
||||
try:
|
||||
if not get_settings().pr_code_suggestions.commitable_code_suggestions:
|
||||
if not suggestion or 'one_sentence_summary' not in suggestion or 'label' not in suggestion or 'relevant_file' not in suggestion:
|
||||
get_logger().debug(f"Skipping suggestion {i + 1}, because it is invalid: {suggestion}")
|
||||
needed_keys = ['one_sentence_summary', 'label', 'relevant_file', 'relevant_lines_start', 'relevant_lines_end']
|
||||
is_valid_keys = True
|
||||
for key in needed_keys:
|
||||
if key not in suggestion:
|
||||
is_valid_keys = False
|
||||
get_logger().debug(f"Skipping suggestion {i + 1}, because it does not contain '{key}':\n'{suggestion}")
|
||||
break
|
||||
if not is_valid_keys:
|
||||
continue
|
||||
|
||||
if suggestion['one_sentence_summary'] in one_sentence_summary_list:
|
||||
get_logger().debug(f"Skipping suggestion {i + 1}, because it is a duplicate: {suggestion}")
|
||||
continue
|
||||
|
||||
if 'const' in suggestion['suggestion_content'] and 'instead' in suggestion['suggestion_content'] and 'let' in suggestion['suggestion_content']:
|
||||
get_logger().debug(f"Skipping suggestion {i + 1}, because it uses 'const instead let': {suggestion}")
|
||||
if 'const' in suggestion['suggestion_content'] and 'instead' in suggestion[
|
||||
'suggestion_content'] and 'let' in suggestion['suggestion_content']:
|
||||
get_logger().debug(
|
||||
f"Skipping suggestion {i + 1}, because it uses 'const instead let': {suggestion}")
|
||||
continue
|
||||
|
||||
if ('existing_code' in suggestion) and ('improved_code' in suggestion) and (
|
||||
suggestion['existing_code'] != suggestion['improved_code']):
|
||||
if ('existing_code' in suggestion) and ('improved_code' in suggestion):
|
||||
if suggestion['existing_code'] == suggestion['improved_code']:
|
||||
get_logger().debug(
|
||||
f"edited improved suggestion {i + 1}, because equal to existing code: {suggestion['existing_code']}")
|
||||
if get_settings().pr_code_suggestions.commitable_code_suggestions:
|
||||
suggestion['improved_code'] = "" # we need 'existing_code' to locate the code in the PR
|
||||
else:
|
||||
suggestion['existing_code'] = ""
|
||||
suggestion = self._truncate_if_needed(suggestion)
|
||||
if not get_settings().pr_code_suggestions.commitable_code_suggestions:
|
||||
one_sentence_summary_list.append(suggestion['one_sentence_summary'])
|
||||
suggestion_list.append(suggestion)
|
||||
else:
|
||||
get_logger().debug(
|
||||
f"Skipping suggestion {i + 1}, because existing code is equal to improved code {suggestion['existing_code']}")
|
||||
get_logger().info(
|
||||
f"Skipping suggestion {i + 1}, because it does not contain 'existing_code' or 'improved_code': {suggestion}")
|
||||
except Exception as e:
|
||||
get_logger().error(f"Error processing suggestion {i + 1}: {suggestion}, error: {e}")
|
||||
data['code_suggestions'] = suggestion_list
|
||||
@ -231,12 +395,15 @@ class PRCodeSuggestions:
|
||||
if not data['code_suggestions']:
|
||||
get_logger().info('No suggestions found to improve this PR.')
|
||||
if self.progress_response:
|
||||
return self.git_provider.edit_comment(self.progress_response, body='No suggestions found to improve this PR.')
|
||||
return self.git_provider.edit_comment(self.progress_response,
|
||||
body='No suggestions found to improve this PR.')
|
||||
else:
|
||||
return self.git_provider.publish_comment('No suggestions found to improve this PR.')
|
||||
|
||||
for d in data['code_suggestions']:
|
||||
try:
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
get_logger().info(f"suggestion: {d}")
|
||||
relevant_file = d['relevant_file'].strip()
|
||||
relevant_lines_start = int(d['relevant_lines_start']) # absolute position
|
||||
relevant_lines_end = int(d['relevant_lines_end'])
|
||||
@ -247,6 +414,9 @@ class PRCodeSuggestions:
|
||||
if new_code_snippet:
|
||||
new_code_snippet = self.dedent_code(relevant_file, relevant_lines_start, new_code_snippet)
|
||||
|
||||
if d.get('score'):
|
||||
body = f"**Suggestion:** {content} [{label}, importance: {d.get('score')}]\n```suggestion\n" + new_code_snippet + "\n```"
|
||||
else:
|
||||
body = f"**Suggestion:** {content} [{label}]\n```suggestion\n" + new_code_snippet + "\n```"
|
||||
code_suggestions.append({'body': body, 'relevant_file': relevant_file,
|
||||
'relevant_lines_start': relevant_lines_start,
|
||||
@ -296,7 +466,8 @@ class PRCodeSuggestions:
|
||||
self.patches_diff_list = get_pr_multi_diffs(self.git_provider, self.token_handler, model,
|
||||
max_calls=get_settings().pr_code_suggestions.max_number_of_calls)
|
||||
if self.patches_diff_list:
|
||||
get_logger().debug(f"PR diff", artifact=self.patches_diff_list)
|
||||
get_logger().info(f"Number of PR chunk calls: {len(self.patches_diff_list)}")
|
||||
get_logger().debug(f"PR diff:", artifact=self.patches_diff_list)
|
||||
|
||||
# parallelize calls to AI:
|
||||
if get_settings().pr_code_suggestions.parallel_calls:
|
||||
@ -309,14 +480,24 @@ class PRCodeSuggestions:
|
||||
prediction = await self._get_prediction(model, patches_diff)
|
||||
prediction_list.append(prediction)
|
||||
|
||||
data = {}
|
||||
for prediction in prediction_list:
|
||||
self.prediction = prediction
|
||||
data_per_chunk = self._prepare_pr_code_suggestions()
|
||||
if "code_suggestions" in data:
|
||||
data["code_suggestions"].extend(data_per_chunk["code_suggestions"])
|
||||
data = {"code_suggestions": []}
|
||||
for j, predictions in enumerate(prediction_list): # each call adds an element to the list
|
||||
if "code_suggestions" in predictions:
|
||||
score_threshold = max(1, get_settings().pr_code_suggestions.suggestions_score_threshold)
|
||||
for i, prediction in enumerate(predictions["code_suggestions"]):
|
||||
try:
|
||||
if get_settings().pr_code_suggestions.self_reflect_on_suggestions:
|
||||
score = int(prediction["score"])
|
||||
if score >= score_threshold:
|
||||
data["code_suggestions"].append(prediction)
|
||||
else:
|
||||
data.update(data_per_chunk)
|
||||
get_logger().info(
|
||||
f"Removing suggestions {i} from call {j}, because score is {score}, and score_threshold is {score_threshold}",
|
||||
artifact=prediction)
|
||||
else:
|
||||
data["code_suggestions"].append(prediction)
|
||||
except Exception as e:
|
||||
get_logger().error(f"Error getting PR diff for suggestion {i} in call {j}, error: {e}")
|
||||
self.data = data
|
||||
else:
|
||||
get_logger().error(f"Error getting PR diff")
|
||||
@ -397,10 +578,13 @@ class PRCodeSuggestions:
|
||||
pr_body = "## PR Code Suggestions ✨\n\n"
|
||||
|
||||
pr_body += "<table>"
|
||||
header = f"Suggestions"
|
||||
delta = 76
|
||||
header = f"Suggestion"
|
||||
delta = 66
|
||||
header += " " * delta
|
||||
pr_body += f"""<thead><tr><td>Category</td><td align=left>{header}</td></tr></thead>"""
|
||||
if get_settings().pr_code_suggestions.self_reflect_on_suggestions:
|
||||
pr_body += f"""<thead><tr><td>Category</td><td align=left>{header}</td><td align=center>Score</td></tr>"""
|
||||
else:
|
||||
pr_body += f"""<thead><tr><td>Category</td><td align=left>{header}</td></tr>"""
|
||||
pr_body += """<tbody>"""
|
||||
suggestions_labels = dict()
|
||||
# add all suggestions related to each label
|
||||
@ -410,6 +594,15 @@ class PRCodeSuggestions:
|
||||
suggestions_labels[label] = []
|
||||
suggestions_labels[label].append(suggestion)
|
||||
|
||||
# sort suggestions_labels by the suggestion with the highest score
|
||||
if get_settings().pr_code_suggestions.self_reflect_on_suggestions:
|
||||
suggestions_labels = dict(
|
||||
sorted(suggestions_labels.items(), key=lambda x: max([s['score'] for s in x[1]]), reverse=True))
|
||||
# sort the suggestions inside each label group by score
|
||||
for label, suggestions in suggestions_labels.items():
|
||||
suggestions_labels[label] = sorted(suggestions, key=lambda x: x['score'], reverse=True)
|
||||
|
||||
counter_suggestions = 0
|
||||
for label, suggestions in suggestions_labels.items():
|
||||
num_suggestions = len(suggestions)
|
||||
pr_body += f"""<tr><td rowspan={num_suggestions}><strong>{label.capitalize()}</strong></td>\n"""
|
||||
@ -423,8 +616,12 @@ class PRCodeSuggestions:
|
||||
range_str = f"[{relevant_lines_start}]"
|
||||
else:
|
||||
range_str = f"[{relevant_lines_start}-{relevant_lines_end}]"
|
||||
|
||||
try:
|
||||
code_snippet_link = self.git_provider.get_line_link(relevant_file, relevant_lines_start,
|
||||
relevant_lines_end)
|
||||
except:
|
||||
code_snippet_link = ""
|
||||
# add html table for each suggestion
|
||||
|
||||
suggestion_content = suggestion['suggestion_content'].rstrip().rstrip()
|
||||
@ -445,22 +642,31 @@ class PRCodeSuggestions:
|
||||
pr_body += f"""<td>\n\n"""
|
||||
else:
|
||||
pr_body += f"""<tr><td>\n\n"""
|
||||
suggestion_summary = suggestion['one_sentence_summary'].strip()
|
||||
suggestion_summary = suggestion['one_sentence_summary'].strip().rstrip('.')
|
||||
if '`' in suggestion_summary:
|
||||
suggestion_summary = replace_code_tags(suggestion_summary)
|
||||
# suggestion_summary = suggestion_summary + max((77-len(suggestion_summary)), 0)*" "
|
||||
pr_body += f"""\n\n<details><summary>{suggestion_summary}</summary>\n\n___\n\n"""
|
||||
|
||||
pr_body += f"""\n\n<details><summary>{suggestion_summary}</summary>\n\n___\n\n"""
|
||||
pr_body += f"""
|
||||
**{suggestion_content}**
|
||||
|
||||
[{relevant_file} {range_str}]({code_snippet_link})
|
||||
|
||||
{example_code}
|
||||
{example_code.rstrip()}
|
||||
"""
|
||||
if get_settings().pr_code_suggestions.self_reflect_on_suggestions:
|
||||
pr_body += f"<details><summary>Suggestion importance[1-10]: {suggestion['score']}</summary>\n\n"
|
||||
pr_body += f"Why: {suggestion['score_why']}\n\n"
|
||||
pr_body += f"</details>"
|
||||
pr_body += f"</td></tr>"
|
||||
|
||||
pr_body += f"</details>"
|
||||
|
||||
# # add another column for 'score'
|
||||
if get_settings().pr_code_suggestions.self_reflect_on_suggestions:
|
||||
pr_body += f"</td><td align=center>{suggestion['score']}\n\n"
|
||||
|
||||
pr_body += f"</td></tr>"
|
||||
counter_suggestions += 1
|
||||
|
||||
# pr_body += "</details>"
|
||||
# pr_body += """</td></tr>"""
|
||||
@ -469,3 +675,32 @@ class PRCodeSuggestions:
|
||||
except Exception as e:
|
||||
get_logger().info(f"Failed to publish summarized code suggestions, error: {e}")
|
||||
return ""
|
||||
|
||||
async def self_reflect_on_suggestions(self, suggestion_list: List, patches_diff: str, model: str) -> str:
|
||||
if not suggestion_list:
|
||||
return ""
|
||||
|
||||
try:
|
||||
suggestion_str = ""
|
||||
for i, suggestion in enumerate(suggestion_list):
|
||||
suggestion_str += f"suggestion {i + 1}: " + str(suggestion) + '\n\n'
|
||||
|
||||
variables = {'suggestion_list': suggestion_list,
|
||||
'suggestion_str': suggestion_str,
|
||||
"diff": patches_diff,
|
||||
'num_code_suggestions': len(suggestion_list)}
|
||||
environment = Environment(undefined=StrictUndefined)
|
||||
system_prompt_reflect = environment.from_string(
|
||||
get_settings().pr_code_suggestions_reflect_prompt.system).render(
|
||||
variables)
|
||||
user_prompt_reflect = environment.from_string(
|
||||
get_settings().pr_code_suggestions_reflect_prompt.user).render(variables)
|
||||
with get_logger().contextualize(command="self_reflect_on_suggestions"):
|
||||
response_reflect, finish_reason_reflect = await self.ai_handler.chat_completion(model=model,
|
||||
system=system_prompt_reflect,
|
||||
user=user_prompt_reflect)
|
||||
except Exception as e:
|
||||
get_logger().info(f"Could not reflect on suggestions, error: {e}")
|
||||
return ""
|
||||
return response_reflect
|
||||
|
||||
|
@ -1,17 +1,22 @@
|
||||
import asyncio
|
||||
import copy
|
||||
import re
|
||||
from functools import partial
|
||||
from typing import List, Tuple
|
||||
|
||||
import yaml
|
||||
from jinja2 import Environment, StrictUndefined
|
||||
|
||||
from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
|
||||
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
|
||||
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
|
||||
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models, get_pr_diff_multiple_patchs, \
|
||||
OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD
|
||||
from pr_agent.algo.token_handler import TokenHandler
|
||||
from pr_agent.algo.utils import load_yaml, set_custom_labels, get_user_labels, ModelType
|
||||
from pr_agent.algo.utils import set_custom_labels
|
||||
from pr_agent.algo.utils import load_yaml, get_user_labels, ModelType, show_relevant_configurations, get_max_tokens, \
|
||||
clip_tokens
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.git_providers import get_git_provider
|
||||
from pr_agent.git_providers import get_git_provider, GithubProvider, get_git_provider_with_context
|
||||
from pr_agent.git_providers.git_provider import get_main_pr_language
|
||||
from pr_agent.log import get_logger
|
||||
from pr_agent.servers.help import HelpMessage
|
||||
@ -28,7 +33,7 @@ class PRDescription:
|
||||
args (list, optional): List of arguments passed to the PRDescription class. Defaults to None.
|
||||
"""
|
||||
# Initialize the git provider and main PR language
|
||||
self.git_provider = get_git_provider()(pr_url)
|
||||
self.git_provider = get_git_provider_with_context(pr_url)
|
||||
self.main_pr_language = get_main_pr_language(
|
||||
self.git_provider.get_languages(), self.git_provider.get_files()
|
||||
)
|
||||
@ -43,7 +48,6 @@ class PRDescription:
|
||||
self.ai_handler = ai_handler()
|
||||
self.ai_handler.main_pr_language = self.main_pr_language
|
||||
|
||||
|
||||
# Initialize the variables dictionary
|
||||
self.vars = {
|
||||
"title": self.git_provider.pr.title,
|
||||
@ -57,6 +61,7 @@ class PRDescription:
|
||||
"custom_labels_class": "", # will be filled if necessary in 'set_custom_labels' function
|
||||
"enable_semantic_files_types": get_settings().pr_description.enable_semantic_files_types,
|
||||
}
|
||||
|
||||
self.user_description = self.git_provider.get_user_description()
|
||||
|
||||
# Initialize the token handler
|
||||
@ -79,10 +84,10 @@ class PRDescription:
|
||||
relevant_configs = {'pr_description': dict(get_settings().pr_description),
|
||||
'config': dict(get_settings().config)}
|
||||
get_logger().debug("Relevant configs", artifacts=relevant_configs)
|
||||
if get_settings().config.publish_output:
|
||||
if get_settings().config.publish_output and not get_settings().config.get('is_auto_command', False):
|
||||
self.git_provider.publish_comment("Preparing PR description...", is_temporary=True)
|
||||
|
||||
await retry_with_fallback_models(self._prepare_prediction, ModelType.TURBO) # turbo model because larger context
|
||||
await retry_with_fallback_models(self._prepare_prediction, ModelType.TURBO)
|
||||
|
||||
if self.prediction:
|
||||
self._prepare_data()
|
||||
@ -116,6 +121,10 @@ class PRDescription:
|
||||
pr_body += "\n\n___\n\n> 💡 **PR-Agent usage**:"
|
||||
pr_body += "\n>Comment `/help` on the PR to get a list of all available PR-Agent tools and their descriptions\n\n"
|
||||
|
||||
# Output the relevant configurations if enabled
|
||||
if get_settings().get('config', {}).get('output_relevant_configurations', False):
|
||||
pr_body += show_relevant_configurations(relevant_section='pr_description')
|
||||
|
||||
if get_settings().config.publish_output:
|
||||
# publish labels
|
||||
if get_settings().pr_description.publish_labels and self.git_provider.is_supported("get_labels"):
|
||||
@ -160,32 +169,159 @@ class PRDescription:
|
||||
if get_settings().pr_description.use_description_markers and 'pr_agent:' not in self.user_description:
|
||||
return None
|
||||
|
||||
self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)
|
||||
if self.patches_diff:
|
||||
large_pr_handling = get_settings().pr_description.enable_large_pr_handling and "pr_description_only_files_prompts" in get_settings()
|
||||
output = get_pr_diff(self.git_provider, self.token_handler, model, large_pr_handling=large_pr_handling, return_remaining_files=True)
|
||||
if isinstance(output, tuple):
|
||||
patches_diff, remaining_files_list = output
|
||||
else:
|
||||
patches_diff = output
|
||||
remaining_files_list = []
|
||||
if not large_pr_handling or patches_diff:
|
||||
self.patches_diff = patches_diff
|
||||
if patches_diff:
|
||||
get_logger().debug(f"PR diff", artifact=self.patches_diff)
|
||||
self.prediction = await self._get_prediction(model)
|
||||
self.prediction = await self._get_prediction(model, patches_diff, prompt="pr_description_prompt")
|
||||
if (remaining_files_list and 'pr_files' in self.prediction and 'label:' in self.prediction and
|
||||
get_settings().pr_description.mention_extra_files):
|
||||
get_logger().debug(f"Extending additional files, {len(remaining_files_list)} files")
|
||||
self.prediction = await self.extend_additional_files(remaining_files_list)
|
||||
else:
|
||||
get_logger().error(f"Error getting PR diff {self.pr_id}")
|
||||
self.prediction = None
|
||||
else:
|
||||
# get the diff in multiple patches, with the token handler only for the files prompt
|
||||
get_logger().debug('large_pr_handling for describe')
|
||||
token_handler_only_files_prompt = TokenHandler(
|
||||
self.git_provider.pr,
|
||||
self.vars,
|
||||
get_settings().pr_description_only_files_prompts.system,
|
||||
get_settings().pr_description_only_files_prompts.user,
|
||||
)
|
||||
(patches_compressed_list, total_tokens_list, deleted_files_list, remaining_files_list, file_dict,
|
||||
files_in_patches_list) = get_pr_diff_multiple_patchs(
|
||||
self.git_provider, token_handler_only_files_prompt, model)
|
||||
|
||||
async def _get_prediction(self, model: str) -> str:
|
||||
# get the files prediction for each patch
|
||||
if not get_settings().pr_description.async_ai_calls:
|
||||
results = []
|
||||
for i, patches in enumerate(patches_compressed_list): # sync calls
|
||||
patches_diff = "\n".join(patches)
|
||||
get_logger().debug(f"PR diff number {i + 1} for describe files")
|
||||
prediction_files = await self._get_prediction(model, patches_diff,
|
||||
prompt="pr_description_only_files_prompts")
|
||||
results.append(prediction_files)
|
||||
else: # async calls
|
||||
tasks = []
|
||||
for i, patches in enumerate(patches_compressed_list):
|
||||
patches_diff = "\n".join(patches)
|
||||
get_logger().debug(f"PR diff number {i + 1} for describe files")
|
||||
task = asyncio.create_task(
|
||||
self._get_prediction(model, patches_diff, prompt="pr_description_only_files_prompts"))
|
||||
tasks.append(task)
|
||||
# Wait for all tasks to complete
|
||||
results = await asyncio.gather(*tasks)
|
||||
file_description_str_list = []
|
||||
for i, result in enumerate(results):
|
||||
prediction_files = result.strip().removeprefix('```yaml').strip('`').strip()
|
||||
if load_yaml(prediction_files) and prediction_files.startswith('pr_files'):
|
||||
prediction_files = prediction_files.removeprefix('pr_files:').strip()
|
||||
file_description_str_list.append(prediction_files)
|
||||
else:
|
||||
get_logger().debug(f"failed to generate predictions in iteration {i + 1} for describe files")
|
||||
|
||||
# generate files_walkthrough string, with proper token handling
|
||||
token_handler_only_description_prompt = TokenHandler(
|
||||
self.git_provider.pr,
|
||||
self.vars,
|
||||
get_settings().pr_description_only_description_prompts.system,
|
||||
get_settings().pr_description_only_description_prompts.user)
|
||||
files_walkthrough = "\n".join(file_description_str_list)
|
||||
files_walkthrough_prompt = copy.deepcopy(files_walkthrough)
|
||||
if remaining_files_list:
|
||||
files_walkthrough_prompt += "\n\nNo more token budget. Additional unprocessed files:"
|
||||
for file in remaining_files_list:
|
||||
files_walkthrough_prompt += f"\n- {file}"
|
||||
if deleted_files_list:
|
||||
files_walkthrough_prompt += "\n\nAdditional deleted files:"
|
||||
for file in deleted_files_list:
|
||||
files_walkthrough_prompt += f"\n- {file}"
|
||||
tokens_files_walkthrough = len(
|
||||
token_handler_only_description_prompt.encoder.encode(files_walkthrough_prompt))
|
||||
total_tokens = token_handler_only_description_prompt.prompt_tokens + tokens_files_walkthrough
|
||||
max_tokens_model = get_max_tokens(model)
|
||||
if total_tokens > max_tokens_model - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD:
|
||||
# clip files_walkthrough to git the tokens within the limit
|
||||
files_walkthrough_prompt = clip_tokens(files_walkthrough_prompt,
|
||||
max_tokens_model - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD - token_handler_only_description_prompt.prompt_tokens,
|
||||
num_input_tokens=tokens_files_walkthrough)
|
||||
|
||||
# PR header inference
|
||||
get_logger().debug(f"PR diff only description", artifact=files_walkthrough_prompt)
|
||||
prediction_headers = await self._get_prediction(model, patches_diff=files_walkthrough_prompt,
|
||||
prompt="pr_description_only_description_prompts")
|
||||
prediction_headers = prediction_headers.strip().removeprefix('```yaml').strip('`').strip()
|
||||
|
||||
# manually add extra files to final prediction
|
||||
if get_settings().pr_description.mention_extra_files:
|
||||
for file in remaining_files_list:
|
||||
extra_file_yaml = f"""\
|
||||
- filename: |
|
||||
{file}
|
||||
changes_summary: |
|
||||
...
|
||||
changes_title: |
|
||||
...
|
||||
label: |
|
||||
additional files (token-limit)
|
||||
"""
|
||||
Generate an AI prediction for the PR description based on the provided model.
|
||||
files_walkthrough = files_walkthrough.strip() + "\n" + extra_file_yaml.strip()
|
||||
# final processing
|
||||
self.prediction = prediction_headers + "\n" + "pr_files:\n" + files_walkthrough
|
||||
if not load_yaml(self.prediction):
|
||||
get_logger().error(f"Error getting valid YAML in large PR handling for describe {self.pr_id}")
|
||||
if load_yaml(prediction_headers):
|
||||
get_logger().debug(f"Using only headers for describe {self.pr_id}")
|
||||
self.prediction = prediction_headers
|
||||
|
||||
Args:
|
||||
model (str): The name of the model to be used for generating the prediction.
|
||||
|
||||
Returns:
|
||||
str: The generated AI prediction.
|
||||
async def extend_additional_files(self, remaining_files_list) -> str:
|
||||
prediction = self.prediction
|
||||
try:
|
||||
original_prediction_dict = load_yaml(self.prediction)
|
||||
prediction_extra = "pr_files:"
|
||||
for file in remaining_files_list:
|
||||
extra_file_yaml = f"""\
|
||||
- filename: |
|
||||
{file}
|
||||
changes_summary: |
|
||||
...
|
||||
changes_title: |
|
||||
...
|
||||
label: |
|
||||
additional files (token-limit)
|
||||
"""
|
||||
prediction_extra = prediction_extra + "\n" + extra_file_yaml.strip()
|
||||
prediction_extra_dict = load_yaml(prediction_extra)
|
||||
# merge the two dictionaries
|
||||
if isinstance(original_prediction_dict, dict) and isinstance(prediction_extra_dict, dict):
|
||||
original_prediction_dict["pr_files"].extend(prediction_extra_dict["pr_files"])
|
||||
new_yaml = yaml.dump(original_prediction_dict)
|
||||
if load_yaml(new_yaml):
|
||||
prediction = new_yaml
|
||||
return prediction
|
||||
except Exception as e:
|
||||
get_logger().error(f"Error extending additional files {self.pr_id}: {e}")
|
||||
return self.prediction
|
||||
|
||||
async def _get_prediction(self, model: str, patches_diff: str, prompt="pr_description_prompt") -> str:
|
||||
variables = copy.deepcopy(self.vars)
|
||||
variables["diff"] = self.patches_diff # update diff
|
||||
variables["diff"] = patches_diff # update diff
|
||||
|
||||
environment = Environment(undefined=StrictUndefined)
|
||||
set_custom_labels(variables, self.git_provider)
|
||||
self.variables = variables
|
||||
system_prompt = environment.from_string(get_settings().pr_description_prompt.system).render(variables)
|
||||
user_prompt = environment.from_string(get_settings().pr_description_prompt.user).render(variables)
|
||||
|
||||
system_prompt = environment.from_string(get_settings().get(prompt, {}).get("system", "")).render(variables)
|
||||
user_prompt = environment.from_string(get_settings().get(prompt, {}).get("user", "")).render(variables)
|
||||
|
||||
response, finish_reason = await self.ai_handler.chat_completion(
|
||||
model=model,
|
||||
@ -217,9 +353,6 @@ class PRDescription:
|
||||
if 'pr_files' in self.data:
|
||||
self.data['pr_files'] = self.data.pop('pr_files')
|
||||
|
||||
|
||||
|
||||
|
||||
def _prepare_labels(self) -> List[str]:
|
||||
pr_types = []
|
||||
|
||||
@ -351,7 +484,7 @@ class PRDescription:
|
||||
filename = file['filename'].replace("'", "`").replace('"', '`')
|
||||
changes_summary = file['changes_summary']
|
||||
changes_title = file['changes_title'].strip()
|
||||
label = file.get('label')
|
||||
label = file.get('label').strip().lower()
|
||||
if label not in file_label_dict:
|
||||
file_label_dict[label] = []
|
||||
file_label_dict[label].append((filename, changes_title, changes_summary))
|
||||
@ -392,6 +525,7 @@ class PRDescription:
|
||||
for filename, file_changes_title, file_change_description in list_tuples:
|
||||
filename = filename.replace("'", "`").rstrip()
|
||||
filename_publish = filename.split("/")[-1]
|
||||
|
||||
file_changes_title_code = f"<code>{file_changes_title}</code>"
|
||||
file_changes_title_code_br = insert_br_after_x_chars(file_changes_title_code, x=(delta - 5)).strip()
|
||||
if len(file_changes_title_code_br) < (delta - 5):
|
||||
@ -399,9 +533,9 @@ class PRDescription:
|
||||
filename_publish = f"<strong>{filename_publish}</strong><dd>{file_changes_title_code_br}</dd>"
|
||||
diff_plus_minus = ""
|
||||
delta_nbsp = ""
|
||||
diff_files = self.git_provider.diff_files
|
||||
diff_files = self.git_provider.get_diff_files()
|
||||
for f in diff_files:
|
||||
if f.filename.lower() == filename.lower():
|
||||
if f.filename.lower().strip('/') == filename.lower().strip('/'):
|
||||
num_plus_lines = f.num_plus_lines
|
||||
num_minus_lines = f.num_minus_lines
|
||||
diff_plus_minus += f"+{num_plus_lines}/-{num_minus_lines}"
|
||||
@ -423,6 +557,7 @@ class PRDescription:
|
||||
<hr>
|
||||
|
||||
{filename}
|
||||
|
||||
{file_change_description_br}
|
||||
|
||||
|
||||
@ -431,6 +566,7 @@ class PRDescription:
|
||||
|
||||
</td>
|
||||
<td><a href="{link}">{diff_plus_minus}</a>{delta_nbsp}</td>
|
||||
|
||||
</tr>
|
||||
"""
|
||||
if use_collapsible_file_list:
|
||||
@ -508,6 +644,7 @@ def insert_br_after_x_chars(text, x=70):
|
||||
is_inside_code = False
|
||||
return ''.join(new_text).strip()
|
||||
|
||||
|
||||
def replace_code_tags(text):
|
||||
"""
|
||||
Replace odd instances of ` with <code> and even instances of ` with </code>
|
||||
|
@ -35,7 +35,7 @@ class PRHelpMessage:
|
||||
tool_names.append(f"[ASK]({base_path}/ask/)")
|
||||
tool_names.append(f"[GENERATE CUSTOM LABELS]({base_path}/custom_labels/) 💎")
|
||||
tool_names.append(f"[CI FEEDBACK]({base_path}/ci_feedback/) 💎")
|
||||
tool_names.append(f"[CUSTOM SUGGESTIONS]({base_path}/custom_suggestions/) 💎")
|
||||
tool_names.append(f"[CUSTOM PROMPT]({base_path}/custom_prompt/) 💎")
|
||||
tool_names.append(f"[SIMILAR ISSUE]({base_path}/similar_issues/)")
|
||||
|
||||
descriptions = []
|
||||
@ -50,7 +50,7 @@ class PRHelpMessage:
|
||||
descriptions.append("Answering free-text questions about the PR")
|
||||
descriptions.append("Generates custom labels for the PR, based on specific guidelines defined by the user")
|
||||
descriptions.append("Generates feedback and analysis for a failed CI job")
|
||||
descriptions.append("Generates custom suggestions for improving the PR code, based only on specific guidelines defined by the user")
|
||||
descriptions.append("Generates custom suggestions for improving the PR code, derived only from a specific guidelines prompt defined by the user")
|
||||
descriptions.append("Automatically retrieves and presents similar issues")
|
||||
|
||||
commands =[]
|
||||
@ -65,7 +65,7 @@ class PRHelpMessage:
|
||||
commands.append("`/ask`")
|
||||
commands.append("`/generate_labels`")
|
||||
commands.append("`/checks`")
|
||||
commands.append("`/custom_suggestions`")
|
||||
commands.append("`/custom_prompt`")
|
||||
commands.append("`/similar_issue`")
|
||||
|
||||
checkbox_list = []
|
||||
@ -86,12 +86,12 @@ class PRHelpMessage:
|
||||
checkbox_list.append("[*]")
|
||||
checkbox_list.append("[*]")
|
||||
|
||||
if isinstance(self.git_provider, GithubProvider):
|
||||
if isinstance(self.git_provider, GithubProvider) and not get_settings().config.get('disable_checkboxes', False):
|
||||
pr_comment += f"<table><tr align='left'><th align='left'>Tool</th><th align='left'>Description</th><th align='left'>Trigger Interactively :gem:</th></tr>"
|
||||
for i in range(len(tool_names)):
|
||||
pr_comment += f"\n<tr><td align='left'>\n\n<strong>{tool_names[i]}</strong></td>\n<td>{descriptions[i]}</td>\n<td>\n\n{checkbox_list[i]}\n</td></tr>"
|
||||
pr_comment += "</table>\n\n"
|
||||
pr_comment += f"""\n\n(1) Note that each tool be [triggered automatically](https://github.com/Codium-ai/pr-agent/blob/main/Usage.md#github-app-automatic-tools-for-pr-actions) when a new PR is opened, or called manually by [commenting on a PR](https://github.com/Codium-ai/pr-agent/blob/main/Usage.md#online-usage)."""
|
||||
pr_comment += f"""\n\n(1) Note that each tool be [triggered automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) when a new PR is opened, or called manually by [commenting on a PR](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#online-usage)."""
|
||||
pr_comment += f"""\n\n(2) Tools marked with [*] require additional parameters to be passed. For example, to invoke the `/ask` tool, you need to comment on a PR: `/ask "<question content>"`. See the relevant documentation for each tool for more details."""
|
||||
else:
|
||||
pr_comment += f"<table><tr align='left'><th align='left'>Tool</th><th align='left'>Command</th><th align='left'>Description</th></tr>"
|
||||
|
@ -7,6 +7,7 @@ from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
|
||||
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
|
||||
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
|
||||
from pr_agent.algo.token_handler import TokenHandler
|
||||
from pr_agent.algo.utils import ModelType
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.git_providers import get_git_provider
|
||||
from pr_agent.git_providers.git_provider import get_main_pr_language
|
||||
@ -58,11 +59,11 @@ class PRQuestions:
|
||||
self.git_provider.publish_comment("Preparing answer...", is_temporary=True)
|
||||
|
||||
# identify image
|
||||
img_path = self.idenfity_image_in_comment()
|
||||
img_path = self.identify_image_in_comment()
|
||||
if img_path:
|
||||
get_logger().debug(f"Image path identified", artifact=img_path)
|
||||
|
||||
await retry_with_fallback_models(self._prepare_prediction)
|
||||
await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.TURBO)
|
||||
|
||||
pr_comment = self._prepare_pr_answer()
|
||||
get_logger().debug(f"PR output", artifact=pr_comment)
|
||||
@ -77,7 +78,7 @@ class PRQuestions:
|
||||
self.git_provider.remove_initial_comment()
|
||||
return ""
|
||||
|
||||
def idenfity_image_in_comment(self):
|
||||
def identify_image_in_comment(self):
|
||||
img_path = ''
|
||||
if '![image]' in self.question_str:
|
||||
# assuming structure:
|
||||
|
@ -8,9 +8,10 @@ from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
|
||||
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
|
||||
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
|
||||
from pr_agent.algo.token_handler import TokenHandler
|
||||
from pr_agent.algo.utils import convert_to_markdown, github_action_output, load_yaml, ModelType
|
||||
from pr_agent.algo.utils import github_action_output, load_yaml, ModelType, \
|
||||
show_relevant_configurations, convert_to_markdown_v2, PRReviewHeader
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.git_providers import get_git_provider
|
||||
from pr_agent.git_providers import get_git_provider, get_git_provider_with_context
|
||||
from pr_agent.git_providers.git_provider import IncrementalPR, get_main_pr_language
|
||||
from pr_agent.log import get_logger
|
||||
from pr_agent.servers.help import HelpMessage
|
||||
@ -20,6 +21,7 @@ class PRReviewer:
|
||||
"""
|
||||
The PRReviewer class is responsible for reviewing a pull request and generating feedback using an AI model.
|
||||
"""
|
||||
|
||||
def __init__(self, pr_url: str, is_answer: bool = False, is_auto: bool = False, args: list = None,
|
||||
ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
|
||||
"""
|
||||
@ -32,10 +34,12 @@ class PRReviewer:
|
||||
ai_handler (BaseAiHandler): The AI handler to be used for the review. Defaults to None.
|
||||
args (list, optional): List of arguments passed to the PRReviewer class. Defaults to None.
|
||||
"""
|
||||
self.git_provider = get_git_provider_with_context(pr_url)
|
||||
self.args = args
|
||||
self.parse_args(args) # -i command
|
||||
self.incremental = self.parse_incremental(args) # -i command
|
||||
if self.incremental and self.incremental.is_incremental:
|
||||
self.git_provider.get_incremental_commits(self.incremental)
|
||||
|
||||
self.git_provider = get_git_provider()(pr_url, incremental=self.incremental)
|
||||
self.main_language = get_main_pr_language(
|
||||
self.git_provider.get_languages(), self.git_provider.get_files()
|
||||
)
|
||||
@ -63,6 +67,7 @@ class PRReviewer:
|
||||
"require_tests": get_settings().pr_reviewer.require_tests_review,
|
||||
"require_estimate_effort_to_review": get_settings().pr_reviewer.require_estimate_effort_to_review,
|
||||
'require_can_be_split_review': get_settings().pr_reviewer.require_can_be_split_review,
|
||||
'require_security_review': get_settings().pr_reviewer.require_security_review,
|
||||
'num_code_suggestions': get_settings().pr_reviewer.num_code_suggestions,
|
||||
'question_str': question_str,
|
||||
'answer_str': answer_str,
|
||||
@ -79,22 +84,14 @@ class PRReviewer:
|
||||
get_settings().pr_review_prompt.user
|
||||
)
|
||||
|
||||
def parse_args(self, args: List[str]) -> None:
|
||||
"""
|
||||
Parse the arguments passed to the PRReviewer class and set the 'incremental' attribute accordingly.
|
||||
|
||||
Args:
|
||||
args: A list of arguments passed to the PRReviewer class.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
def parse_incremental(self, args: List[str]):
|
||||
is_incremental = False
|
||||
if args and len(args) >= 1:
|
||||
arg = args[0]
|
||||
if arg == "-i":
|
||||
is_incremental = True
|
||||
self.incremental = IncrementalPR(is_incremental)
|
||||
incremental = IncrementalPR(is_incremental)
|
||||
return incremental
|
||||
|
||||
async def run(self) -> None:
|
||||
try:
|
||||
@ -121,10 +118,10 @@ class PRReviewer:
|
||||
f"No files were changed since the [previous PR Review]({previous_review_url})")
|
||||
return None
|
||||
|
||||
if get_settings().config.publish_output:
|
||||
if get_settings().config.publish_output and not get_settings().config.get('is_auto_command', False):
|
||||
self.git_provider.publish_comment("Preparing review...", is_temporary=True)
|
||||
|
||||
await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.TURBO)
|
||||
await retry_with_fallback_models(self._prepare_prediction)
|
||||
if not self.prediction:
|
||||
self.git_provider.remove_initial_comment()
|
||||
return None
|
||||
@ -137,7 +134,7 @@ class PRReviewer:
|
||||
if get_settings().pr_reviewer.persistent_comment and not self.incremental.is_incremental:
|
||||
final_update_message = get_settings().pr_reviewer.final_update_message
|
||||
self.git_provider.publish_persistent_comment(pr_review,
|
||||
initial_header="## PR Review 🔍",
|
||||
initial_header=f"{PRReviewHeader.REGULAR.value} 🔍",
|
||||
update_header=True,
|
||||
final_update_message=final_update_message, )
|
||||
else:
|
||||
@ -189,11 +186,19 @@ class PRReviewer:
|
||||
Prepare the PR review by processing the AI prediction and generating a markdown-formatted text that summarizes
|
||||
the feedback.
|
||||
"""
|
||||
first_key = 'review'
|
||||
last_key = 'security_concerns'
|
||||
data = load_yaml(self.prediction.strip(),
|
||||
keys_fix_yaml=["estimated_effort_to_review_[1-5]:", "security_concerns:", "possible_issues:",
|
||||
"relevant_file:", "relevant_line:", "suggestion:"])
|
||||
keys_fix_yaml=["estimated_effort_to_review_[1-5]:", "security_concerns:", "key_issues_to_review:",
|
||||
"relevant_file:", "relevant_line:", "suggestion:"],
|
||||
first_key=first_key, last_key=last_key)
|
||||
github_action_output(data, 'review')
|
||||
|
||||
# move data['review'] 'key_issues_to_review' key to the end of the dictionary
|
||||
if 'key_issues_to_review' in data['review']:
|
||||
key_issues_to_review = data['review'].pop('key_issues_to_review')
|
||||
data['review']['key_issues_to_review'] = key_issues_to_review
|
||||
|
||||
if 'code_feedback' in data:
|
||||
code_feedback = data['code_feedback']
|
||||
|
||||
@ -221,7 +226,6 @@ class PRReviewer:
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
incremental_review_markdown_text = None
|
||||
# Add incremental review section
|
||||
if self.incremental.is_incremental:
|
||||
@ -229,7 +233,7 @@ class PRReviewer:
|
||||
f"{self.git_provider.incremental.first_new_commit_sha}"
|
||||
incremental_review_markdown_text = f"Starting from commit {last_commit_url}"
|
||||
|
||||
markdown_text = convert_to_markdown(data, self.git_provider.is_supported("gfm_markdown"),
|
||||
markdown_text = convert_to_markdown_v2(data, self.git_provider.is_supported("gfm_markdown"),
|
||||
incremental_review_markdown_text)
|
||||
|
||||
# Add help text if gfm_markdown is supported
|
||||
@ -238,6 +242,10 @@ class PRReviewer:
|
||||
markdown_text += HelpMessage.get_review_usage_guide()
|
||||
markdown_text += "\n</details>\n"
|
||||
|
||||
# Output the relevant configurations if enabled
|
||||
if get_settings().get('config', {}).get('output_relevant_configurations', False):
|
||||
markdown_text += show_relevant_configurations(relevant_section='pr_reviewer')
|
||||
|
||||
# Add custom labels from the review prediction (effort, security)
|
||||
self.set_review_labels(data)
|
||||
|
||||
@ -253,9 +261,12 @@ class PRReviewer:
|
||||
if get_settings().pr_reviewer.num_code_suggestions == 0:
|
||||
return
|
||||
|
||||
first_key = 'review'
|
||||
last_key = 'security_concerns'
|
||||
data = load_yaml(self.prediction.strip(),
|
||||
keys_fix_yaml=["estimated_effort_to_review_[1-5]:", "security_concerns:", "possible_issues:",
|
||||
"relevant_file:", "relevant_line:", "suggestion:"])
|
||||
keys_fix_yaml=["estimated_effort_to_review_[1-5]:", "security_concerns:", "key_issues_to_review:",
|
||||
"relevant_file:", "relevant_line:", "suggestion:"],
|
||||
first_key=first_key, last_key=last_key)
|
||||
comments: List[str] = []
|
||||
for suggestion in data.get('code_feedback', []):
|
||||
relevant_file = suggestion.get('relevant_file', '').strip()
|
||||
@ -370,7 +381,7 @@ class PRReviewer:
|
||||
estimated_effort_number = int(estimated_effort.split(',')[0])
|
||||
if 1 <= estimated_effort_number <= 5: # 1, because ...
|
||||
review_labels.append(f'Review effort [1-5]: {estimated_effort_number}')
|
||||
if get_settings().pr_reviewer.enable_review_labels_security:
|
||||
if get_settings().pr_reviewer.enable_review_labels_security and get_settings().pr_reviewer.require_security_review:
|
||||
security_concerns = data['review']['security_concerns'] # yes, because ...
|
||||
security_concerns_bool = 'yes' in security_concerns.lower() or 'true' in security_concerns.lower()
|
||||
if security_concerns_bool:
|
||||
|
@ -8,7 +8,7 @@ from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
|
||||
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
|
||||
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
|
||||
from pr_agent.algo.token_handler import TokenHandler
|
||||
from pr_agent.algo.utils import ModelType
|
||||
from pr_agent.algo.utils import ModelType, show_relevant_configurations
|
||||
from pr_agent.config_loader import get_settings
|
||||
from pr_agent.git_providers import get_git_provider, GithubProvider
|
||||
from pr_agent.git_providers.git_provider import get_main_pr_language
|
||||
@ -25,7 +25,7 @@ class PRUpdateChangelog:
|
||||
self.git_provider.get_languages(), self.git_provider.get_files()
|
||||
)
|
||||
self.commit_changelog = get_settings().pr_update_changelog.push_changelog_changes
|
||||
self._get_changlog_file() # self.changelog_file_str
|
||||
self._get_changelog_file() # self.changelog_file_str
|
||||
|
||||
self.ai_handler = ai_handler()
|
||||
self.ai_handler.main_pr_language = self.main_language
|
||||
@ -74,6 +74,11 @@ class PRUpdateChangelog:
|
||||
await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.TURBO)
|
||||
|
||||
new_file_content, answer = self._prepare_changelog_update()
|
||||
|
||||
# Output the relevant configurations if enabled
|
||||
if get_settings().get('config', {}).get('output_relevant_configurations', False):
|
||||
answer += show_relevant_configurations(relevant_section='pr_update_changelog')
|
||||
|
||||
get_logger().debug(f"PR output", artifact=answer)
|
||||
|
||||
if get_settings().config.publish_output:
|
||||
@ -158,7 +163,7 @@ Example:
|
||||
"""
|
||||
return example_changelog
|
||||
|
||||
def _get_changlog_file(self):
|
||||
def _get_changelog_file(self):
|
||||
try:
|
||||
self.changelog_file = self.git_provider.get_pr_file_content(
|
||||
"CHANGELOG.md", self.git_provider.get_pr_branch()
|
||||
|
@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "pr-agent"
|
||||
version = "0.2.1"
|
||||
version = "0.2.2"
|
||||
|
||||
authors = [{name= "CodiumAI", email = "tal.r@codium.ai"}]
|
||||
|
||||
|
@ -1,28 +1,30 @@
|
||||
aiohttp==3.9.1
|
||||
anthropic[vertex]==0.21.3
|
||||
atlassian-python-api==3.41.4
|
||||
azure-devops==7.1.0b3
|
||||
azure-identity==1.15.0
|
||||
boto3==1.33.6
|
||||
dynaconf==3.2.4
|
||||
fastapi==0.99.0
|
||||
fastapi==0.111.0
|
||||
GitPython==3.1.32
|
||||
google-cloud-aiplatform==1.35.0
|
||||
google-cloud-aiplatform==1.38.0
|
||||
google-cloud-storage==2.10.0
|
||||
Jinja2==3.1.2
|
||||
litellm==1.31.10
|
||||
litellm==1.40.17
|
||||
loguru==0.7.2
|
||||
msrest==0.7.1
|
||||
openai==1.13.3
|
||||
openai==1.35.1
|
||||
pytest==7.4.0
|
||||
PyGithub==1.59.*
|
||||
PyYAML==6.0.1
|
||||
python-gitlab==3.15.0
|
||||
retry==0.9.2
|
||||
starlette-context==0.3.6
|
||||
tiktoken==0.5.2
|
||||
tiktoken==0.7.0
|
||||
ujson==5.8.0
|
||||
uvicorn==0.22.0
|
||||
tenacity==8.2.3
|
||||
gunicorn==20.1.0
|
||||
# Uncomment the following lines to enable the 'similar issue' tool
|
||||
# pinecone-client
|
||||
# pinecone-datasets @ git+https://github.com/mrT23/pinecone-datasets.git@main
|
||||
|
@ -15,5 +15,5 @@ class TestClipTokens:
|
||||
|
||||
max_tokens = 10
|
||||
result = clip_tokens(text, max_tokens)
|
||||
expected_results = 'line1\nline2\nline3\nli\n...(truncated)'
|
||||
expected_results = 'line1\nline2\nline3\n\n...(truncated)'
|
||||
assert result == expected_results
|
||||
|
@ -1,5 +1,5 @@
|
||||
# Generated by CodiumAI
|
||||
from pr_agent.algo.utils import convert_to_markdown
|
||||
from pr_agent.algo.utils import PRReviewHeader, convert_to_markdown_v2
|
||||
from pr_agent.tools.pr_description import insert_br_after_x_chars
|
||||
|
||||
"""
|
||||
@ -52,9 +52,10 @@ class TestConvertToMarkdown:
|
||||
'suggestion': "Consider raising an exception or logging a warning when 'pr_url' attribute is not found. This can help in debugging issues related to the absence of 'pr_url' in instances where it's expected. [important]\n",
|
||||
'relevant_line': '[return ""](https://github.com/Codium-ai/pr-agent-pro/pull/102/files#diff-52d45f12b836f77ed1aef86e972e65404634ea4e2a6083fb71a9b0f9bb9e062fR199)'}]}
|
||||
|
||||
expected_output = '## PR Review 🔍\n\n<table>\n<tr>\n<tr><td> ⏱️ <strong>Estimated effort to review [1-5]</strong></td><td>\n\n1, because the changes are minimal and straightforward, focusing on a single functionality addition.\n\n\n</td></tr>\n<tr><td> 🧪 <strong>Relevant tests</strong></td><td>\n\nNo\n\n\n</td></tr>\n<tr><td> ⚡ <strong>Possible issues</strong></td><td>\n\nNo\n\n</td></tr>\n<tr><td> 🔒 <strong>Security concerns</strong></td><td>\n\nNo\n\n</td></tr>\n</table>\n\n\n<details><summary> <strong>Code feedback:</strong></summary>\n\n<hr><table><tr><td>relevant file</td><td>pr_agent/git_providers/git_provider.py\n</td></tr><tr><td>suggestion </td><td>\n\n<strong>\n\nConsider raising an exception or logging a warning when \'pr_url\' attribute is not found. This can help in debugging issues related to the absence of \'pr_url\' in instances where it\'s expected. [important]\n\n</strong>\n</td></tr><tr><td>relevant line</td><td><a href=\'https://github.com/Codium-ai/pr-agent-pro/pull/102/files#diff-52d45f12b836f77ed1aef86e972e65404634ea4e2a6083fb71a9b0f9bb9e062fR199\'>return ""</a></td></tr></table><hr>\n\n</details>'
|
||||
|
||||
assert convert_to_markdown(input_data).strip() == expected_output.strip()
|
||||
expected_output = f'{PRReviewHeader.REGULAR} 🔍\n\n<table>\n<tr><td>⏱️ <strong>Estimated effort to review</strong>: 1 🔵⚪⚪⚪⚪</td></tr>\n<tr><td>🧪 <strong>No relevant tests</strong></td></tr>\n<tr><td>⚡ <strong>Possible issues</strong>: No\n</td></tr>\n<tr><td>🔒 <strong>No security concerns identified</strong></td></tr>\n</table>\n\n\n<details><summary> <strong>Code feedback:</strong></summary>\n\n<hr><table><tr><td>relevant file</td><td>pr_agent/git_providers/git_provider.py\n</td></tr><tr><td>suggestion </td><td>\n\n<strong>\n\nConsider raising an exception or logging a warning when \'pr_url\' attribute is not found. This can help in debugging issues related to the absence of \'pr_url\' in instances where it\'s expected. [important]\n\n</strong>\n</td></tr><tr><td>relevant line</td><td><a href=\'https://github.com/Codium-ai/pr-agent-pro/pull/102/files#diff-52d45f12b836f77ed1aef86e972e65404634ea4e2a6083fb71a9b0f9bb9e062fR199\'>return ""</a></td></tr></table><hr>\n\n</details>'
|
||||
|
||||
assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()
|
||||
|
||||
# Tests that the function works correctly with an empty dictionary input
|
||||
def test_empty_dictionary_input(self):
|
||||
@ -63,7 +64,7 @@ class TestConvertToMarkdown:
|
||||
expected_output = ''
|
||||
|
||||
|
||||
assert convert_to_markdown(input_data).strip() == expected_output.strip()
|
||||
assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()
|
||||
|
||||
def test_dictionary_with_empty_dictionaries(self):
|
||||
input_data = {'review': {}, 'code_feedback': [{}]}
|
||||
@ -71,7 +72,7 @@ class TestConvertToMarkdown:
|
||||
expected_output = ''
|
||||
|
||||
|
||||
assert convert_to_markdown(input_data).strip() == expected_output.strip()
|
||||
assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()
|
||||
|
||||
class TestBR:
|
||||
def test_br1(self):
|
||||
|
@ -42,3 +42,49 @@ age: 35
|
||||
def test_empty_yaml_fixed(self):
|
||||
review_text = ""
|
||||
assert try_fix_yaml(review_text) is None
|
||||
|
||||
|
||||
# The function extracts YAML snippet
|
||||
def test_no_initial_yaml(self):
|
||||
review_text = '''\
|
||||
I suggest the following:
|
||||
|
||||
code_suggestions:
|
||||
- relevant_file: |
|
||||
src/index.ts
|
||||
label: |
|
||||
best practice
|
||||
|
||||
- relevant_file: |
|
||||
src/index2.ts
|
||||
label: |
|
||||
enhancment
|
||||
```
|
||||
|
||||
We can further improve the code by using the `const` keyword instead of `var` in the `src/index.ts` file.
|
||||
'''
|
||||
expected_output = {'code_suggestions': [{'relevant_file': 'src/index.ts\n', 'label': 'best practice\n'}, {'relevant_file': 'src/index2.ts\n', 'label': 'enhancment'}]}
|
||||
|
||||
assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='label') == expected_output
|
||||
|
||||
def test_with_initial_yaml(self):
|
||||
review_text = '''\
|
||||
I suggest the following:
|
||||
|
||||
```
|
||||
code_suggestions:
|
||||
- relevant_file: |
|
||||
src/index.ts
|
||||
label: |
|
||||
best practice
|
||||
|
||||
- relevant_file: |
|
||||
src/index2.ts
|
||||
label: |
|
||||
enhancment
|
||||
```
|
||||
|
||||
We can further improve the code by using the `const` keyword instead of `var` in the `src/index.ts` file.
|
||||
'''
|
||||
expected_output = {'code_suggestions': [{'relevant_file': 'src/index.ts\n', 'label': 'best practice\n'}, {'relevant_file': 'src/index2.ts\n', 'label': 'enhancment'}]}
|
||||
assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='label') == expected_output
|
Reference in New Issue
Block a user