Compare commits

..

234 Commits

Author SHA1 Message Date
5c527eca66 Merge remote-tracking branch 'origin/main' into pg/pip_package 2023-07-28 02:19:04 +03:00
b4ca52c7d8 updated Dockerfile.github_action 2023-07-28 02:18:12 +03:00
a78d741292 updated pyproject.toml 2023-07-28 02:09:01 +03:00
a50e137bba Merge pull request #133 from idavidov/idavidov/github-ratelimit-message
Handling GitHub API Rate Limit Exceeded Exception
2023-07-27 14:22:11 +03:00
92c0522f4d Merge pull request #144 from Codium-ai/tr/readme_update
Update README with 'Why use PR-Agent?' section
2023-07-27 10:43:56 +03:00
6a72df2981 Merge pull request #139 from Codium-ai/tr/changelog
Add feature to update CHANGELOG.md based on PR content
2023-07-27 09:04:48 +03:00
808ca48605 if not self.commit_changelog: 2023-07-27 08:48:39 +03:00
c827cbc0ae final touches 2023-07-27 08:47:26 +03:00
48fcb46d4f Delete CHANGELOG.md 2023-07-27 08:46:14 +03:00
66b94599ec Update CHANGELOG.md 2023-07-27 08:45:33 +03:00
231efb33c1 add CHANGELOG.md 2023-07-27 08:43:29 +03:00
eb798dae6f Why use PR-Agent
Why use PR-Agent
2023-07-27 08:25:05 +03:00
52576c79b3 Update CHANGELOG.md 2023-07-26 20:40:28 +03:00
cce2a79a1f add CHANGELOG.md 2023-07-26 20:40:15 +03:00
413e5f6d77 general 2023-07-26 20:37:38 +03:00
09ca848d4c Merge remote-tracking branch 'origin/tr/changelog' into tr/changelog 2023-07-26 20:33:32 +03:00
801923789b final 2023-07-26 20:33:21 +03:00
cfb696dfd5 Delete CHANGELOG.md 2023-07-26 20:09:18 +03:00
2e7a0a88fa Update CHANGELOG.md 2023-07-26 20:08:29 +03:00
1dbbafc30a add CHANGELOG.md 2023-07-26 20:08:06 +03:00
d8eae7faab Delete CHANGELOG.md 2023-07-26 20:06:23 +03:00
14eceb6e61 PRUpdateChangelog 2023-07-26 20:05:18 +03:00
884317c4f7 stable 2023-07-26 20:03:22 +03:00
c5f4b229b8 Merge pull request #142 from patryk-kowalski-ds/pk/local-git-provider-impvs
Improvements to Local Git Provider
2023-07-26 19:18:35 +03:00
5a2a17ec25 Merge pull request #140 from Codium-ai/tr/enhance_review
Enhancement of PRReviewer class in pr_reviewer.py
2023-07-26 17:32:15 +03:00
1bd47b0d53 enhance pr_reviewer.py code 2023-07-26 17:24:03 +03:00
7531ccd31f stable 2023-07-26 16:29:42 +03:00
3b19827ae2 Add validation for repository path 2023-07-26 15:29:09 +02:00
ea6e1811c1 Fixed PR title - should be feature branch name, not target branch name 2023-07-26 14:15:50 +02:00
bc2cf75b76 Use pyproject.toml to install dependencies instead of requirements.txt. Fix incorrect mangum version 2023-07-26 09:14:24 +02:00
9e1e0766b7 Set python min version to 3.10 2023-07-26 09:13:54 +02:00
ccde68293f Update README.md 2023-07-26 10:09:01 +03:00
99d53af28d Update CHANGELOG.md 2023-07-26 09:50:21 +03:00
5ea607be58 Add package setup 2023-07-26 08:48:12 +02:00
e3846a480e s 2023-07-26 09:21:31 +03:00
a60a58794c Merge pull request #132 from Codium-ai/tr/code_enhancment
Enhancement of GitHub Webhook and Polling Server
2023-07-26 07:24:46 +03:00
8ae5faca53 Fix cyclic dependency 2023-07-25 16:52:18 +03:00
28d6adf62a Quick fix for github action 2023-07-25 16:41:29 +03:00
1229fba346 + settings.github.ratelimit_retries setup in configuration.toml 2023-07-25 16:37:13 +03:00
59a59ebf66 Quick fix for github action 2023-07-25 16:36:58 +03:00
36ab12c486 Merge pull request #136 from Codium-ai/ok/handle_sub_group
Handle subgroup in GitLab merge request URL parsing
2023-07-25 16:15:35 +03:00
0254e3d04a Merge pull request #128 from patryk-kowalski-ds/deepsense.ai/local-git-provider
Add Local Git Provider Support
2023-07-25 16:15:02 +03:00
f6036e936e + settings.github.ratelimit_retries setup in configuration.toml 2023-07-25 15:23:40 +03:00
10a07e497d Handle sub group in gitlab MR URLs 2023-07-25 15:15:51 +03:00
3b334805ee still need GithubException.RateLimitExceededException in pr_processing.py for correct exception catch 2023-07-25 15:14:56 +03:00
b6f6c903a0 moved @retry to github_provider.py and fetch number of retries from settings 2023-07-25 15:12:02 +03:00
55637a5620 added retry decorator similar to used in ai_handler following @okotek suggestion 2023-07-25 14:42:54 +03:00
404cc0a00e small change to show message and fail 2023-07-25 14:20:20 +03:00
0815e2024c - Replaced two dot diff with three dot diff. Cleaned up obsolete code linked to double dot diff.
- Moved target_branch_existence assertion to _prepare_repo method
- Renamed branch_name -> target_branch_name
- Simplified get_files method
2023-07-25 13:07:21 +02:00
41dcb75e8e Merge pull request #134 from Codium-ai/ok/gitlat_use_oauth
Use OAuth token for GitLab API
2023-07-25 14:04:50 +03:00
d23daf880f Change gitlab API to use oauth_token instead of PAT (PAT should work as well) 2023-07-25 13:58:48 +03:00
d1a8a610e9 Revert "show how much time until rate limit reset"
This reverts commit 8f482cd41a.
2023-07-25 13:38:55 +03:00
918549a4fc Implementing 'is_supported' method 2023-07-25 12:35:39 +02:00
8f482cd41a show how much time until rate limit reset
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
2023-07-25 13:23:19 +03:00
34096059ff quick and dirty response for github API ratelimit, until some smart solution will be implemented 2023-07-25 13:05:56 +03:00
2dfbfec8c2 refactor 2023-07-24 19:48:24 +03:00
6170995665 replaced hardcoded main with actual target_branch name' 2023-07-24 16:59:07 +02:00
ca42a54bc3 Update pr_agent/git_providers/local_git_provider.py
Co-authored-by: Ori Kotek <orikotek@gmail.com>
2023-07-24 16:47:05 +02:00
c0610afe2a Update pr_agent/git_providers/local_git_provider.py
Co-authored-by: Ori Kotek <orikotek@gmail.com>
2023-07-24 16:46:46 +02:00
d4cbcc465c Update pr_agent/git_providers/local_git_provider.py
Co-authored-by: Ori Kotek <orikotek@gmail.com>
2023-07-24 16:46:36 +02:00
adb3f17258 Merge pull request #131 from Codium-ai/ok/gitlab_webook
GitLab Webhook Integration and Provider Enhancements
2023-07-24 16:01:17 +03:00
2c03a67312 Add labels 2023-07-24 16:00:51 +03:00
55eb741965 Merge pull request #125 from Codium-ai/tr/code_enhancment
Code Enhancement in PR Agent
2023-07-24 15:37:53 +03:00
8e6518f071 Added GitPython to requirements. Changed default review path (aesthetics) 2023-07-24 14:28:37 +02:00
c9c95d60d4 Implement gitlab webhook 2023-07-24 15:05:24 +03:00
02ecaa340f Local Git Provider Implementation 2023-07-24 12:49:57 +02:00
cca809e91c run_action 2023-07-24 12:45:24 +03:00
57ff46ecc1 stable 2023-07-24 12:41:00 +03:00
3819d52eb0 Merge remote-tracking branch 'origin/tr/code_enhancment' into tr/code_enhancment 2023-07-24 12:15:17 +03:00
3072325d2c PRDescription 2023-07-24 12:14:53 +03:00
abca2fdcb7 Merge remote-tracking branch 'origin/main' into tr/code_enhancment 2023-07-24 12:04:54 +03:00
4d84f76948 _get_prediction 2023-07-24 11:31:35 +03:00
dd8f6eb923 Merge pull request #126 from Codium-ai/ok/preserve_labels
Add functionality to preserve existing labels in PRs
2023-07-24 10:22:51 +03:00
b9c25e487a On /describe, preserve the current labels 2023-07-24 10:17:26 +03:00
1bf27c38a7 _prepare_pr_answer 2023-07-24 09:15:45 +03:00
1f987380ed Merge pull request #124 from Xyand/bugfix/mising_model
Bugfix - missing function argument
2023-07-24 07:36:21 +03:00
cd8bbbf889 bugfix 2023-07-24 00:58:21 +03:00
8e5498ee97 Merge pull request #122 from Codium-ai/update-readme-gifs-2
Update README.md
2023-07-23 17:40:26 +03:00
0412d7aca0 Update README.md 2023-07-23 17:38:08 +03:00
1eac3245d9 Merge pull request #121 from Codium-ai/update-gifs
Update GIF URLs in README
2023-07-23 17:33:47 +03:00
cd51bef7f7 Merge pull request #119 from zmeir/zmeir-code_suggestions_single_api_call
Optimize Code Suggestions API Calls
2023-07-23 17:30:37 +03:00
e8aa33fa0b Update README.md 2023-07-23 17:27:26 +03:00
54b021b02c Merge pull request #120 from Codium-ai/ok/remove_gitlab_polling
Temporarily remove gitlab polling server until a rewrite is ready
2023-07-23 17:07:59 +03:00
32151e3d9a Temporarily remove gitlab polling server until a rewrite is ready 2023-07-23 17:04:41 +03:00
32358678e6 Reduce the number of GitHub API calls when pushing code suggestions 2023-07-23 16:59:08 +03:00
42e32664a1 Merge pull request #118 from Codium-ai/ok/fallback_models
Handling exceptions in fallback models
2023-07-23 16:43:30 +03:00
1e97236a15 Add support for fallback models 2023-07-23 16:39:25 +03:00
321f7bce46 Merge pull request #117 from Codium-ai/ok/fallback_models
Implementing Fallback Models for Tokenization
2023-07-23 16:20:10 +03:00
02a1d8dbfc Add support for fallback models 2023-07-23 16:16:36 +03:00
e34f9d8d1c Merge pull request #116 from Codium-ai/fix-describe-gif
Fix describe gif
2023-07-23 14:18:21 +03:00
35dac012bd Update README.md 2023-07-23 14:17:27 +03:00
21ced18f50 Merge pull request #115 from Codium-ai/ok/readme_update
Update Installation Instructions in README
2023-07-23 13:42:06 +03:00
fca78cf395 Merge pull request #114 from Codium-ai/update-readme-gifs
Update README GIFs
2023-07-23 13:33:27 +03:00
d1b91b0ea3 Update INSTALL.md 2023-07-23 13:03:44 +03:00
76e00acbdb Update INSTALL.md 2023-07-23 13:02:32 +03:00
2f83e7738c Update README.md gifs 2023-07-23 12:06:04 +03:00
f4a226b0f7 Merge pull request #113 from Codium-ai/tr/cli_refactor
Refactoring CLI Commands Handling
2023-07-23 08:37:26 +03:00
f5e2838fc3 refactor 2023-07-21 22:12:51 +03:00
bbdfd2c3d4 Merge pull request #108 from patryk-kowalski-ds/deepsense.ai/configurable-language-extensions
Configurable Language Extensions
2023-07-21 21:47:52 +03:00
74572e1768 Update action.yaml 2023-07-20 22:02:08 +03:00
f0a17b863c Merge remote-tracking branch 'origin/main' 2023-07-20 22:00:24 +03:00
86fd84e113 Update action name 2023-07-20 22:00:17 +03:00
d5b9be23d3 Merge pull request #110 from linuxlewis/main
Fix TypeError for GitlabProvider
2023-07-20 19:52:43 +03:00
057bb3932f Merge pull request #109 from Codium-ai/mrT23-patch-1
Update README.md
2023-07-20 19:51:51 +03:00
05f29cc406 Fix TypeError for GitlabProvider 2023-07-20 11:49:42 -05:00
63c4c7e584 Merge pull request #90 from zmeir/zmeir-output_progress
Add Option to Control Comment Publishing Progress
2023-07-20 18:48:20 +03:00
1ea23cab96 Merge pull request #105 from Codium-ai/ok/retry_on_rate_limit_error
Retry on Rate Limit Error
2023-07-20 18:31:04 +03:00
e99f9fd59f Update README.md 2023-07-20 17:36:40 +03:00
fdf6a3e833 Merge pull request #107 from Codium-ai/mrT23-patch-1
Update README.md
2023-07-20 16:49:20 +03:00
79cb94b4c2 Add use_extra_bad_extensions to configuration.toml 2023-07-20 15:41:03 +02:00
9adec7cc10 README update 2023-07-20 16:40:14 +03:00
1f0df47b4d Update README.md 2023-07-20 16:39:28 +03:00
a71a12791b Move hard-coded language configurations to a configuration file.
Load this configuration file in the config loader.
2023-07-20 15:37:42 +02:00
23fa834721 Merge pull request #106 from Codium-ai/ok/readme_update
Update README.md
2023-07-20 15:35:12 +03:00
9f67d07156 README update 2023-07-20 15:32:16 +03:00
6731a7643e README update 2023-07-20 15:31:35 +03:00
f87fdd88ad README update 2023-07-20 15:29:46 +03:00
f825f6b90a README update 2023-07-20 15:29:46 +03:00
f5d5008a24 README update 2023-07-20 15:29:46 +03:00
0b63d4cde5 README update 2023-07-20 15:29:46 +03:00
2e246869d0 Retry on rate limit error on OpenAI calls 2023-07-20 15:02:34 +03:00
2f9546e144 Retry on rate limit error on OpenAI calls 2023-07-20 15:01:12 +03:00
6134c2ff61 Merge remote-tracking branch 'origin/main' 2023-07-20 14:56:24 +03:00
3cfbba74f8 Fix README 2023-07-20 14:56:14 +03:00
050bb60671 Merge pull request #103 from Codium-ai/mrT23-patch-1
Update README.md
2023-07-20 14:39:55 +03:00
12a7e1ce6e Update README.md 2023-07-20 12:09:58 +03:00
cd0438005b Merge pull request #101 from Codium-ai/tr/docstring
Adding Docstrings to Python Files
2023-07-20 11:53:17 +03:00
7c3188ae06 Merge pull request #102 from Codium-ai/hl/question_fix
Refactoring Question Handling in CLI
2023-07-20 11:18:00 +03:00
6cd38a37cd fix question cli 2023-07-20 11:10:34 +03:00
12e51bb6aa Merge remote-tracking branch 'origin/main' into tr/docstring 2023-07-20 10:54:53 +03:00
e2a4cd6b03 docstring 2023-07-20 10:51:21 +03:00
329e228aa2 Merge pull request #100 from Codium-ai/tr/describe_labels
Enhancement of Code Review Functionality
2023-07-19 21:32:28 +03:00
3d5d517f2a code suggestions 2023-07-19 20:57:14 +03:00
a2eb2e4dac Update pr_agent/git_providers/github_provider.py 2023-07-19 20:31:10 +03:00
d89792d379 PR Type label 2023-07-19 20:25:54 +03:00
23ed2553c4 Update README.md 2023-07-19 18:42:21 +03:00
fe29ce2911 Update README.md 2023-07-19 18:33:38 +03:00
df25a3ede2 typo 2023-07-19 18:22:26 +03:00
4c36fb4df2 args bug 2023-07-19 18:18:18 +03:00
67c61e0ac8 Update help 2023-07-19 17:27:12 +03:00
0985db4e36 Update help 2023-07-19 17:20:26 +03:00
ee2c00abeb split(" ") -> split() 2023-07-19 17:14:55 +03:00
577f24d107 Merge pull request #89 from zmeir/zmeir-review_score
Add Score Review Feature
2023-07-19 17:07:05 +03:00
fc24b34c2b Merge pull request #96 from Codium-ai/ok/update_readme
Update INSTALL.md with additional information on handling secrets
2023-07-19 17:05:12 +03:00
1e962476da Merge pull request #94 from Codium-ai/hl/incremental_review
Add Incremental Review /review -i
2023-07-19 17:02:38 +03:00
3326327572 More refactoring.... 2023-07-19 17:01:56 +03:00
36be79ea38 ignore merge from main 2023-07-19 16:14:59 +03:00
523839be7d Merge commit 'd1586ddd77b86f0d3b29aee3370f29624799e388' into hl/incremental_review 2023-07-19 16:14:33 +03:00
d1586ddd77 Merge pull request #97 from Codium-ai/mrT23-patch-1
Update utils.py
2023-07-19 15:32:41 +03:00
3420853923 Merge pull request #98 from Codium-ai/hl/update_readme
Update README.md
2023-07-19 15:32:06 +03:00
1f373d7b0a Update README.md 2023-07-19 15:31:29 +03:00
7fdbd6a680 Update utils.py 2023-07-19 15:12:50 +03:00
17b40a1fa1 Merge commit '7abbe08ff15d31c7e20de6d88638bfe27430c2f4' into hl/incremental_review 2023-07-19 14:24:44 +03:00
c47e74c5c7 Merge commit '5bc2ef1eff1f570779191a8f3c7a562f5b8fe230' into hl/incremental_review 2023-07-19 14:24:01 +03:00
7abbe08ff1 Merge pull request #95 from Codium-ai/tr/disable_review_suggestiosn
disable code suggestions by default for review
2023-07-19 14:23:54 +03:00
8038b6ab99 refactor and clean 2023-07-19 14:22:34 +03:00
6e26ad0966 Merge branch 'main' into zmeir-review_score 2023-07-19 13:37:55 +03:00
7e2449b228 Changed score type to int 2023-07-19 13:37:35 +03:00
97bfee47a3 minor fixes 2023-07-19 11:34:55 +03:00
3b27c834a4 Merge remote-tracking branch 'origin/main' into ok/update_readme 2023-07-19 11:14:44 +03:00
5bc2ef1eff Merge pull request #92 from YuviGold/deploy-on-lambda
Deployment on AWS Lambda
2023-07-19 11:12:29 +03:00
2f558006bf Update INSTALL.md, add notes about injecting secrets 2023-07-19 11:09:35 +03:00
8868c92141 Merge branch 'main' into zmeir-review_score 2023-07-19 11:05:24 +03:00
370520df51 Update docker/Dockerfile.lambda
have a fixed mangum version

Co-authored-by: Ori Kotek <orikotek@gmail.com>
2023-07-19 11:05:24 +03:00
e17dd66dce Disable score review by default 2023-07-19 11:00:28 +03:00
fc8494d696 Rephrase score description in prompt 2023-07-19 10:59:52 +03:00
f8aea909b4 Add example output 2023-07-19 10:57:35 +03:00
2e832b8fb4 Merge pull request #86 from Codium-ai/GadiZimerman-patch-1
Update README.md
2023-07-19 10:51:31 +03:00
ccddbeccad num_code_suggestions=0 for review 2023-07-19 09:34:17 +03:00
a47fa342cb Merge pull request #88 from zmeir/zmeir-cli_args
CLI Arguments Refactoring
2023-07-19 08:15:19 +03:00
f73cddcb93 Change Review title when 2023-07-19 01:03:47 +03:00
5f36f0d753 Merge commit 'bdf7eff7cd0a8894c3e66e49bdf89f27da1bfcb4' into hl/incremental_review 2023-07-18 23:28:43 +03:00
dc4bf13d39 Add Incremental Review 2023-07-18 23:14:47 +03:00
bdf7eff7cd Merge pull request #87 from Codium-ai/tr/bug_fix
Add Insights from User's Answers and Fix User Answers Fetching
2023-07-18 18:20:15 +03:00
dc67e6a66e Support deploying pr-agent on AWS Lambda 2023-07-18 17:46:42 +03:00
6d91f44634 Added configuration option to control publishing review progress
This can be useful in a few situations:
1. To reduce the number of GitHub API calls (thus avoiding hitting the rate limit)
2. When the trigger for the agent is an external process (e.g. some external CI job), so there is no need to publish a message like "preparing review..." because it's not a part of a natural conversation with the user
2023-07-18 16:41:58 +03:00
0396e10706 Add configuration to request a score for the PR
This can help teams compare the review of the PR agent with that of a human reviewer, and fine-tune a score threshold for automatic approval where they decide the agent's review is satisfactory.
2023-07-18 16:40:35 +03:00
77f243b7ab Allow passing CLI args (helps with debugging) 2023-07-18 16:39:46 +03:00
c507785475 bugfix 2023-07-18 16:32:51 +03:00
5c5015b267 Update README.md 2023-07-18 14:45:15 +03:00
3efe08d619 Merge pull request #85 from Codium-ai/hl/always_filer_bad_extensions
Filter out bad files before getting their head and original source code and diff
2023-07-18 13:50:25 +03:00
2e36fce4eb Merge pull request #83 from Codium-ai/hl/gitlab_description
Support describe for Gitlab
2023-07-18 13:47:32 +03:00
d6d4427545 Merge pull request #84 from Codium-ai/GadiZimerman-patch-1
Update README.md
2023-07-18 13:37:43 +03:00
5d45632247 Performance improvement: Filter out bad files before getting their head and original source code and diff 2023-07-18 13:33:32 +03:00
90c045e3d0 Update README.md
changing image
2023-07-18 13:26:19 +03:00
7f0a96d8f7 readme 2023-07-18 13:17:30 +03:00
8fb9affef3 add try catch 2023-07-18 13:14:01 +03:00
6c42a471e1 Merge pull request #76 from zmeir/zmeir-publish_inline_comments_single_api_call
Optimization of Inline Comments Publishing
2023-07-18 13:05:11 +03:00
f2b74b6970 support gitlab describe function 2023-07-18 13:03:36 +03:00
ffd11aeffc Merge pull request #81 from Codium-ai/GadiZimerman-patch-1
Update README.md
2023-07-18 12:55:26 +03:00
05e4e09dfc Lint 2023-07-18 12:27:28 +03:00
13092118dc Move the new git provider function to the abstract interface 2023-07-18 12:26:49 +03:00
7d108992fc Merge remote-tracking branch 'origin/main' into zmeir-publish_inline_comments_single_api_call 2023-07-18 11:53:41 +03:00
e5a8ed205e Merge pull request #82 from Codium-ai/ok/lint
Linting and Code Cleanup
2023-07-18 11:40:43 +03:00
90f97b0226 Lint fixes 2023-07-18 11:34:57 +03:00
9e0f5f0ccc Merge pull request #78 from Codium-ai/tr/agent_logic
Enhancement of PR Agent with User Interaction
2023-07-18 10:37:08 +03:00
87ea0176b9 Update README.md 2023-07-18 10:36:05 +03:00
62f08f4ec4 removed an unneeded file 2023-07-18 10:35:05 +03:00
fe0058f25f Merge branch 'tr/agent_logic' of github.com:Codium-ai/pr-agent into tr/agent_logic 2023-07-18 10:34:40 +03:00
6d2673f39d Merge remote-tracking branch 'origin/tr/agent_logic' into tr/agent_logic 2023-07-18 10:32:43 +03:00
b3a1d456b2 if settings.pr_reviewer.num_code_suggestions 2023-07-18 10:32:36 +03:00
f77a5f6929 Call PRAgent from github_action_runner.py 2023-07-18 10:31:24 +03:00
fdeae9c209 Update pr_agent/agent/pr_agent.py 2023-07-18 10:20:52 +03:00
a994ec1427 Call PRAgent from github_action_runner.py 2023-07-18 10:19:32 +03:00
e5259e2f5c Small refactor 2023-07-18 10:17:09 +03:00
978348240b Update README.md 2023-07-18 09:59:47 +03:00
4d92e7d9c2 Update README.md
consider changing section headers to reflect commands format
2023-07-18 09:56:40 +03:00
6f1b418b25 Merge pull request #79 from patryk-kowalski-ds/deepsense.ai/gitlab-provider-file-creation-handling
Fixes 404 error on gitlab file provider happening in case a MR introduced a new file.
2023-07-18 08:27:59 +03:00
51e08c3c2b reflect and review + protections 2023-07-18 08:22:25 +03:00
4c29ff2db1 Merge remote-tracking branch 'origin/tr/agent_logic' into tr/agent_logic
# Conflicts:
#	pr_agent/tools/pr_description.py
2023-07-18 08:06:47 +03:00
5fbaa4366f publish_output instead publish_review 2023-07-18 08:05:42 +03:00
aee08ebbfe Merge branch 'main' into tr/agent_logic 2023-07-18 08:04:47 +03:00
6ad8df6be7 Merge pull request #80 from Codium-ai/ok/remove_pics
Remove most pics from repo
2023-07-17 23:51:24 +03:00
539edcad3c works 2023-07-17 16:53:38 +03:00
b7172df700 Remove most pics from repo 2023-07-17 16:52:23 +03:00
768bd40ad8 Remove most pics from repo 2023-07-17 16:50:27 +03:00
ea27c63f13 Insights from user's answers 2023-07-17 15:59:57 +03:00
c866288b0a Merge remote-tracking branch 'origin/main' into tr/agent_logic 2023-07-17 15:59:37 +03:00
8ae3c60670 In case of new file creation by the MR there is a 404 error on file retrieval by gitlab provider.
It was handled by catching the error and replacing the file string with an empty string.
Type checking was added before byte decoding - necessary in case of the empty string.
2023-07-17 14:53:23 +02:00
f8f415eb75 stable 2023-07-17 15:49:29 +03:00
24583b05f7 Publish GitHub review comments with single API call 2023-07-17 10:41:02 +03:00
fa421fd169 Merge pull request #75 from Codium-ai/bugfix/rename_get_description
get_description was removed
2023-07-17 10:32:01 +03:00
e0ae5c945e get_description was removed 2023-07-17 10:30:44 +03:00
865888e4e8 Merge pull request #74 from Codium-ai/update-gifs
Update GIFs
2023-07-17 09:35:06 +03:00
3b7cfe7bc5 Merge pull request #73 from Codium-ai/hl/clean_comments
Clean comments
2023-07-17 09:33:49 +03:00
262f9dddbc Merge pull request #72 from Codium-ai/tr/minor_fixes
Minor fixes
2023-07-17 09:33:18 +03:00
fa706b6e96 update gifs 2023-07-17 09:30:45 +03:00
ff51ab0946 Add files via upload 2023-07-17 09:27:41 +03:00
7884aa2348 Clean 2023-07-17 09:25:38 +03:00
8f3520807c minor fixes
minor fixes
2023-07-17 08:42:18 +03:00
fa90b242e3 pr_information_from_user_prompts 2023-07-17 08:09:56 +03:00
2dfd34bd61 Merge pull request #71 from Codium-ai/Minor-spelling-fix
Minor Spelling Fix
2023-07-17 08:08:45 +03:00
48f569bef0 Update README.md 2023-07-17 02:39:58 +03:00
a20fb9cc0c Merge pull request #70 from Codium-ai/hl/gitlab_code_suggestion
GitLab Code Suggestions Integration
2023-07-17 02:11:30 +03:00
67 changed files with 2414 additions and 624 deletions

View File

@ -1,2 +1,3 @@
venv/
pr_agent/settings/.secrets.toml
pr_agent/settings/.secrets.toml
pics/

4
.gitignore vendored
View File

@ -1,4 +1,6 @@
.idea/
venv/
pr_agent/settings/.secrets.toml
__pycache__
__pycache__
dist/
*.egg-info/

6
CHANGELOG.md Normal file
View File

@ -0,0 +1,6 @@
## 2023-07-26
### Added
- New feature for updating the CHANGELOG.md based on the contents of a PR.
- Added support for this feature for the Github provider.
- New configuration settings and prompts for the changelog update feature.

View File

@ -9,6 +9,7 @@ You can select your git_provider with the flag `git_provider` in the `config` se
You can enable/disable the different PR Reviewer abilities with the following flags (`pr_reviewer` section):
```
require_focused_review=true
require_score_review=true
require_tests_review=true
require_security_review=true
```

View File

@ -1,8 +1,8 @@
FROM python:3.10 as base
WORKDIR /app
ADD requirements.txt .
RUN pip install -r requirements.txt && rm requirements.txt
ADD pyproject.toml .
RUN pip install . && rm pyproject.toml
ENV PYTHONPATH=/app
ADD pr_agent pr_agent
ADD github_action/entrypoint.sh /

View File

@ -95,9 +95,10 @@ cp pr_agent/settings/.secrets_template.toml pr_agent/settings/.secrets.toml
# Edit .secrets.toml file
```
4. Run the appropriate Python scripts from the scripts folder:
4. Add the pr_agent folder to your PYTHONPATH, then run the cli.py script:
```
export PYTHONPATH=[$PYTHONPATH:]<PATH to pr_agent folder>
python pr_agent/cli.py --pr_url <pr_url> review
python pr_agent/cli.py --pr_url <pr_url> ask <your question>
python pr_agent/cli.py --pr_url <pr_url> describe
@ -149,16 +150,35 @@ git clone https://github.com/Codium-ai/pr-agent.git
```
5. Copy the secrets template file and fill in the following:
```
cp pr_agent/settings/.secrets_template.toml pr_agent/settings/.secrets.toml
# Edit .secrets.toml file
```
- Your OpenAI key.
- Set deployment_type to 'app'
- Copy your app's private key to the private_key field.
- Copy your app's ID to the app_id field.
- Copy your app's webhook secret to the webhook_secret field.
- Set deployment_type to 'app' in [configuration.toml](./pr_agent/settings/configuration.toml)
> The .secrets.toml file is not copied to the Docker image by default, and is only used for local development.
> If you want to use the .secrets.toml file in your Docker image, you can remove it from the .dockerignore file.
> In most production environments, you would inject the secrets file as environment variables or as mounted volumes.
> For example, in order to inject a secrets file as a volume in a Kubernetes environment you can update your pod spec to include the following,
> assuming you have a secret named `pr-agent-settings` with a key named `.secrets.toml`:
```
volumes:
- name: settings-volume
secret:
secretName: pr-agent-settings
// ...
containers:
// ...
volumeMounts:
- mountPath: /app/pr_agent/settings_prod
name: settings-volume
```
cp pr_agent/settings/.secrets_template.toml pr_agent/settings/.secrets.toml
# Edit .secrets.toml file
```
> Another option is to set the secrets as environment variables in your deployment environment, for example `OPENAI.KEY` and `GITHUB.USER_TOKEN`.
6. Build a Docker image for the app and optionally push it to a Docker repository. We'll use Dockerhub as an example:
@ -169,6 +189,7 @@ docker push codiumai/pr-agent:github_app # Push to your Docker repository
7. Host the app using a server, serverless function, or container environment. Alternatively, for development and
debugging, you may use tools like smee.io to forward webhooks to your local machine.
You can check [Deploy as a Lambda Function](#deploy-as-a-lambda-function)
8. Go back to your app's settings, and set the following:
@ -178,3 +199,20 @@ docker push codiumai/pr-agent:github_app # Push to your Docker repository
9. Install the app by navigating to the "Install App" tab and selecting your desired repositories.
---
#### Deploy as a Lambda Function
1. Follow steps 1-5 of [Method 5](#method-5-run-as-a-github-app).
2. Build a docker image that can be used as a lambda function
```shell
docker buildx build --platform=linux/amd64 . -t codiumai/pr-agent:serverless -f docker/Dockerfile.lambda
```
3. Push image to ECR
```shell
docker tag codiumai/pr-agent:serverless <AWS_ACCOUNT>.dkr.ecr.<AWS_REGION>.amazonaws.com/codiumai/pr-agent:serverless
docker push <AWS_ACCOUNT>.dkr.ecr.<AWS_REGION>.amazonaws.com/codiumai/pr-agent:serverless
```
4. Create a lambda function that uses the uploaded image. Set the lambda timeout to be at least 3m.
5. Configure the lambda function to have a Function URL.
6. Go back to steps 8-9 of [Method 5](#method-5-run-as-a-github-app) with the function url as your Webhook URL.
The Webhook URL would look like `https://<LAMBDA_FUNCTION_URL>/api/v1/github_webhooks`

View File

@ -39,4 +39,4 @@ We use [tiktoken](https://github.com/openai/tiktoken) to tokenize the patches af
4. If we haven't reached the max token length, add the `deleted files` to the prompt until the prompt reaches the max token length (hard stop), then skip the rest of the patches.
### Example
![](./pics/git_patch_logic.png)
![](https://codium.ai/images/git_patch_logic.png)

139
README.md
View File

@ -2,21 +2,24 @@
<div align="center">
<img src="./pics/logo-dark.png#gh-dark-mode-only" width="250"/>
<img src="./pics/logo-light.png#gh-light-mode-only" width="250"/>
<img src="./pics/logo-dark.png#gh-dark-mode-only" width="330"/>
<img src="./pics/logo-light.png#gh-light-mode-only" width="330"/><br/>
Making pull requests less painful with an AI agent
</div>
[![GitHub license](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://github.com/Codium-ai/pr-agent/blob/main/LICENSE)
[![Discord](https://badgen.net/badge/icon/discord?icon=discord&label&color=purple)](https://discord.com/channels/1057273017547378788/1126104260430528613)
<a href="https://github.com/Codium-ai/pr-agent/commits/main">
<img alt="GitHub" src="https://img.shields.io/github/last-commit/Codium-ai/pr-agent/main?style=for-the-badge" height="20">
</a>
</div>
<div style="text-align:left;">
CodiumAI `PR-Agent` is an open-source tool aiming to help developers review PRs faster and more efficiently. It automatically analyzes the PR and can provide several types of feedback:
CodiumAI `PR-Agent` is an open-source tool aiming to help developers review pull requests faster and more efficiently. It automatically analyzes the pull request and can provide several types of feedback:
**Auto-Description**: Automatically generating PR description - name, type, summary, and code walkthrough.
**Auto-Description**: Automatically generating PR description - title, type, summary, code walkthrough and PR labels.
\
**PR Review**: Feedback about the PR main theme, type, relevant tests, security issues, focused PR, and various suggestions for the PR content.
**PR Review**: Adjustable feedback about the PR main theme, type, relevant tests, security issues, focus, score, and various suggestions for the PR content.
\
**Question Answering**: Answering free-text questions about the PR.
\
@ -24,125 +27,151 @@ CodiumAI `PR-Agent` is an open-source tool aiming to help developers review PRs
<h3>Example results:</h3>
</div>
<h4>Describe:</h4>
<h4>/describe:</h4>
<div align="center">
<p float="center">
<img src="./pics/describe.gif" width="800">
<img src="https://www.codium.ai/images/describe-2.gif" width="800">
</p>
</div>
<h4>Review:</h4>
<h4>/review:</h4>
<div align="center">
<p float="center">
<img src="./pics/review.gif" width="800">
<img src="https://www.codium.ai/images/review-2.gif" width="800">
</p>
</div>
<h4>Ask:</h4>
<h4>/reflect_and_review:</h4>
<div align="center">
<p float="center">
<img src="./pics/ask.gif" width="800">
<img src="https://www.codium.ai/images/reflect_and_review.gif" width="800">
</p>
</div>
<h4>Improve:</h4>
<h4>/ask:</h4>
<div align="center">
<p float="center">
<img src="./pics/improve.gif" width="800">
<img src="https://www.codium.ai/images/ask-2.gif" width="800">
</p>
</div>
<h4>/improve:</h4>
<div align="center">
<p float="center">
<img src="https://www.codium.ai/images/improve-2.gif" width="800">
</p>
</div>
<div align="left">
- [Live demo](#live-demo)
- [Overview](#overview)
- [Quickstart](#quickstart)
- [Try it now](#try-it-now)
- [Installation](#installation)
- [Usage and tools](#usage-and-tools)
- [Configuration](./CONFIGURATION.md)
- [How it works](#how-it-works)
- [Why use PR-Agent](#why-use-pr-agent)
- [Roadmap](#roadmap)
- [Similar projects](#similar-projects)
</div>
## Live demo
Experience GPT-4 powered PR review on your public GitHub repository with our hosted PR-Agent. To try it, just mention `@CodiumAI-Agent` and add the desired command in any PR comment! The agent will generate a response based on your command.
![Review generation process](./pics/demo.gif)
To set up your own PR-Agent, see the [Quickstart](#Quickstart) section
---
## Overview
`PR-Agent` offers extensive pull request functionalities across various git providers:
| | | Github | Gitlab | Bitbucket |
|-------|---------------------------------------------|--------|--------|-----------|
| TOOLS | Review | | | |
| | ⮑ Inline review | | | |
| | Ask | | | |
| | Auto-Description | | | |
| | Improve Code | | | |
| | | GitHub | Gitlab | Bitbucket |
|-------|---------------------------------------------|:------:|:------:|:---------:|
| TOOLS | Review | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| | ⮑ Inline review | :white_check_mark: | :white_check_mark: | |
| | Ask | :white_check_mark: | :white_check_mark: | |
| | Auto-Description | :white_check_mark: | :white_check_mark: | |
| | Improve Code | :white_check_mark: | :white_check_mark: | |
| | Reflect and Review | :white_check_mark: | | |
| | Update CHANGELOG.md | :white_check_mark: | | |
| | | | | |
| USAGE | CLI | | | |
| | Tagging bot | | | |
| | Actions | | | |
| USAGE | CLI | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| | App / webhook | :white_check_mark: | :white_check_mark: | |
| | Tagging bot | :white_check_mark: | | |
| | Actions | :white_check_mark: | | |
| | | | | |
| CORE | PR compression | | | |
| | Repo language prioritization | | | |
| | Adaptive and token-aware<br />file patch fitting | | | |
| CORE | PR compression | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| | Repo language prioritization | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| | Adaptive and token-aware<br />file patch fitting | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| | Incremental PR Review | :white_check_mark: | | |
Examples for invoking the different tools via the [CLI](#quickstart):
Examples for invoking the different tools via the CLI:
- **Review**: python cli.py --pr-url=<pr_url> review
- **Describe**: python cli.py --pr-url=<pr_url> describe
- **Improve**: python cli.py --pr-url=<pr_url> improve
- **Ask**: python cli.py --pr-url=<pr_url> ask "Write me a poem about this PR"
- **Reflect**: python cli.py --pr-url=<pr_url> reflect
- **Update changelog**: python cli.py --pr-url=<pr_url> update_changelog
"<pr_url>" is the url of the relevant PR (for example: https://github.com/Codium-ai/pr-agent/pull/50).
In the [configuration](./CONFIGURATION.md) file you can select your git provider (Github, Gitlab, Bitbucket), and further configure the different tools.
In the [configuration](./CONFIGURATION.md) file you can select your git provider (GitHub, Gitlab, Bitbucket), and further configure the different tools.
## Quickstart
## Try it now
Try GPT-4 powered PR-Agent on your public GitHub repository for free. Just mention `@CodiumAI-Agent` and add the desired command in any PR comment! The agent will generate a response based on your command.
![Review generation process](https://www.codium.ai/images/demo-2.gif)
To set up your own PR-Agent, see the [Installation](#installation) section.
---
## Installation
To get started with PR-Agent quickly, you first need to acquire two tokens:
1. An OpenAI key from [here](https://platform.openai.com/), with access to GPT-4.
2. A GitHub personal access token (classic) with the repo scope.
There are several ways to use PR-Agent. Let's start with the simplest one:
## Install
Here are several ways to install and run PR-Agent:
There are several ways to use PR-Agent:
- [Method 1: Use Docker image (no installation required)](INSTALL.md#method-1-use-docker-image-no-installation-required)
- [Method 2: Run as a Github Action](INSTALL.md#method-2-run-as-a-github-action)
- [Method 2: Run as a GitHub Action](INSTALL.md#method-2-run-as-a-github-action)
- [Method 3: Run from source](INSTALL.md#method-3-run-from-source)
- [Method 4: Run as a polling server](INSTALL.md#method-4-run-as-a-polling-server)
- Request reviews by tagging your Github user on a PR
- [Method 5: Run as a Github App](INSTALL.md#method-5-run-as-a-github-app)
- Request reviews by tagging your GitHub user on a PR
- [Method 5: Run as a GitHub App](INSTALL.md#method-5-run-as-a-github-app)
- Allowing you to automate the review process on your private or public repositories
## Usage and Tools
**PR-Agent** provides four types of interactions ("tools"): `"PR Reviewer"`, `"PR Q&A"`, `"PR Description"` and `"PR Code Suggestions"`.
**PR-Agent** provides five types of interactions ("tools"): `"PR Reviewer"`, `"PR Q&A"`, `"PR Description"`, `"PR Code Suggestions"` and `"PR Reflect and Review"`.
- The "PR Reviewer" tool automatically analyzes PRs, and provides different types of feedback.
- The "PR Ask" tool answers free-text questions about the PR.
- The "PR Reviewer" tool automatically analyzes PRs, and provides various types of feedback.
- The "PR Q&A" tool answers free-text questions about the PR.
- The "PR Description" tool automatically sets the PR Title and body.
- The "PR Code Suggestion" tool provides inline code suggestions for the PR that can be applied and committed.
- The "PR Reflect and Review" tool initiates a dialog with the user, asks them to reflect on the PR, and then provides a more focused review.
## How it works
![PR-Agent Tools](./pics/pr_agent_overview.png)
![PR-Agent Tools](https://www.codium.ai/wp-content/uploads/2023/07/codiumai-diagram-v4.jpg)
Check out the [PR Compression strategy](./PR_COMPRESSION.md) page for more details on how we convert a code diff to a manageable LLM prompt
## Why use PR-Agent?
A reasonable question that can be asked is: `"Why use PR-Agent? What makes it stand out from existing tools?"`
Here are some of the reasons why:
- We emphasize **real-life practical usage**. Each tool (review, improve, ask, ...) has a single GPT-4 call, no more. We feel that this is critical for realistic team usage - obtaining an answer quickly (~30 seconds) and affordably.
- Our [PR Compression strategy](./PR_COMPRESSION.md) is a core ability that enables us to effectively tackle both short and long PRs.
- Our JSON prompting strategy enables us to have **modular, customizable tools**. For example, the '/review' tool categories can be controlled via the configuration file. Adding additional categories is easy and accessible.
- We support **multiple git providers** (GitHub, Gitlab, Bitbucket), and multiple ways to use the tool (CLI, GitHub Action, Docker, ...).
- We are open-source, and welcome contributions from the community.
## Roadmap
- [ ] Support open-source models, as a replacement for openai models. (Note - a minimal requirement for each open-source model is to have 8k+ context, and good support for generating json as an output)
- [ ] Support open-source models, as a replacement for OpenAI models. (Note - a minimal requirement for each open-source model is to have 8k+ context, and good support for generating JSON as an output)
- [x] Support other Git providers, such as Gitlab and Bitbucket.
- [ ] Develop additional logics for handling large PRs, and compressing git patches
- [ ] Dedicated tools and sub-tools for specific programming languages (Python, Javascript, Java, C++, etc)
- [ ] Develop additional logic for handling large PRs, and compressing git patches
- [ ] Add additional context to the prompt. For example, repo (or relevant files) summarization, with tools such as [ctags](https://github.com/universal-ctags/ctags)
- [ ] Adding more tools. Possible directions:
- [x] PR description
- [x] Inline code suggestions
- [x] Reflect and review
- [ ] Enforcing CONTRIBUTING.md guidelines
- [ ] Performance (are there any performance issues)
- [ ] Documentation (is the PR properly documented)

View File

@ -1,5 +1,8 @@
name: 'PR Agent'
name: 'Codium PR Agent'
description: 'Summarize, review and suggest improvements for pull requests'
branding:
icon: 'award'
color: 'green'
runs:
using: 'docker'
image: 'Dockerfile.github_action_dockerhub'

View File

@ -1,8 +1,8 @@
FROM python:3.10 as base
WORKDIR /app
ADD requirements.txt .
RUN pip install -r requirements.txt && rm requirements.txt
ADD pyproject.toml .
RUN pip install . && rm pyproject.toml
ENV PYTHONPATH=/app
ADD pr_agent pr_agent

12
docker/Dockerfile.lambda Normal file
View File

@ -0,0 +1,12 @@
FROM public.ecr.aws/lambda/python:3.10
RUN yum update -y && \
yum install -y gcc python3-devel && \
yum clean all
ADD pyproject.toml .
RUN pip install . && rm pyproject.toml
RUN pip install mangum==0.17.0
COPY pr_agent/ ${LAMBDA_TASK_ROOT}/pr_agent/
CMD ["pr_agent.servers.serverless.serverless"]

BIN
pics/.DS_Store vendored

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.8 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 13 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 13 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 346 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 9.3 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

After

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 21 KiB

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 260 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.4 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 316 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 335 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 193 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 161 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 185 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 162 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 MiB

View File

@ -1,9 +1,12 @@
import re
from pr_agent.config_loader import settings
from pr_agent.tools.pr_code_suggestions import PRCodeSuggestions
from pr_agent.tools.pr_description import PRDescription
from pr_agent.tools.pr_information_from_user import PRInformationFromUser
from pr_agent.tools.pr_questions import PRQuestions
from pr_agent.tools.pr_reviewer import PRReviewer
from pr_agent.tools.pr_update_changelog import PRUpdateChangelog
class PRAgent:
@ -11,18 +14,22 @@ class PRAgent:
pass
async def handle_request(self, pr_url, request) -> bool:
if any(cmd in request for cmd in ["/review", "/review_pr"]):
await PRReviewer(pr_url).review()
elif any(cmd in request for cmd in ["/describe", "/describe_pr"]):
action, *args = request.strip().split()
if any(cmd == action for cmd in ["/answer"]):
await PRReviewer(pr_url, is_answer=True).review()
elif any(cmd == action for cmd in ["/review", "/review_pr", "/reflect_and_review"]):
if settings.pr_reviewer.ask_and_reflect or "/reflect_and_review" in request:
await PRInformationFromUser(pr_url).generate_questions()
else:
await PRReviewer(pr_url, args=args).review()
elif any(cmd == action for cmd in ["/describe", "/describe_pr"]):
await PRDescription(pr_url).describe()
elif any(cmd in request for cmd in ["/improve", "/improve_code"]):
elif any(cmd == action for cmd in ["/improve", "/improve_code"]):
await PRCodeSuggestions(pr_url).suggest()
elif any(cmd in request for cmd in ["/ask", "/ask_question"]):
pattern = r'(/ask|/ask_question)\s*(.*)'
matches = re.findall(pattern, request, re.IGNORECASE)
if matches:
question = matches[0][1]
await PRQuestions(pr_url, question).answer()
elif any(cmd == action for cmd in ["/ask", "/ask_question"]):
await PRQuestions(pr_url, args=args).answer()
elif any(cmd == action for cmd in ["/update_changelog"]):
await PRUpdateChangelog(pr_url, args=args).update_changelog()
else:
return False

View File

@ -1,15 +1,25 @@
import logging
import openai
from openai.error import APIError, Timeout, TryAgain
from openai.error import APIError, Timeout, TryAgain, RateLimitError
from retry import retry
from pr_agent.config_loader import settings
OPENAI_RETRIES=2
OPENAI_RETRIES=5
class AiHandler:
"""
This class handles interactions with the OpenAI API for chat completions.
It initializes the API key and other settings from a configuration file,
and provides a method for performing chat completions using the OpenAI ChatCompletion API.
"""
def __init__(self):
"""
Initializes the OpenAI API key and other settings from a configuration file.
Raises a ValueError if the OpenAI key is missing.
"""
try:
openai.api_key = settings.openai.key
if settings.get("OPENAI.ORG", None):
@ -24,9 +34,28 @@ class AiHandler:
except AttributeError as e:
raise ValueError("OpenAI key is required") from e
@retry(exceptions=(APIError, Timeout, TryAgain, AttributeError),
@retry(exceptions=(APIError, Timeout, TryAgain, AttributeError, RateLimitError),
tries=OPENAI_RETRIES, delay=2, backoff=2, jitter=(1, 3))
async def chat_completion(self, model: str, temperature: float, system: str, user: str):
"""
Performs a chat completion using the OpenAI ChatCompletion API.
Retries in case of API errors or timeouts.
Args:
model (str): The model to use for chat completion.
temperature (float): The temperature parameter for chat completion.
system (str): The system message for chat completion.
user (str): The user message for chat completion.
Returns:
tuple: A tuple containing the response and finish reason from the API.
Raises:
TryAgain: If the API response is empty or there are no choices in the response.
APIError: If there is an error during OpenAI inference.
Timeout: If there is a timeout during OpenAI inference.
TryAgain: If there is an attribute error during OpenAI inference.
"""
try:
response = await openai.ChatCompletion.acreate(
model=model,
@ -40,8 +69,14 @@ class AiHandler:
except (APIError, Timeout, TryAgain) as e:
logging.error("Error during OpenAI inference: ", e)
raise
except (RateLimitError) as e:
logging.error("Rate limit error during OpenAI inference: ", e)
raise
except (Exception) as e:
logging.error("Unknown error during OpenAI inference: ", e)
raise TryAgain from e
if response is None or len(response.choices) == 0:
raise TryAgain
resp = response.choices[0]['message']['content']
finish_reason = response.choices[0].finish_reason
return resp, finish_reason
return resp, finish_reason

View File

@ -8,7 +8,15 @@ from pr_agent.config_loader import settings
def extend_patch(original_file_str, patch_str, num_lines) -> str:
"""
Extends the patch to include 'num_lines' more surrounding lines
Extends the given patch to include a specified number of surrounding lines.
Args:
original_file_str (str): The original file to which the patch will be applied.
patch_str (str): The patch to be applied to the original file.
num_lines (int): The number of surrounding lines to include in the extended patch.
Returns:
str: The extended patch string.
"""
if not patch_str or num_lines == 0:
return patch_str
@ -61,6 +69,14 @@ def extend_patch(original_file_str, patch_str, num_lines) -> str:
def omit_deletion_hunks(patch_lines) -> str:
"""
Omit deletion hunks from the patch and return the modified patch.
Args:
- patch_lines: a list of strings representing the lines of the patch
Returns:
- A string representing the modified patch with deletion hunks omitted
"""
temp_hunk = []
added_patched = []
add_hunk = False
@ -93,7 +109,20 @@ def omit_deletion_hunks(patch_lines) -> str:
def handle_patch_deletions(patch: str, original_file_content_str: str,
new_file_content_str: str, file_name: str) -> str:
"""
Handle entire file or deletion patches
Handle entire file or deletion patches.
This function takes a patch, original file content, new file content, and file name as input.
It handles entire file or deletion patches and returns the modified patch with deletion hunks omitted.
Args:
patch (str): The patch to be handled.
original_file_content_str (str): The original content of the file.
new_file_content_str (str): The new content of the file.
file_name (str): The name of the file.
Returns:
str: The modified patch with deletion hunks omitted.
"""
if not new_file_content_str:
# logic for handling deleted files - don't show patch, just show that the file was deleted
@ -111,20 +140,26 @@ def handle_patch_deletions(patch: str, original_file_content_str: str,
def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
# toDO: (maybe remove '-' and '+' from the beginning of the line)
"""
## src/file.ts
Convert a given patch string into a string with line numbers for each hunk, indicating the new and old content of the file.
Args:
patch (str): The patch string to be converted.
file: An object containing the filename of the file being patched.
Returns:
str: A string with line numbers for each hunk, indicating the new and old content of the file.
example output:
## src/file.ts
--new hunk--
881 line1
882 line2
883 line3
884 line4
885 line6
886 line7
887 + line8
888 + line9
889 line10
890 line11
887 + line4
888 + line5
889 line6
890 line7
...
--old hunk--
line1
@ -134,8 +169,8 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
line5
line6
...
"""
patch_with_lines_str = f"## {file.filename}\n"
import re
patch_lines = patch.splitlines()
@ -158,7 +193,7 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
patch_with_lines_str += f"{start2 + i} {line_new}\n"
if old_content_lines:
patch_with_lines_str += '--old hunk--\n'
for i, line_old in enumerate(old_content_lines):
for line_old in old_content_lines:
patch_with_lines_str += f"{line_old}\n"
new_content_lines = []
old_content_lines = []
@ -179,7 +214,7 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
patch_with_lines_str += f"{start2 + i} {line_new}\n"
if old_content_lines:
patch_with_lines_str += '\n--old hunk--\n'
for i, line_old in enumerate(old_content_lines):
for line_old in old_content_lines:
patch_with_lines_str += f"{line_old}\n"
return patch_with_lines_str.strip()

File diff suppressed because one or more lines are too long

View File

@ -1,15 +1,17 @@
from __future__ import annotations
import difflib
import logging
from typing import Any, Tuple, Union
from typing import Tuple, Union, Callable, List
from pr_agent.algo.git_patch_processing import extend_patch, handle_patch_deletions, \
convert_to_hunks_with_lines_numbers
from github import RateLimitExceededException
from pr_agent.algo import MAX_TOKENS
from pr_agent.algo.git_patch_processing import convert_to_hunks_with_lines_numbers, extend_patch, handle_patch_deletions
from pr_agent.algo.language_handler import sort_files_by_main_languages
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import load_large_diff
from pr_agent.config_loader import settings
from pr_agent.git_providers import GithubProvider
from pr_agent.git_providers.git_provider import GitProvider
DELETED_FILES_ = "Deleted files:\n"
@ -19,33 +21,46 @@ OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD = 1000
OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600
PATCH_EXTRA_LINES = 3
def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: str,
add_line_numbers_to_hunks: bool = False, disable_extra_lines: bool = False) -> str:
"""
Returns a string with the diff of the pull request, applying diff minimization techniques if needed.
def get_pr_diff(git_provider: Union[GithubProvider, Any], token_handler: TokenHandler,
add_line_numbers_to_hunks: bool = False, disable_extra_lines: bool =False) -> str:
"""
Returns a string with the diff of the PR.
If needed, apply diff minimization techniques to reduce the number of tokens
Args:
git_provider (GitProvider): An object of the GitProvider class representing the Git provider used for the pull request.
token_handler (TokenHandler): An object of the TokenHandler class used for handling tokens in the context of the pull request.
model (str): The name of the model used for tokenization.
add_line_numbers_to_hunks (bool, optional): A boolean indicating whether to add line numbers to the hunks in the diff. Defaults to False.
disable_extra_lines (bool, optional): A boolean indicating whether to disable the extension of each patch with extra lines of context. Defaults to False.
Returns:
str: A string with the diff of the pull request, applying diff minimization techniques if needed.
"""
if disable_extra_lines:
global PATCH_EXTRA_LINES
PATCH_EXTRA_LINES = 0
git_provider.pr.diff_files = list(git_provider.get_diff_files())
try:
diff_files = list(git_provider.get_diff_files())
except RateLimitExceededException as e:
logging.error(f"Rate limit exceeded for git provider API. original message {e}")
raise
# get pr languages
pr_languages = sort_files_by_main_languages(git_provider.get_languages(), git_provider.pr.diff_files)
pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files)
# generate a standard diff string, with patch extension
patches_extended, total_tokens = pr_generate_extended_diff(pr_languages, token_handler,
add_line_numbers_to_hunks)
# if we are under the limit, return the full diff
if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < token_handler.limit:
if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < MAX_TOKENS[model]:
return "\n".join(patches_extended)
# if we are over the limit, start pruning
patches_compressed, modified_file_names, deleted_file_names = \
pr_generate_compressed_diff(pr_languages, token_handler, add_line_numbers_to_hunks)
pr_generate_compressed_diff(pr_languages, token_handler, model, add_line_numbers_to_hunks)
final_diff = "\n".join(patches_compressed)
if modified_file_names:
@ -61,7 +76,16 @@ def pr_generate_extended_diff(pr_languages: list, token_handler: TokenHandler,
add_line_numbers_to_hunks: bool) -> \
Tuple[list, int]:
"""
Generate a standard diff string, with patch extension
Generate a standard diff string with patch extension, while counting the number of tokens used and applying diff minimization techniques if needed.
Args:
- pr_languages: A list of dictionaries representing the languages used in the pull request and their corresponding files.
- token_handler: An object of the TokenHandler class used for handling tokens in the context of the pull request.
- add_line_numbers_to_hunks: A boolean indicating whether to add line numbers to the hunks in the diff.
Returns:
- patches_extended: A list of extended patches for each file in the pull request.
- total_tokens: The total number of tokens used in the extended patches.
"""
total_tokens = token_handler.prompt_tokens # initial tokens
patches_extended = []
@ -92,14 +116,28 @@ def pr_generate_extended_diff(pr_languages: list, token_handler: TokenHandler,
return patches_extended, total_tokens
def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler,
def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, model: str,
convert_hunks_to_line_numbers: bool) -> Tuple[list, list, list]:
# Apply Diff Minimization techniques to reduce the number of tokens:
# 0. Start from the largest diff patch to smaller ones
# 1. Don't use extend context lines around diff
# 2. Minimize deleted files
# 3. Minimize deleted hunks
# 4. Minimize all remaining files when you reach token limit
"""
Generate a compressed diff string for a pull request, using diff minimization techniques to reduce the number of tokens used.
Args:
top_langs (list): A list of dictionaries representing the languages used in the pull request and their corresponding files.
token_handler (TokenHandler): An object of the TokenHandler class used for handling tokens in the context of the pull request.
model (str): The model used for tokenization.
convert_hunks_to_line_numbers (bool): A boolean indicating whether to convert hunks to line numbers in the diff.
Returns:
Tuple[list, list, list]: A tuple containing the following lists:
- patches: A list of compressed diff patches for each file in the pull request.
- modified_files_list: A list of file names that were skipped due to large patch size.
- deleted_files_list: A list of file names that were deleted in the pull request.
Minimization techniques to reduce the number of tokens:
0. Start from the largest diff patch to smaller ones
1. Don't use extend context lines around diff
2. Minimize deleted files
3. Minimize deleted hunks
4. Minimize all remaining files when you reach token limit
"""
patches = []
modified_files_list = []
@ -134,12 +172,12 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler,
new_patch_tokens = token_handler.count_tokens(patch)
# Hard Stop, no more tokens
if total_tokens > token_handler.limit - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD:
if total_tokens > MAX_TOKENS[model] - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD:
logging.warning(f"File was fully skipped, no more tokens: {file.filename}.")
continue
# If the patch is too large, just show the file name
if total_tokens + new_patch_tokens > token_handler.limit - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
if total_tokens + new_patch_tokens > MAX_TOKENS[model] - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
# Current logic is to skip the patch if it's too large
# TODO: Option for alternative logic to remove hunks from the patch to reduce the number of tokens
# until we meet the requirements
@ -164,14 +202,16 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler,
return patches, modified_files_list, deleted_files_list
def load_large_diff(file, new_file_content_str: str, original_file_content_str: str, patch: str) -> str:
if not patch: # to Do - also add condition for file extension
async def retry_with_fallback_models(f: Callable):
model = settings.config.model
fallback_models = settings.config.fallback_models
if not isinstance(fallback_models, list):
fallback_models = [fallback_models]
all_models = [model] + fallback_models
for i, model in enumerate(all_models):
try:
diff = difflib.unified_diff(original_file_content_str.splitlines(keepends=True),
new_file_content_str.splitlines(keepends=True))
if settings.config.verbosity_level >= 2:
logging.warning(f"File was modified, but no patch was found. Manually creating patch: {file.filename}.")
patch = ''.join(diff)
except Exception:
pass
return patch
return await f(model)
except Exception as e:
logging.warning(f"Failed to generate prediction with {model}: {e}")
if i == len(all_models) - 1: # If it's the last iteration
raise # Re-raise the last exception

View File

@ -6,12 +6,42 @@ from pr_agent.config_loader import settings
class TokenHandler:
"""
A class for handling tokens in the context of a pull request.
Attributes:
- encoder: An object of the encoding_for_model class from the tiktoken module. Used to encode strings and count the number of tokens in them.
- limit: The maximum number of tokens allowed for the given model, as defined in the MAX_TOKENS dictionary in the pr_agent.algo module.
- prompt_tokens: The number of tokens in the system and user strings, as calculated by the _get_system_user_tokens method.
"""
def __init__(self, pr, vars: dict, system, user):
"""
Initializes the TokenHandler object.
Args:
- pr: The pull request object.
- vars: A dictionary of variables.
- system: The system string.
- user: The user string.
"""
self.encoder = encoding_for_model(settings.config.model)
self.limit = MAX_TOKENS[settings.config.model]
self.prompt_tokens = self._get_system_user_tokens(pr, self.encoder, vars, system, user)
def _get_system_user_tokens(self, pr, encoder, vars: dict, system, user):
"""
Calculates the number of tokens in the system and user strings.
Args:
- pr: The pull request object.
- encoder: An object of the encoding_for_model class from the tiktoken module.
- vars: A dictionary of variables.
- system: The system string.
- user: The user string.
Returns:
The sum of the number of tokens in the system and user strings.
"""
environment = Environment(undefined=StrictUndefined)
system_prompt = environment.from_string(system).render(vars)
user_prompt = environment.from_string(user).render(vars)
@ -21,4 +51,13 @@ class TokenHandler:
return system_prompt_tokens + user_prompt_tokens
def count_tokens(self, patch: str) -> int:
"""
Counts the number of tokens in a given patch string.
Args:
- patch: The patch string.
Returns:
The number of tokens in the patch string.
"""
return len(self.encoder.encode(patch, disallowed_special=()))

View File

@ -1,23 +1,36 @@
from __future__ import annotations
import difflib
from datetime import datetime
import json
import logging
import re
import textwrap
from pr_agent.config_loader import settings
def convert_to_markdown(output_data: dict) -> str:
"""
Convert a dictionary of data into markdown format.
Args:
output_data (dict): A dictionary containing data to be converted to markdown format.
Returns:
str: The markdown formatted text generated from the input dictionary.
"""
markdown_text = ""
emojis = {
"Main theme": "🎯",
"Type of PR": "📌",
"Score": "🏅",
"Relevant tests added": "🧪",
"Unrelated changes": "⚠️",
"Focused PR": "",
"Security concerns": "🔒",
"General PR suggestions": "💡",
"Code suggestions": "🤖"
"Insights from user's answers": "📝",
"Code suggestions": "🤖",
}
for key, value in output_data.items():
@ -29,7 +42,7 @@ def convert_to_markdown(output_data: dict) -> str:
elif isinstance(value, list):
if key.lower() == 'code suggestions':
markdown_text += "\n" # just looks nicer with additional line breaks
emoji = emojis.get(key, "") # Use a dash if no emoji is found for the key
emoji = emojis.get(key, "")
markdown_text += f"- {emoji} **{key}:**\n\n"
for item in value:
if isinstance(item, dict) and key.lower() == 'code suggestions':
@ -37,12 +50,21 @@ def convert_to_markdown(output_data: dict) -> str:
elif item:
markdown_text += f" - {item}\n"
elif value != 'n/a':
emoji = emojis.get(key, "") # Use a dash if no emoji is found for the key
emoji = emojis.get(key, "")
markdown_text += f"- {emoji} **{key}:** {value}\n"
return markdown_text
def parse_code_suggestion(code_suggestions: dict) -> str:
"""
Convert a dictionary of data into markdown format.
Args:
code_suggestions (dict): A dictionary containing data to be converted to markdown format.
Returns:
str: A string containing the markdown formatted text generated from the input dictionary.
"""
markdown_text = ""
for sub_key, sub_value in code_suggestions.items():
if isinstance(sub_value, dict): # "code example"
@ -62,18 +84,41 @@ def parse_code_suggestion(code_suggestions: dict) -> str:
def try_fix_json(review, max_iter=10, code_suggestions=False):
"""
Fix broken or incomplete JSON messages and return the parsed JSON data.
Args:
- review: A string containing the JSON message to be fixed.
- max_iter: An integer representing the maximum number of iterations to try and fix the JSON message.
- code_suggestions: A boolean indicating whether to try and fix JSON messages with code suggestions.
Returns:
- data: A dictionary containing the parsed JSON data.
The function attempts to fix broken or incomplete JSON messages by parsing until the last valid code suggestion.
If the JSON message ends with a closing bracket, the function calls the fix_json_escape_char function to fix the message.
If code_suggestions is True and the JSON message contains code suggestions, the function tries to fix the JSON message by parsing until the last valid code suggestion.
The function uses regular expressions to find the last occurrence of "}," with any number of whitespaces or newlines.
It tries to parse the JSON message with the closing bracket and checks if it is valid.
If the JSON message is valid, the parsed JSON data is returned.
If the JSON message is not valid, the last code suggestion is removed and the process is repeated until a valid JSON message is obtained or the maximum number of iterations is reached.
If a valid JSON message is not obtained, an error is logged and an empty dictionary is returned.
"""
if review.endswith("}"):
return fix_json_escape_char(review)
# Try to fix JSON if it is broken/incomplete: parse until the last valid code suggestion
data = {}
if code_suggestions:
closing_bracket = "]}"
else:
closing_bracket = "]}}"
if review.rfind("'Code suggestions': [") > 0 or review.rfind('"Code suggestions": [') > 0:
last_code_suggestion_ind = [m.end() for m in re.finditer(r"\}\s*,", review)][-1] - 1
valid_json = False
iter_count = 0
while last_code_suggestion_ind > 0 and not valid_json and iter_count < max_iter:
try:
data = json.loads(review[:last_code_suggestion_ind] + closing_bracket)
@ -81,16 +126,30 @@ def try_fix_json(review, max_iter=10, code_suggestions=False):
review = review[:last_code_suggestion_ind].strip() + closing_bracket
except json.decoder.JSONDecodeError:
review = review[:last_code_suggestion_ind]
# Use regular expression to find the last occurrence of "}," with any number of whitespaces or newlines
last_code_suggestion_ind = [m.end() for m in re.finditer(r"\}\s*,", review)][-1] - 1
iter_count += 1
if not valid_json:
logging.error("Unable to decode JSON response from AI")
data = {}
return data
def fix_json_escape_char(json_message=None):
result = None
"""
Fix broken or incomplete JSON messages and return the parsed JSON data.
Args:
json_message (str): A string containing the JSON message to be fixed.
Returns:
dict: A dictionary containing the parsed JSON data.
Raises:
None
"""
try:
result = json.loads(json_message)
except Exception as e:
@ -100,5 +159,55 @@ def fix_json_escape_char(json_message=None):
json_message = list(json_message)
json_message[idx_to_replace] = ' '
new_message = ''.join(json_message)
return fix_JSON(json_message=new_message)
return result
return fix_json_escape_char(json_message=new_message)
return result
def convert_str_to_datetime(date_str):
    """
    Parse a date/time string of the form '%a, %d %b %Y %H:%M:%S %Z' into a datetime object.

    Args:
        date_str (str): Text such as 'Sat, 01 Jan 2022 12:00:00 UTC'.

    Returns:
        datetime: The parsed date and time. The '%Z' directive only matches the
        zone name, so the result carries no tzinfo.

    Raises:
        ValueError: Propagated from datetime.strptime when date_str does not match the format.

    Example:
        >>> convert_str_to_datetime('Sat, 01 Jan 2022 12:00:00 UTC')
        datetime.datetime(2022, 1, 1, 12, 0)
    """
    return datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S %Z')
def load_large_diff(file, new_file_content_str: str, original_file_content_str: str, patch: str) -> str:
    """
    Return the patch for a modified file, generating one with difflib when none was supplied.

    Args:
        file: The file object the patch belongs to; only its `filename` attribute is read (for log messages).
        new_file_content_str: The new content of the file as a string.
        original_file_content_str: The original content of the file as a string.
        patch: An existing patch string; when truthy it is returned unchanged.

    Returns:
        The provided patch, or a unified diff built from the two contents. If the diff cannot be built,
        the (empty) input patch is returned as-is.

    Additional Information:
        - When 'settings.config.verbosity_level' is greater than or equal to 2, a warning is logged saying
          that the file was modified but no patch was found and one is being created manually.
        - Patch generation is best effort: a failure is logged as a warning instead of being raised.
    """
    if patch:
        # A patch was supplied; nothing to generate.
        return patch

    # TODO: also add a condition for the file extension
    try:
        diff = difflib.unified_diff(original_file_content_str.splitlines(keepends=True),
                                    new_file_content_str.splitlines(keepends=True))
        if settings.config.verbosity_level >= 2:
            logging.warning(f"File was modified, but no patch was found. Manually creating patch: {file.filename}.")
        patch = ''.join(diff)
    except Exception as e:
        # Keep the best-effort contract (never raise), but do not hide the failure.
        # getattr guards the log line itself, since `file` may be what caused the error.
        logging.warning(f"Failed to manually create patch for {getattr(file, 'filename', file)}: {e}")
    return patch

View File

@ -8,62 +8,102 @@ from pr_agent.tools.pr_description import PRDescription
from pr_agent.tools.pr_information_from_user import PRInformationFromUser
from pr_agent.tools.pr_questions import PRQuestions
from pr_agent.tools.pr_reviewer import PRReviewer
from pr_agent.tools.pr_update_changelog import PRUpdateChangelog
def run():
parser = argparse.ArgumentParser(description='AI based pull request analyzer', usage="""\
def run(args=None):
parser = argparse.ArgumentParser(description='AI based pull request analyzer', usage=
"""\
Usage: cli.py --pr-url <URL on supported git hosting service> <command> [<args>].
For example:
- cli.py --pr-url xxx review
- cli.py --pr-url xxx describe
- cli.py --pr-url xxx improve
- cli.py --pr-url xxx ask "write me a poem about this PR"
- cli.py --pr-url=... review
- cli.py --pr-url=... describe
- cli.py --pr-url=... improve
- cli.py --pr-url=... ask "write me a poem about this PR"
- cli.py --pr-url=... reflect
Supported commands:
review / review_pr - Add a review that includes a summary of the PR and specific suggestions for improvement.
ask / ask_question [question] - Ask a question about the PR.
describe / describe_pr - Modify the PR title and description based on the PR's contents.
improve / improve_code - Suggest improvements to the code in the PR as pull request comments ready to commit.
reflect - Ask the PR author questions about the PR.
update_changelog - Update the changelog based on the PR's contents.
""")
parser.add_argument('--pr_url', type=str, help='The URL of the PR to review', required=True)
parser.add_argument('command', type=str, help='The', choices=['review', 'review_pr',
'ask', 'ask_question',
'describe', 'describe_pr',
'improve', 'improve_code',
'user_questions'], default='review')
'reflect', 'review_after_reflect',
'update_changelog'],
default='review')
parser.add_argument('rest', nargs=argparse.REMAINDER, default=[])
args = parser.parse_args()
args = parser.parse_args(args)
logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))
command = args.command.lower()
if command in ['ask', 'ask_question']:
question = ' '.join(args.rest).strip()
if len(question) == 0:
print("Please specify a question")
parser.print_help()
return
print(f"Question: {question} about PR {args.pr_url}")
reviewer = PRQuestions(args.pr_url, question)
asyncio.run(reviewer.answer())
elif command in ['describe', 'describe_pr']:
print(f"PR description: {args.pr_url}")
reviewer = PRDescription(args.pr_url)
asyncio.run(reviewer.describe())
elif command in ['improve', 'improve_code']:
print(f"PR code suggestions: {args.pr_url}")
reviewer = PRCodeSuggestions(args.pr_url)
asyncio.run(reviewer.suggest())
elif command in ['review', 'review_pr']:
print(f"Reviewing PR: {args.pr_url}")
reviewer = PRReviewer(args.pr_url, cli_mode=True)
asyncio.run(reviewer.review())
elif command in ['user_questions']:
print(f"Asking the PR author questions: {args.pr_url}")
reviewer = PRInformationFromUser(args.pr_url)
asyncio.run(reviewer.generate_questions())
commands = {
'ask': _handle_ask_command,
'ask_question': _handle_ask_command,
'describe': _handle_describe_command,
'describe_pr': _handle_describe_command,
'improve': _handle_improve_command,
'improve_code': _handle_improve_command,
'review': _handle_review_command,
'review_pr': _handle_review_command,
'reflect': _handle_reflect_command,
'review_after_reflect': _handle_review_after_reflect_command,
'update_changelog': _handle_update_changelog,
}
if command in commands:
commands[command](args.pr_url, args.rest)
else:
print(f"Unknown command: {command}")
parser.print_help()
def _handle_ask_command(pr_url: str, rest: list):
if len(rest) == 0:
print("Please specify a question")
return
print(f"Question: {' '.join(rest)} about PR {pr_url}")
reviewer = PRQuestions(pr_url, rest)
asyncio.run(reviewer.answer())
def _handle_describe_command(pr_url: str, rest: list):
    """
    CLI handler for 'describe' / 'describe_pr'.

    Prints a progress line, then runs PRDescription(pr_url).describe() with
    asyncio.run. `rest` is part of the shared handler signature and is not used here.
    """
    print(f"PR description: {pr_url}")
    asyncio.run(PRDescription(pr_url).describe())
def _handle_improve_command(pr_url: str, rest: list):
    """
    CLI handler for 'improve' / 'improve_code'.

    Prints a progress line, then runs PRCodeSuggestions(pr_url).suggest() with
    asyncio.run. `rest` is part of the shared handler signature and is not used here.
    """
    print(f"PR code suggestions: {pr_url}")
    asyncio.run(PRCodeSuggestions(pr_url).suggest())
def _handle_review_command(pr_url: str, rest: list):
    """
    CLI handler for 'review' / 'review_pr'.

    Prints a progress line, then builds PRReviewer in CLI mode, forwarding the
    remaining command-line arguments as `args`, and runs its review()
    coroutine with asyncio.run.
    """
    print(f"Reviewing PR: {pr_url}")
    asyncio.run(PRReviewer(pr_url, cli_mode=True, args=rest).review())
def _handle_reflect_command(pr_url: str, rest: list):
    """
    CLI handler for 'reflect'.

    Prints a progress line, then runs PRInformationFromUser(pr_url).generate_questions()
    with asyncio.run. `rest` is part of the shared handler signature and is not used here.
    """
    print(f"Asking the PR author questions: {pr_url}")
    asyncio.run(PRInformationFromUser(pr_url).generate_questions())
def _handle_review_after_reflect_command(pr_url: str, rest: list):
    """
    CLI handler for 'review_after_reflect'.

    Prints a progress line, then builds PRReviewer in CLI mode with
    is_answer=True and runs its review() coroutine with asyncio.run.
    `rest` is part of the shared handler signature and is not used here.
    """
    print(f"Processing author's answers and sending review: {pr_url}")
    asyncio.run(PRReviewer(pr_url, cli_mode=True, is_answer=True).review())
def _handle_update_changelog(pr_url: str, rest: list):
    """
    CLI handler for 'update_changelog'.

    Prints a progress line, then builds PRUpdateChangelog in CLI mode, forwarding
    the remaining command-line arguments as `args`, and runs its
    update_changelog() coroutine with asyncio.run.
    """
    # Message previously misspelled "changelog" as "changlog".
    print(f"Updating changelog for: {pr_url}")
    reviewer = PRUpdateChangelog(pr_url, cli_mode=True, args=rest)
    asyncio.run(reviewer.update_changelog())
if __name__ == '__main__':
run()

View File

@ -1,7 +1,11 @@
from os.path import abspath, dirname, join
from pathlib import Path
from typing import Optional
from dynaconf import Dynaconf
PR_AGENT_TOML_KEY = 'pr-agent'
current_dir = dirname(abspath(__file__))
settings = Dynaconf(
envvar_prefix=False,
@ -9,11 +13,42 @@ settings = Dynaconf(
settings_files=[join(current_dir, f) for f in [
"settings/.secrets.toml",
"settings/configuration.toml",
"settings/language_extensions.toml",
"settings/pr_reviewer_prompts.toml",
"settings/pr_questions_prompts.toml",
"settings/pr_description_prompts.toml",
"settings/pr_code_suggestions_prompts.toml",
"settings/pr_information_from_user_prompts.toml",
"settings/pr_update_changelog.toml",
"settings_prod/.secrets.toml"
]]
)
# Add local configuration from pyproject.toml of the project being reviewed
def _find_repository_root() -> Path:
"""
Identify project root directory by recursively searching for the .git directory in the parent directories.
"""
cwd = Path.cwd().resolve()
no_way_up = False
while not no_way_up:
no_way_up = cwd == cwd.parent
if (cwd / ".git").is_dir():
return cwd
cwd = cwd.parent
return None
def _find_pyproject() -> Optional[Path]:
    """
    Search for the file pyproject.toml in the repository root.

    Returns:
        The path to `<repository root>/pyproject.toml` when the repository root
        was found and the file exists there; otherwise None.
    """
    repo_root = _find_repository_root()
    if repo_root is None:
        return None
    # Reuse the root found above instead of walking the directory tree a second time.
    pyproject = repo_root / "pyproject.toml"
    return pyproject if pyproject.is_file() else None
pyproject_path = _find_pyproject()
if pyproject_path is not None:
settings.load_file(pyproject_path, env=f'tool.{PR_AGENT_TOML_KEY}')

View File

@ -1,12 +1,14 @@
from pr_agent.config_loader import settings
from pr_agent.git_providers.bitbucket_provider import BitbucketProvider
from pr_agent.git_providers.github_provider import GithubProvider
from pr_agent.git_providers.gitlab_provider import GitLabProvider
from pr_agent.git_providers.bitbucket_provider import BitbucketProvider
from pr_agent.git_providers.local_git_provider import LocalGitProvider
_GIT_PROVIDERS = {
'github': GithubProvider,
'gitlab': GitLabProvider,
'bitbucket': BitbucketProvider,
'local' : LocalGitProvider
}
def get_git_provider():

View File

@ -1,5 +1,4 @@
import logging
from datetime import datetime
from typing import Optional, Tuple
from urllib.parse import urlparse
@ -10,8 +9,9 @@ from pr_agent.config_loader import settings
from .git_provider import FilePatchInfo
class BitbucketProvider:
def __init__(self, pr_url: Optional[str] = None):
def __init__(self, pr_url: Optional[str] = None, incremental: Optional[bool] = False):
s = requests.Session()
s.headers['Authorization'] = f'Bearer {settings.get("BITBUCKET.BEARER_TOKEN", None)}'
self.bitbucket_client = Cloud(session=s)
@ -22,9 +22,15 @@ class BitbucketProvider:
self.pr_num = None
self.pr = None
self.temp_comments = []
self.incremental = incremental
if pr_url:
self.set_pr(pr_url)
def is_supported(self, capability: str) -> bool:
    """
    Report whether this provider supports the named capability.

    Returns False for the capabilities listed below and True for any other name.
    """
    unsupported = ('get_issue_comments', 'create_inline_comment', 'publish_inline_comments', 'get_labels')
    return capability not in unsupported
def set_pr(self, pr_url: str):
self.workspace_slug, self.repo_slug, self.pr_num = self._parse_pr_url(pr_url)
self.pr = self._get_pr()
@ -40,7 +46,8 @@ class BitbucketProvider:
for index, diff in enumerate(diffs):
original_file_content_str = self._get_pr_file_content(diff.old.get_data('links'))
new_file_content_str = self._get_pr_file_content(diff.new.get_data('links'))
diff_files.append(FilePatchInfo(original_file_content_str, new_file_content_str, diff_split[index], diff.new.path))
diff_files.append(FilePatchInfo(original_file_content_str, new_file_content_str,
diff_split[index], diff.new.path))
return diff_files
def publish_comment(self, pr_comment: str, is_temporary: bool = False):
@ -58,6 +65,12 @@ class BitbucketProvider:
def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
pass
def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
raise NotImplementedError("Bitbucket provider does not support creating inline comments yet")
def publish_inline_comments(self, comments: list[dict]):
raise NotImplementedError("Bitbucket provider does not support publishing inline comments yet")
def get_title(self):
return self.pr.title
@ -74,6 +87,9 @@ class BitbucketProvider:
def get_user_id(self):
return 0
def get_issue_comments(self):
raise NotImplementedError("Bitbucket provider does not support issue comments yet")
@staticmethod
def _parse_pr_url(pr_url: str) -> Tuple[str, int]:
parsed_url = urlparse(pr_url)

View File

@ -3,12 +3,15 @@ from dataclasses import dataclass
# enum EDIT_TYPE (ADDED, DELETED, MODIFIED, RENAMED)
from enum import Enum
class EDIT_TYPE(Enum):
    """Kind of change applied to a file: added, deleted, modified or renamed."""
    ADDED = 1
    DELETED = 2
    MODIFIED = 3
    RENAMED = 4
@dataclass
class FilePatchInfo:
base_file: str
@ -21,6 +24,10 @@ class FilePatchInfo:
class GitProvider(ABC):
@abstractmethod
def is_supported(self, capability: str) -> bool:
pass
@abstractmethod
def get_diff_files(self) -> list[FilePatchInfo]:
pass
@ -38,8 +45,23 @@ class GitProvider(ABC):
pass
@abstractmethod
def publish_code_suggestion(self, body: str, relevant_file: str,
relevant_lines_start: int, relevant_lines_end: int):
def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
pass
@abstractmethod
def publish_inline_comments(self, comments: list[dict]):
pass
@abstractmethod
def publish_code_suggestions(self, code_suggestions: list):
pass
@abstractmethod
def publish_labels(self, labels):
pass
@abstractmethod
def get_labels(self):
pass
@abstractmethod
@ -62,6 +84,10 @@ class GitProvider(ABC):
def get_pr_description(self):
pass
@abstractmethod
def get_issue_comments(self):
pass
def get_main_pr_language(languages, files) -> str:
"""
@ -102,3 +128,12 @@ def get_main_pr_language(languages, files) -> str:
pass
return main_language_str
class IncrementalPR:
    """
    Holds the state of an incremental review.

    `is_incremental` is the flag passed by the caller. The remaining three
    attributes start as None.
    """

    def __init__(self, is_incremental: bool = False):
        self.is_incremental = is_incremental
        # Nothing is known about the commit range until a provider fills it in.
        self.commits_range = self.first_new_commit_sha = self.last_seen_commit_sha = None

View File

@ -3,15 +3,20 @@ from datetime import datetime
from typing import Optional, Tuple
from urllib.parse import urlparse
from github import AppAuthentication, Github
from github import AppAuthentication, Auth, Github, GithubException
from retry import retry
from pr_agent.config_loader import settings
from .git_provider import FilePatchInfo, GitProvider
from ..algo.language_handler import is_valid_file
from ..algo.utils import load_large_diff
from .git_provider import FilePatchInfo, GitProvider, IncrementalPR
from ..servers.utils import RateLimitExceeded
class GithubProvider(GitProvider):
def __init__(self, pr_url: Optional[str] = None):
def __init__(self, pr_url: Optional[str] = None, incremental=IncrementalPR(False)):
self.repo_obj = None
self.installation_id = settings.get("GITHUB.INSTALLATION_ID")
self.github_client = self._get_github_client()
self.repo = None
@ -19,32 +24,99 @@ class GithubProvider(GitProvider):
self.pr = None
self.github_user_id = None
self.diff_files = None
self.incremental = incremental
if pr_url:
self.set_pr(pr_url)
self.last_commit_id = list(self.pr.get_commits())[-1]
def is_supported(self, capability: str) -> bool:
return True
def get_pr_url(self) -> str:
return f"https://github.com/{self.repo}/pull/{self.pr_num}"
def set_pr(self, pr_url: str):
    """
    Point this provider at the pull request identified by `pr_url`.

    Stores the parsed repository and PR number, fetches the PR object via
    _get_pr(), and, when the incremental flag is set, collects the
    incremental commits.
    """
    repo, pr_number = self._parse_pr_url(pr_url)
    self.repo = repo
    self.pr_num = pr_number
    self.pr = self._get_pr()
    if not self.incremental.is_incremental:
        return
    self.get_incremental_commits()
def get_incremental_commits(self):
    """
    Collect the commits and files that were added since the previous review.

    Stores all PR commits in `self.commits`, looks up the previous review, and,
    when one exists, stores the range of newer commits in
    `self.incremental.commits_range` and fills `self.file_set` with
    {filename: file} for every file touched by those commits. Commits whose
    message starts with "Merge branch '<default branch>'" are skipped.
    """
    self.commits = list(self.pr.get_commits())
    # Always define file_set: get_files() reads it in incremental mode even
    # when no previous review exists; an empty dict means "no incremental files".
    self.file_set = dict()

    self.get_previous_review()
    if not self.previous_review:
        return

    self.incremental.commits_range = self.get_commit_range()
    # The default branch does not change between commits; look it up once.
    merge_prefix = f"Merge branch '{self._get_repo().default_branch}'"
    # Get all files changed during the commit range
    for commit in self.incremental.commits_range:
        if commit.commit.message.startswith(merge_prefix):
            logging.info(f"Skipping merge commit {commit.commit.message}")
            continue
        self.file_set.update({file.filename: file for file in commit.files})
def get_commit_range(self):
    """
    Return the commits whose author date is later than the previous review.

    Walks `self.commits` from last to first. Every commit newer than the
    review's `created_at` updates `self.incremental.first_new_commit_sha`; the
    first commit that is not newer is recorded as
    `self.incremental.last_seen_commit_sha` and ends the walk.

    Returns:
        The slice of `self.commits` starting at the oldest newer commit.
    """
    review_time = self.previous_review.created_at
    start = 0
    for position, commit in reversed(list(enumerate(self.commits))):
        if not commit.commit.author.date > review_time:
            self.incremental.last_seen_commit_sha = commit.sha
            break
        self.incremental.first_new_commit_sha = commit.sha
        start = position
    # NOTE(review): when no commit is newer than the review, start stays 0 and
    # the whole commit list is returned - confirm this is intended.
    return self.commits[start:]
def get_previous_review(self):
    """
    Find the most recent issue comment that is a review.

    Loads the PR's issue comments into `self.comments` and sets
    `self.previous_review` to the last comment whose body starts with
    "## PR Analysis", or leaves it as None when there is none.
    """
    self.previous_review = None
    self.comments = list(self.pr.get_issue_comments())
    # Newest comment first, so the first match is the latest review.
    for comment in reversed(self.comments):
        if comment.body.startswith("## PR Analysis"):
            self.previous_review = comment
            break
def get_files(self):
    """
    Return the files to review.

    In incremental mode with a non-empty `self.file_set`, returns the values of
    that mapping; otherwise returns the result of `self.pr.get_files()`.
    """
    if not (self.incremental.is_incremental and self.file_set):
        return self.pr.get_files()
    return self.file_set.values()
@retry(exceptions=RateLimitExceeded,
tries=settings.github.ratelimit_retries, delay=2, backoff=2, jitter=(1, 3))
def get_diff_files(self) -> list[FilePatchInfo]:
files = self.pr.get_files()
diff_files = []
for file in files:
original_file_content_str = self._get_pr_file_content(file, self.pr.base.sha)
new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha)
diff_files.append(FilePatchInfo(original_file_content_str, new_file_content_str, file.patch, file.filename))
self.diff_files = diff_files
return diff_files
try:
files = self.get_files()
diff_files = []
for file in files:
if is_valid_file(file.filename):
new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha)
patch = file.patch
if self.incremental.is_incremental and self.file_set:
original_file_content_str = self._get_pr_file_content(file,
self.incremental.last_seen_commit_sha)
patch = load_large_diff(file,
new_file_content_str,
original_file_content_str,
None)
self.file_set[file.filename] = patch
else:
original_file_content_str = self._get_pr_file_content(file, self.pr.base.sha)
diff_files.append(
FilePatchInfo(original_file_content_str, new_file_content_str, patch, file.filename))
self.diff_files = diff_files
return diff_files
except GithubException.RateLimitExceededException as e:
logging.error(f"Rate limit exceeded for GitHub API. Original message: {e}")
raise RateLimitExceeded("Rate limit exceeded for GitHub API.") from e
def publish_description(self, pr_title: str, pr_body: str):
self.pr.edit(title=pr_title, body=pr_body)
# self.pr.create_issue_comment(pr_comment)
def publish_comment(self, pr_comment: str, is_temporary: bool = False):
if is_temporary and not settings.config.publish_output_progress:
logging.debug(f"Skipping publish_comment for temporary comment: {pr_comment}")
return
response = self.pr.create_issue_comment(pr_comment)
if hasattr(response, "user") and hasattr(response.user, "login"):
self.github_user_id = response.user.login
@ -54,6 +126,9 @@ class GithubProvider(GitProvider):
self.pr.comments_list.append(response)
def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
self.publish_inline_comments([self.create_inline_comment(body, relevant_file, relevant_line_in_file)])
def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
self.diff_files = self.diff_files if self.diff_files else self.get_diff_files()
position = -1
for file in self.diff_files:
@ -64,7 +139,7 @@ class GithubProvider(GitProvider):
if relevant_line_in_file in line:
position = i
break
elif relevant_line_in_file[0] == '+' and relevant_line_in_file[1:] in line:
elif relevant_line_in_file[0] == '+' and relevant_line_in_file[1:].lstrip() in line:
# The model often adds a '+' to the beginning of the relevant_line_in_file even if originally
# it's a context line
position = i
@ -72,32 +147,44 @@ class GithubProvider(GitProvider):
if position == -1:
if settings.config.verbosity_level >= 2:
logging.info(f"Could not find position for {relevant_file} {relevant_line_in_file}")
subject_type = "FILE"
else:
path = relevant_file.strip()
self.pr.create_review_comment(body=body, commit_id=self.last_commit_id, path=path, position=position)
subject_type = "LINE"
path = relevant_file.strip()
# placeholder for future API support (already supported in single inline comment)
# return dict(body=body, path=path, position=position, subject_type=subject_type)
return dict(body=body, path=path, position=position) if subject_type == "LINE" else {}
def publish_code_suggestion(self, body: str,
relevant_file: str,
relevant_lines_start: int,
relevant_lines_end: int):
if not relevant_lines_start or relevant_lines_start == -1:
if settings.config.verbosity_level >= 2:
logging.exception(f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}")
return False
def publish_inline_comments(self, comments: list[dict]):
self.pr.create_review(commit=self.last_commit_id, comments=comments)
if relevant_lines_end<relevant_lines_start:
if settings.config.verbosity_level >= 2:
logging.exception(f"Failed to publish code suggestion, "
f"relevant_lines_end is {relevant_lines_end} and "
f"relevant_lines_start is {relevant_lines_start}")
return False
def publish_code_suggestions(self, code_suggestions: list):
"""
Publishes code suggestions as comments on the PR.
"""
post_parameters_list = []
for suggestion in code_suggestions:
body = suggestion['body']
relevant_file = suggestion['relevant_file']
relevant_lines_start = suggestion['relevant_lines_start']
relevant_lines_end = suggestion['relevant_lines_end']
if not relevant_lines_start or relevant_lines_start == -1:
if settings.config.verbosity_level >= 2:
logging.exception(
f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}")
continue
if relevant_lines_end < relevant_lines_start:
if settings.config.verbosity_level >= 2:
logging.exception(f"Failed to publish code suggestion, "
f"relevant_lines_end is {relevant_lines_end} and "
f"relevant_lines_start is {relevant_lines_start}")
continue
try:
import github.PullRequestComment
if relevant_lines_end > relevant_lines_start:
post_parameters = {
"body": body,
"commit_id": self.last_commit_id._identity,
"path": relevant_file,
"line": relevant_lines_end,
"start_line": relevant_lines_start,
@ -106,17 +193,14 @@ class GithubProvider(GitProvider):
else: # API is different for single line comments
post_parameters = {
"body": body,
"commit_id": self.last_commit_id._identity,
"path": relevant_file,
"line": relevant_lines_start,
"side": "RIGHT",
}
headers, data = self.pr._requester.requestJsonAndCheck(
"POST", f"{self.pr.url}/comments", input=post_parameters
)
github.PullRequestComment.PullRequestComment(
self.pr._requester, headers, data, completed=True
)
post_parameters_list.append(post_parameters)
try:
self.pr.create_review(commit=self.last_commit_id, comments=post_parameters_list)
return True
except Exception as e:
if settings.config.verbosity_level >= 2:
@ -125,7 +209,7 @@ class GithubProvider(GitProvider):
def remove_initial_comment(self):
try:
for comment in self.pr.comments_list:
for comment in getattr(self.pr, 'comments_list', []):
if comment.is_temporary:
comment.delete()
except Exception as e:
@ -161,6 +245,9 @@ class GithubProvider(GitProvider):
notifications = self.github_client.get_user().get_notifications(since=since)
return notifications
def get_issue_comments(self):
return self.pr.get_issue_comments()
@staticmethod
def _parse_pr_url(pr_url: str) -> Tuple[str, int]:
parsed_url = urlparse(pr_url)
@ -212,10 +299,17 @@ class GithubProvider(GitProvider):
raise ValueError(
"GitHub token is required when using user deployment. See: "
"https://github.com/Codium-ai/pr-agent#method-2-run-from-source") from e
return Github(token)
return Github(auth=Auth.Token(token))
def _get_repo(self):
return self.github_client.get_repo(self.repo)
if hasattr(self, 'repo_obj') and \
hasattr(self.repo_obj, 'full_name') and \
self.repo_obj.full_name == self.repo:
return self.repo_obj
else:
self.repo_obj = self.github_client.get_repo(self.repo)
return self.repo_obj
def _get_pr(self):
return self._get_repo().get_pull(self.pr_num)
@ -226,3 +320,23 @@ class GithubProvider(GitProvider):
except Exception:
file_content_str = ""
return file_content_str
def publish_labels(self, pr_types):
    """
    Send the given PR types as labels with a PUT request to the PR's issue labels URL.

    Each entry of `pr_types` becomes a {"name", "color"} item; names missing
    from the color table use the "Other" color. Any failure is logged and not raised.
    """
    try:
        label_color_map = {
            "Bug fix": "1d76db",
            "Tests": "e99695",
            "Bug fix with tests": "c5def5",
            "Refactoring": "bfdadc",
            "Enhancement": "bfd4f2",
            "Documentation": "d4c5f9",
            "Other": "d1bcf9",
        }
        post_parameters = [
            {"name": pr_type, "color": label_color_map.get(pr_type, "d1bcf9")}  # default to "Other" color
            for pr_type in pr_types
        ]
        labels_url = f"{self.pr.issue_url}/labels"
        _headers, _data = self.pr._requester.requestJsonAndCheck("PUT", labels_url, input=post_parameters)
    except Exception as e:
        logging.exception(f"Failed to publish labels, error: {e}")
def get_labels(self):
    """
    Return the names of the labels on the PR.

    Returns:
        A list with the `name` of every entry in `self.pr.labels`, or an empty
        list when reading them fails (the failure is logged).
    """
    try:
        names = []
        for label in self.pr.labels:
            names.append(label.name)
        return names
    except Exception as e:
        logging.exception(f"Failed to get labels, error: {e}")
        return []

View File

@ -4,14 +4,17 @@ from typing import Optional, Tuple
from urllib.parse import urlparse
import gitlab
from gitlab import GitlabGetError
from pr_agent.config_loader import settings
from .git_provider import FilePatchInfo, GitProvider, EDIT_TYPE
from ..algo.language_handler import is_valid_file
from .git_provider import EDIT_TYPE, FilePatchInfo, GitProvider
class GitLabProvider(GitProvider):
def __init__(self, merge_request_url: Optional[str] = None):
def __init__(self, merge_request_url: Optional[str] = None, incremental: Optional[bool] = False):
gitlab_url = settings.get("GITLAB.URL", None)
if not gitlab_url:
raise ValueError("GitLab URL is not set in the config file")
@ -19,8 +22,8 @@ class GitLabProvider(GitProvider):
if not gitlab_access_token:
raise ValueError("GitLab personal access token is not set in the config file")
self.gl = gitlab.Gitlab(
gitlab_url,
gitlab_access_token
url=gitlab_url,
oauth_token=gitlab_access_token
)
self.id_project = None
self.id_mr = None
@ -30,6 +33,12 @@ class GitLabProvider(GitProvider):
self._set_merge_request(merge_request_url)
self.RE_HUNK_HEADER = re.compile(
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
self.incremental = incremental
def is_supported(self, capability: str) -> bool:
    """
    Report whether this provider supports the named capability.

    Returns False for the capabilities listed below and True for any other name.
    """
    unsupported = ('get_issue_comments', 'create_inline_comment', 'publish_inline_comments')
    return capability not in unsupported
@property
def pr(self):
@ -42,31 +51,39 @@ class GitLabProvider(GitProvider):
self.last_diff = self.mr.diffs.list()[-1]
def _get_pr_file_content(self, file_path: str, branch: str) -> str:
return self.gl.projects.get(self.id_project).files.get(file_path, branch).decode()
try:
return self.gl.projects.get(self.id_project).files.get(file_path, branch).decode()
except GitlabGetError:
# In case of file creation the method returns GitlabGetError (404 file not found).
# In this case we return an empty string for the diff.
return ''
def get_diff_files(self) -> list[FilePatchInfo]:
diffs = self.mr.changes()['changes']
diff_files = []
for diff in diffs:
original_file_content_str = self._get_pr_file_content(diff['old_path'], self.mr.target_branch)
new_file_content_str = self._get_pr_file_content(diff['new_path'], self.mr.source_branch)
edit_type = EDIT_TYPE.MODIFIED
if diff['new_file']:
edit_type = EDIT_TYPE.ADDED
elif diff['deleted_file']:
edit_type = EDIT_TYPE.DELETED
elif diff['renamed_file']:
edit_type = EDIT_TYPE.RENAMED
try:
original_file_content_str = bytes.decode(original_file_content_str, 'utf-8')
new_file_content_str = bytes.decode(new_file_content_str, 'utf-8')
except UnicodeDecodeError:
logging.warning(
f"Cannot decode file {diff['old_path']} or {diff['new_path']} in merge request {self.id_mr}")
diff_files.append(
FilePatchInfo(original_file_content_str, new_file_content_str, diff['diff'], diff['new_path'],
edit_type=edit_type,
old_filename=None if diff['old_path'] == diff['new_path'] else diff['old_path']))
if is_valid_file(diff['new_path']):
original_file_content_str = self._get_pr_file_content(diff['old_path'], self.mr.target_branch)
new_file_content_str = self._get_pr_file_content(diff['new_path'], self.mr.source_branch)
edit_type = EDIT_TYPE.MODIFIED
if diff['new_file']:
edit_type = EDIT_TYPE.ADDED
elif diff['deleted_file']:
edit_type = EDIT_TYPE.DELETED
elif diff['renamed_file']:
edit_type = EDIT_TYPE.RENAMED
try:
if isinstance(original_file_content_str, bytes):
original_file_content_str = bytes.decode(original_file_content_str, 'utf-8')
if isinstance(new_file_content_str, bytes):
new_file_content_str = bytes.decode(new_file_content_str, 'utf-8')
except UnicodeDecodeError:
logging.warning(
f"Cannot decode file {diff['old_path']} or {diff['new_path']} in merge request {self.id_mr}")
diff_files.append(
FilePatchInfo(original_file_content_str, new_file_content_str, diff['diff'], diff['new_path'],
edit_type=edit_type,
old_filename=None if diff['old_path'] == diff['new_path'] else diff['old_path']))
self.diff_files = diff_files
return diff_files
@ -74,8 +91,12 @@ class GitLabProvider(GitProvider):
return [change['new_path'] for change in self.mr.changes()['changes']]
def publish_description(self, pr_title: str, pr_body: str):
logging.exception("Not implemented yet")
pass
try:
self.mr.title = pr_title
self.mr.description = pr_body
self.mr.save()
except Exception as e:
logging.exception(f"Could not update merge request {self.id_mr} description: {e}")
def publish_comment(self, mr_comment: str, is_temporary: bool = False):
comment = self.mr.notes.create({'body': mr_comment})
@ -89,6 +110,12 @@ class GitLabProvider(GitProvider):
self.send_inline_comment(body, edit_type, found, relevant_file, relevant_line_in_file, source_line_no,
target_file, target_line_no)
def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
raise NotImplementedError("Gitlab provider does not support creating inline comments yet")
def create_inline_comments(self, comments: list[dict]):
    """
    Publishing a batch of inline comments is not supported for GitLab.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("Gitlab provider does not support publishing inline comments yet")

# GitProvider declares this operation as the abstract method `publish_inline_comments`;
# expose it under that name too, while keeping `create_inline_comments` for existing callers.
publish_inline_comments = create_inline_comments
def send_inline_comment(self, body, edit_type, found, relevant_file, relevant_line_in_file, source_line_no,
target_file, target_line_no):
if not found:
@ -109,40 +136,29 @@ class GitLabProvider(GitProvider):
self.mr.discussions.create({'body': body,
'position': pos_obj})
def publish_code_suggestion(self, body: str,
relevant_file: str,
relevant_lines_start: int,
relevant_lines_end: int):
self.diff_files = self.diff_files if self.diff_files else self.get_diff_files()
target_file = None
for file in self.diff_files:
if file.filename == relevant_file:
if file.filename == relevant_file:
target_file = file
break
range = relevant_lines_end - relevant_lines_start + 1
body = body.replace('```suggestion', f'```suggestion:-0+{range}')
def publish_code_suggestions(self, code_suggestions: list):
for suggestion in code_suggestions:
body = suggestion['body']
relevant_file = suggestion['relevant_file']
relevant_lines_start = suggestion['relevant_lines_start']
relevant_lines_end = suggestion['relevant_lines_end']
d = self.last_diff
#
# pos_obj = {'position_type': 'text',
# 'new_path': target_file.filename,
# 'old_path': target_file.old_filename if target_file.old_filename else target_file.filename,
# 'base_sha': d.base_commit_sha, 'start_sha': d.start_commit_sha, 'head_sha': d.head_commit_sha}
lines = target_file.head_file.splitlines()
relevant_line_in_file = lines[relevant_lines_start - 1]
edit_type, found, source_line_no, target_file, target_line_no = self.find_in_file(target_file, relevant_line_in_file)
self.send_inline_comment(body, edit_type, found, relevant_file, relevant_line_in_file, source_line_no,
target_file, target_line_no)
# if lines[relevant_lines_start][0] == '-':
# pos_obj['old_line'] = relevant_lines_start
# elif lines[relevant_lines_start][0] == '+':
# pos_obj['new_line'] = relevant_lines_start
# else:
# pos_obj['new_line'] = relevant_lines_start
# pos_obj['old_line'] = relevant_lines_start
# self.mr.discussions.create({'body': body,
# 'position': pos_obj})
self.diff_files = self.diff_files if self.diff_files else self.get_diff_files()
target_file = None
for file in self.diff_files:
if file.filename == relevant_file:
if file.filename == relevant_file:
target_file = file
break
range = relevant_lines_end - relevant_lines_start + 1
body = body.replace('```suggestion', f'```suggestion:-0+{range}')
lines = target_file.head_file.splitlines()
relevant_line_in_file = lines[relevant_lines_start - 1]
edit_type, found, source_line_no, target_file, target_line_no = self.find_in_file(target_file,
relevant_line_in_file)
self.send_inline_comment(body, edit_type, found, relevant_file, relevant_line_in_file, source_line_no,
target_file, target_line_no)
def search_line(self, relevant_file, relevant_line_in_file):
target_file = None
@ -162,7 +178,7 @@ class GitLabProvider(GitProvider):
target_file = file
patch = file.patch
patch_lines = patch.splitlines()
for i, line in enumerate(patch_lines):
for line in patch_lines:
if line.startswith('@@'):
match = self.RE_HUNK_HEADER.match(line)
if not match:
@ -182,7 +198,7 @@ class GitLabProvider(GitProvider):
found = True
edit_type = self.get_edit_type(line)
break
elif relevant_line_in_file[0] == '+' and relevant_line_in_file[1:] in line:
elif relevant_line_in_file[0] == '+' and relevant_line_in_file[1:].lstrip() in line:
# The model often adds a '+' to the beginning of the relevant_line_in_file even if originally
# it's a context line
found = True
@ -218,20 +234,33 @@ class GitLabProvider(GitProvider):
def get_pr_description(self):
return self.mr.description
def _parse_merge_request_url(self, merge_request_url: str) -> Tuple[int, int]:
def get_issue_comments(self):
raise NotImplementedError("GitLab provider does not support issue comments yet")
def _parse_merge_request_url(self, merge_request_url: str) -> Tuple[str, int]:
parsed_url = urlparse(merge_request_url)
path_parts = parsed_url.path.strip('/').split('/')
if path_parts[-2] != 'merge_requests':
if 'merge_requests' not in path_parts:
raise ValueError("The provided URL does not appear to be a GitLab merge request URL")
mr_index = path_parts.index('merge_requests')
# Ensure there is an ID after 'merge_requests'
if len(path_parts) <= mr_index + 1:
raise ValueError("The provided URL does not contain a merge request ID")
try:
mr_id = int(path_parts[-1])
mr_id = int(path_parts[mr_index + 1])
except ValueError as e:
raise ValueError("Unable to convert merge request ID to integer") from e
# Gitlab supports access by both project numeric ID as well as 'namespace/project_name'
return "/".join(path_parts[:2]), mr_id
# Handle special delimiter (-)
project_path = "/".join(path_parts[:mr_index])
if project_path.endswith('/-'):
project_path = project_path[:-2]
# Return the path before 'merge_requests' and the ID
return project_path, mr_id
def _get_merge_request(self):
mr = self.gl.projects.get(self.id_project).mergerequests.get(self.id_mr)
@ -239,3 +268,16 @@ class GitLabProvider(GitProvider):
def get_user_id(self):
    """Return the acting user's ID; this provider has none to report, so always ``None``."""
    user_id = None
    return user_id
def publish_labels(self, pr_types):
    """
    Replace the merge request's labels with the de-duplicated ``pr_types`` and save it.

    Any failure is logged together with its traceback and then swallowed, so this
    method never raises to the caller.
    """
    try:
        merge_request = self.mr
        # set() drops duplicates; the resulting label order is therefore unspecified.
        merge_request.labels = list(set(pr_types))
        merge_request.save()
    except Exception as err:
        logging.exception(f"Failed to publish labels, error: {err}")
def publish_inline_comments(self, comments: list[dict]):
    """Intentional no-op: the given ``comments`` are ignored and nothing is published."""
    return None
def get_labels(self):
    """Return the ``labels`` attribute of the wrapped merge request (``self.mr``)."""
    current_labels = self.mr.labels
    return current_labels

View File

@ -0,0 +1,178 @@
import logging
from collections import Counter
from pathlib import Path
from typing import List
from git import Repo
from pr_agent.config_loader import _find_repository_root, settings
from pr_agent.git_providers.git_provider import EDIT_TYPE, FilePatchInfo, GitProvider
class PullRequestMimic:
    """
    Minimal stand-in for the PyGithub PullRequest class, used by the LocalGitProvider.

    It is a plain data holder exposing only a ``title`` and the list of ``diff_files``.
    """

    def __init__(self, title: str, diff_files: List[FilePatchInfo]):
        # Both arguments are stored unchanged; no validation or copying takes place.
        self.diff_files = diff_files
        self.title = title
class LocalGitProvider(GitProvider):
    """
    GitProvider implementation that works on a local git repository.

    It mimics the pull-request functionality of the GitProvider interface without requiring a
    hosted git service: instead of a PR URL, the user supplies a target branch name, and the
    currently checked-out branch (HEAD) is diffed against it to produce the patch.
    For the MVP it only supports the /review and /describe capabilities.
    """

    def __init__(self, target_branch_name, incremental=False):
        """
        Open the enclosing repository and build the PR-mimic for the current HEAD branch.

        Args:
            target_branch_name: Name of the local branch that HEAD is compared against.
            incremental: Currently unused by this class.

        Raises:
            ValueError: If no repository root is found, or the working tree is dirty.
            KeyError: If ``target_branch_name`` is not one of the repository's local branches.
        """
        self.repo_path = _find_repository_root()
        if self.repo_path is None:
            raise ValueError('Could not find repository root')
        self.repo = Repo(self.repo_path)
        self.head_branch_name = self.repo.head.ref.name
        self.target_branch_name = target_branch_name
        self._prepare_repo()
        self.diff_files = None
        self.pr = PullRequestMimic(self.get_pr_title(), self.get_diff_files())

        # Output locations: a configured path wins, otherwise fall back to files in the repo root.
        description_path = settings.get('local.description_path')
        self.description_path = description_path if description_path is not None \
            else self.repo_path / 'description.md'
        review_path = settings.get('local.review_path')
        self.review_path = review_path if review_path is not None \
            else self.repo_path / 'review.md'

        # inline code comments are not supported for local git repositories
        settings.pr_reviewer.inline_code_comments = False

    def _prepare_repo(self):
        """
        Prepare the repository for PR-mimic generation.

        Raises:
            ValueError: If the repository has uncommitted changes.
            KeyError: If the target branch does not exist locally.
        """
        logging.debug('Preparing repository for PR-mimic generation...')
        if self.repo.is_dirty():
            raise ValueError('The repository is not in a clean state. Please commit or stash pending changes.')
        if self.target_branch_name not in self.repo.heads:
            raise KeyError(f'Branch: {self.target_branch_name} does not exist')

    def is_supported(self, capability: str) -> bool:
        """Return False for the capabilities this provider cannot offer, True for anything else."""
        unsupported = ['get_issue_comments', 'create_inline_comment', 'publish_inline_comments', 'get_labels']
        return capability not in unsupported

    def get_diff_files(self) -> list[FilePatchInfo]:
        """
        Build one FilePatchInfo per file that differs between HEAD and the merge base with the target branch.

        The result is also cached on ``self.diff_files``.
        """
        diffs = self.repo.head.commit.diff(
            self.repo.merge_base(self.repo.head, self.repo.branches[self.target_branch_name]),
            create_patch=True,
            R=True
        )
        diff_files = []
        for diff_item in diffs:
            # NOTE(review): blobs are decoded as strict UTF-8, so a non-UTF-8 (e.g. binary) blob
            # raises UnicodeDecodeError here.
            if diff_item.a_blob is not None:
                original_file_content_str = diff_item.a_blob.data_stream.read().decode('utf-8')
            else:
                original_file_content_str = ""  # empty file
            if diff_item.b_blob is not None:
                new_file_content_str = diff_item.b_blob.data_stream.read().decode('utf-8')
            else:
                new_file_content_str = ""  # empty file

            edit_type = EDIT_TYPE.MODIFIED
            if diff_item.new_file:
                edit_type = EDIT_TYPE.ADDED
            elif diff_item.deleted_file:
                edit_type = EDIT_TYPE.DELETED
            elif diff_item.renamed_file:
                edit_type = EDIT_TYPE.RENAMED

            diff_files.append(
                FilePatchInfo(original_file_content_str,
                              new_file_content_str,
                              diff_item.diff.decode('utf-8'),
                              diff_item.b_path,
                              edit_type=edit_type,
                              # Only record an old filename when the path actually changed.
                              old_filename=None if diff_item.a_path == diff_item.b_path else diff_item.a_path
                              )
            )
        self.diff_files = diff_files
        return diff_files

    def get_files(self) -> List[str]:
        """
        Returns a list of files with changes in the diff.
        """
        diff_index = self.repo.head.commit.diff(
            self.repo.merge_base(self.repo.head, self.repo.branches[self.target_branch_name]),
            R=True
        )
        # Get the list of changed files
        return [item.a_path for item in diff_index]

    def publish_description(self, pr_title: str, pr_body: str):
        """Write the title, a newline, and the body to ``self.description_path`` (overwriting it)."""
        # Explicit UTF-8: the locale default codec may be unable to encode non-ASCII output.
        with open(self.description_path, "w", encoding='utf-8') as file:
            file.write(pr_title + '\n' + pr_body)

    def publish_comment(self, pr_comment: str, is_temporary: bool = False):
        """Write the comment to ``self.review_path`` (overwriting it); ``is_temporary`` is ignored."""
        # Explicit UTF-8: the locale default codec may be unable to encode non-ASCII output.
        with open(self.review_path, "w", encoding='utf-8') as file:
            file.write(pr_comment)

    def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
        """Not supported by the local provider; always raises NotImplementedError."""
        raise NotImplementedError('Publishing inline comments is not implemented for the local git provider')

    def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
        """Not supported by the local provider; always raises NotImplementedError."""
        raise NotImplementedError('Creating inline comments is not implemented for the local git provider')

    def publish_inline_comments(self, comments: list[dict]):
        """Not supported by the local provider; always raises NotImplementedError."""
        raise NotImplementedError('Publishing inline comments is not implemented for the local git provider')

    def publish_code_suggestion(self, body: str, relevant_file: str,
                                relevant_lines_start: int, relevant_lines_end: int):
        """Not supported by the local provider; always raises NotImplementedError."""
        raise NotImplementedError('Publishing code suggestions is not implemented for the local git provider')

    def publish_code_suggestions(self, code_suggestions: list):
        """Not supported by the local provider; always raises NotImplementedError."""
        raise NotImplementedError('Publishing code suggestions is not implemented for the local git provider')

    def publish_labels(self, labels):
        pass  # Not applicable to the local git provider, but required by the interface

    def remove_initial_comment(self):
        pass  # Not applicable to the local git provider, but required by the interface

    def get_languages(self):
        """
        Calculate percentage of languages in repository. Used for hunk prioritisation.

        Returns:
            dict mapping a lower-cased file extension (without the leading dot; ``''`` for files
            with no extension) to the percentage of repository files that carry it.
        """
        # Get all files in repository
        filepaths = [Path(item.path) for item in self.repo.tree().traverse() if item.type == 'blob']
        # Identify language by file extension and count
        lang_count = Counter(filepath.suffix.lower().lstrip('.') for filepath in filepaths)
        # Convert counts to percentages
        total_files = len(filepaths)
        return {lang: count / total_files * 100 for lang, count in lang_count.items()}

    def get_pr_branch(self):
        """Return the repository's HEAD reference."""
        return self.repo.head

    def get_user_id(self):
        return -1  # Not used anywhere for the local provider, but required by the interface

    def get_pr_description(self):
        """Return the commit messages of ``<target branch>..HEAD`` joined by spaces, cut to 200 characters."""
        commits_diff = list(self.repo.iter_commits(self.target_branch_name + '..HEAD'))
        # Get the commit messages and concatenate
        commit_messages = " ".join(commit.message for commit in commits_diff)
        # TODO Handle the description better - maybe use gpt-3.5 summarisation here?
        return commit_messages[:200]  # Use max 200 characters

    def get_pr_title(self):
        """
        Substitutes the branch-name as the PR-mimic title.
        """
        return self.head_branch_name

    def get_issue_comments(self):
        """Not supported by the local provider; always raises NotImplementedError."""
        raise NotImplementedError('Getting issue comments is not implemented for the local git provider')

    def get_labels(self):
        """Not supported by the local provider; always raises NotImplementedError."""
        raise NotImplementedError('Getting labels is not implemented for the local git provider')

View File

@ -1,73 +1,68 @@
import asyncio
import json
import os
import re
from pr_agent.agent.pr_agent import PRAgent
from pr_agent.config_loader import settings
from pr_agent.tools.pr_code_suggestions import PRCodeSuggestions
from pr_agent.tools.pr_description import PRDescription
from pr_agent.tools.pr_questions import PRQuestions
from pr_agent.tools.pr_reviewer import PRReviewer
async def run_action():
GITHUB_EVENT_NAME = os.environ.get('GITHUB_EVENT_NAME', None)
# Get environment variables
GITHUB_EVENT_NAME = os.environ.get('GITHUB_EVENT_NAME')
GITHUB_EVENT_PATH = os.environ.get('GITHUB_EVENT_PATH')
OPENAI_KEY = os.environ.get('OPENAI_KEY')
OPENAI_ORG = os.environ.get('OPENAI_ORG')
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN')
# Check if required environment variables are set
if not GITHUB_EVENT_NAME:
print("GITHUB_EVENT_NAME not set")
return
GITHUB_EVENT_PATH = os.environ.get('GITHUB_EVENT_PATH', None)
if not GITHUB_EVENT_PATH:
print("GITHUB_EVENT_PATH not set")
return
try:
event_payload = json.load(open(GITHUB_EVENT_PATH, 'r'))
except json.decoder.JSONDecodeError as e:
print(f"Failed to parse JSON: {e}")
return
OPENAI_KEY = os.environ.get('OPENAI_KEY', None)
if not OPENAI_KEY:
print("OPENAI_KEY not set")
return
OPENAI_ORG = os.environ.get('OPENAI_ORG', None)
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', None)
if not GITHUB_TOKEN:
print("GITHUB_TOKEN not set")
return
# Set the environment variables in the settings
settings.set("OPENAI.KEY", OPENAI_KEY)
if OPENAI_ORG:
settings.set("OPENAI.ORG", OPENAI_ORG)
settings.set("GITHUB.USER_TOKEN", GITHUB_TOKEN)
settings.set("GITHUB.DEPLOYMENT_TYPE", "user")
# Load the event payload
try:
with open(GITHUB_EVENT_PATH, 'r') as f:
event_payload = json.load(f)
except json.decoder.JSONDecodeError as e:
print(f"Failed to parse JSON: {e}")
return
# Handle pull request event
if GITHUB_EVENT_NAME == "pull_request":
action = event_payload.get("action", None)
action = event_payload.get("action")
if action in ["opened", "reopened"]:
pr_url = event_payload.get("pull_request", {}).get("url", None)
pr_url = event_payload.get("pull_request", {}).get("url")
if pr_url:
await PRReviewer(pr_url).review()
# Handle issue comment event
elif GITHUB_EVENT_NAME == "issue_comment":
action = event_payload.get("action", None)
action = event_payload.get("action")
if action in ["created", "edited"]:
comment_body = event_payload.get("comment", {}).get("body", None)
comment_body = event_payload.get("comment", {}).get("body")
if comment_body:
pr_url = event_payload.get("issue", {}).get("pull_request", {}).get("url", None)
pr_url = event_payload.get("issue", {}).get("pull_request", {}).get("url")
if pr_url:
body = comment_body.strip().lower()
if any(cmd in body for cmd in ["/review", "/review_pr"]):
await PRReviewer(pr_url).review()
elif any(cmd in body for cmd in ["/describe", "/describe_pr"]):
await PRDescription(pr_url).describe()
elif any(cmd in body for cmd in ["/improve", "/improve_code"]):
await PRCodeSuggestions(pr_url).suggest()
elif any(cmd in body for cmd in ["/ask", "/ask_question"]):
pattern = r'(/ask|/ask_question)\s*(.*)'
matches = re.findall(pattern, comment_body, re.IGNORECASE)
if matches:
question = matches[0][1]
await PRQuestions(pr_url, question).answer()
else:
print(f"Unknown command: {body}")
await PRAgent().handle_request(pr_url, body)
if __name__ == '__main__':
asyncio.run(run_action())
asyncio.run(run_action())

View File

@ -1,3 +1,4 @@
from typing import Dict, Any
import logging
import sys
@ -14,51 +15,66 @@ router = APIRouter()
@router.post("/api/v1/github_webhooks")
async def handle_github_webhooks(request: Request, response: Response):
logging.debug("Received a github webhook")
"""
Receives and processes incoming GitHub webhook requests.
Verifies the request signature, parses the request body, and passes it to the handle_request function for further processing.
"""
logging.debug("Received a GitHub webhook")
try:
body = await request.json()
except Exception as e:
logging.error("Error parsing request body", e)
raise HTTPException(status_code=400, detail="Error parsing request body") from e
body_bytes = await request.body()
signature_header = request.headers.get('x-hub-signature-256', None)
try:
webhook_secret = settings.github.webhook_secret
except AttributeError:
webhook_secret = None
webhook_secret = getattr(settings.github, 'webhook_secret', None)
if webhook_secret:
verify_signature(body_bytes, webhook_secret, signature_header)
logging.debug(f'Request body:\n{body}')
return await handle_request(body)
async def handle_request(body):
action = body.get("action", None)
installation_id = body.get("installation", {}).get("id", None)
async def handle_request(body: Dict[str, Any]):
"""
Handle incoming GitHub webhook requests.
Args:
body: The request body.
"""
action = body.get("action")
installation_id = body.get("installation", {}).get("id")
settings.set("GITHUB.INSTALLATION_ID", installation_id)
agent = PRAgent()
if action == 'created':
if "comment" not in body:
return {}
comment_body = body.get("comment", {}).get("body", None)
if 'sender' in body and 'login' in body['sender'] and 'bot' in body['sender']['login']:
comment_body = body.get("comment", {}).get("body")
sender = body.get("sender", {}).get("login")
if sender and 'bot' in sender:
return {}
if "issue" not in body and "pull_request" not in body["issue"]:
if "issue" not in body or "pull_request" not in body["issue"]:
return {}
pull_request = body["issue"]["pull_request"]
api_url = pull_request.get("url", None)
api_url = pull_request.get("url")
await agent.handle_request(api_url, comment_body)
elif action in ["opened"] or 'reopened' in action:
pull_request = body.get("pull_request", None)
pull_request = body.get("pull_request")
if not pull_request:
return {}
api_url = pull_request.get("url", None)
if api_url is None:
api_url = pull_request.get("url")
if not api_url:
return {}
await agent.handle_request(api_url, "/review")
else:
return {}
return {}
@router.get("/")
@ -76,4 +92,4 @@ def start():
if __name__ == '__main__':
start()
start()

View File

@ -1,6 +1,5 @@
import asyncio
import logging
import re
import sys
from datetime import datetime, timezone
@ -10,38 +9,46 @@ from pr_agent.agent.pr_agent import PRAgent
from pr_agent.config_loader import settings
from pr_agent.git_providers import get_git_provider
from pr_agent.servers.help import bot_help_text
from pr_agent.tools.pr_code_suggestions import PRCodeSuggestions
from pr_agent.tools.pr_description import PRDescription
from pr_agent.tools.pr_questions import PRQuestions
from pr_agent.tools.pr_reviewer import PRReviewer
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
NOTIFICATION_URL = "https://api.github.com/notifications"
def now() -> str:
    """
    Return the current UTC time as an ISO 8601 string.

    Returns:
        str: The current UTC timestamp, with the ``+00:00`` offset rewritten as ``Z``.
    """
    return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
async def polling_loop():
"""
Polls for notifications and handles them accordingly.
"""
handled_ids = set()
since = [now()]
last_modified = [None]
git_provider = get_git_provider()()
user_id = git_provider.get_user_id()
agent = PRAgent()
try:
deployment_type = settings.github.deployment_type
token = settings.github.user_token
except AttributeError:
deployment_type = 'none'
token = None
if deployment_type != 'user':
raise ValueError("Deployment mode must be set to 'user' to get notifications")
if not token:
raise ValueError("User token must be set to get notifications")
async with aiohttp.ClientSession() as session:
while True:
try:
@ -57,6 +64,7 @@ async def polling_loop():
params["since"] = since[0]
if last_modified[0]:
headers["If-Modified-Since"] = last_modified[0]
async with session.get(NOTIFICATION_URL, headers=headers, params=params) as response:
if response.status == 200:
if 'Last-Modified' in response.headers:
@ -103,5 +111,6 @@ async def polling_loop():
except Exception as e:
logging.error(f"Exception during processing of a notification: {e}")
if __name__ == '__main__':
asyncio.run(polling_loop())
asyncio.run(polling_loop())

View File

@ -1,64 +0,0 @@
import asyncio
import time
import gitlab
from pr_agent.agent.pr_agent import PRAgent
from pr_agent.config_loader import settings
gl = gitlab.Gitlab(
settings.get("GITLAB.URL"),
private_token=settings.get("GITLAB.PERSONAL_ACCESS_TOKEN")
)
# Set the list of projects to monitor
projects_to_monitor = settings.get("GITLAB.PROJECTS_TO_MONITOR")
magic_word = settings.get("GITLAB.MAGIC_WORD")
# Hold the previous seen comments
previous_comments = set()
def check_comments():
    """
    Scan the open merge requests of every monitored project for unseen magic-word notes.

    A note qualifies when its ID is not yet in ``previous_comments`` and its body starts with
    ``magic_word``; qualifying note IDs are added to ``previous_comments`` as a side effect.

    Returns:
        dict keyed by note ID; each value holds the note ``body`` with the magic word stripped,
        the ``project`` name, and the merge request object under ``mr``.
    """
    print('Polling')
    fresh = {}
    for project_ref in projects_to_monitor:
        project = gl.projects.get(project_ref)
        for mr in project.mergerequests.list(state='opened'):
            for note in mr.notes.list(get_all=True):
                # Skip notes already handled and notes not addressed to the bot.
                if note.id in previous_comments or not note.body.startswith(magic_word):
                    continue
                fresh[note.id] = {
                    'body': note.body[len(magic_word):],
                    'project': project.name,
                    'mr': mr,
                }
                previous_comments.add(note.id)
                print(f"New comment in project {project.name}, merge request {mr.title}: {note.body}")
    return fresh
def handle_new_comments(new_comments):
    """
    Dispatch every collected comment to a PRAgent, one at a time.

    Args:
        new_comments: Mapping whose values are dicts with a ``body`` string and an ``mr`` object
            exposing ``web_url``. The keys are not used.
    """
    print('Handling new comments')
    agent = PRAgent()
    for comment in new_comments.values():
        request_text = comment['body']
        print(f"Handling comment: {request_text}")
        # Each request is run to completion in its own event loop before the next one starts.
        asyncio.run(agent.handle_request(comment['mr'].web_url, request_text))
def run():
    """
    Poll GitLab forever, handling newly found comments after each polling interval.

    The first scan only records the comments that already exist, so they are never handled.
    The loop then sleeps for ``GITLAB.POLLING_INTERVAL_SECONDS`` between scans.
    """
    assert settings.get('CONFIG.GIT_PROVIDER') == 'gitlab', 'This script is only for GitLab'
    # Seed previous_comments; the result of this first scan is deliberately discarded.
    check_comments()
    while True:
        time.sleep(settings.get("GITLAB.POLLING_INTERVAL_SECONDS"))
        pending = check_comments()
        if not pending:
            continue
        handle_new_comments(pending)
if __name__ == '__main__':
run()

View File

@ -0,0 +1,47 @@
import logging
import uvicorn
from fastapi import APIRouter, FastAPI, Request, status
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse
from starlette.background import BackgroundTasks
from pr_agent.agent.pr_agent import PRAgent
from pr_agent.config_loader import settings
app = FastAPI()
router = APIRouter()
@router.post("/webhook")
async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request):
    """
    Receive a GitLab webhook and schedule the matching PR-Agent request in the background.

    - A ``merge_request`` event whose action is ``open`` or ``reopen`` schedules a ``/review``.
    - A ``note`` event that carries a ``merge_request`` schedules the note text as the request.

    Any other payload is ignored. The response is always HTTP 200 with ``{"message": "success"}``,
    because the work itself runs after the response is sent.
    """
    data = await request.json()
    object_kind = data.get('object_kind')
    # Webhook payloads are untrusted: use tolerant lookups so a missing key is ignored
    # instead of raising KeyError.
    attributes = data.get('object_attributes') or {}
    if object_kind == 'merge_request' and attributes.get('action') in ['open', 'reopen']:
        logging.info(f"A merge request has been opened: {attributes.get('title')}")
        url = attributes.get('url')
        background_tasks.add_task(PRAgent().handle_request, url, "/review")
    elif object_kind == 'note' and data.get('event_type') == 'note':
        if 'merge_request' in data:
            mr = data['merge_request']
            url = mr.get('url')
            body = attributes.get('note')
            background_tasks.add_task(PRAgent().handle_request, url, body)
    return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"}))
def start():
    """
    Validate the GitLab settings, select the gitlab provider, and serve the webhook router.

    Raises:
        ValueError: If ``GITLAB.URL`` or ``GITLAB.PERSONAL_ACCESS_TOKEN`` is missing or empty
            (checked in that order).
    """
    for required_key in ("GITLAB.URL", "GITLAB.PERSONAL_ACCESS_TOKEN"):
        if not settings.get(required_key, None):
            raise ValueError(f"{required_key} is not set")
    settings.config.git_provider = "gitlab"
    # A fresh application instance is created here, separate from the module-level ``app``.
    app = FastAPI()
    app.include_router(router)
    uvicorn.run(app, host="0.0.0.0", port=3000)
if __name__ == '__main__':
start()

View File

@ -1,8 +1,9 @@
commands_text = "> /review - Request a review of the latest update to the PR.\n" \
"> /describe - Modify the PR title and description based on the contents of the PR.\n" \
"> /improve - Suggest improvements to the code in the PR. " \
commands_text = "> **/review [-i]**: Request a review of your Pull Request. For an incremental review, which only " \
"considers changes since the last review, include the '-i' option.\n" \
"> **/describe**: Modify the PR title and description based on the contents of the PR.\n" \
"> **/improve**: Suggest improvements to the code in the PR. " \
"These will be provided as pull request comments, ready to commit.\n" \
"> /ask <QUESTION> - Pose a question about the PR.\n"
"> **/ask \\<QUESTION\\>**: Pose a question about the PR.\n"
def bot_help_text(user: str):

View File

@ -0,0 +1,18 @@
import logging
from fastapi import FastAPI
from mangum import Mangum
from pr_agent.servers.github_app import router
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
app = FastAPI()
app.include_router(router)
handler = Mangum(app, lifespan="off")
def serverless(event, context):
    """Forward ``event`` and ``context`` to the Mangum-wrapped FastAPI app and return its result."""
    response = handler(event, context)
    return response

View File

@ -21,3 +21,7 @@ def verify_signature(payload_body, secret_token, signature_header):
if not hmac.compare_digest(expected_signature, signature_header):
raise HTTPException(status_code=403, detail="Request signatures didn't match!")
class RateLimitExceeded(Exception):
    """Signals that the git provider's API rate limit has been exceeded."""

View File

@ -1,24 +1,36 @@
[config]
model="gpt-4-0613"
model="gpt-4"
fallback_models=["gpt-3.5-turbo-16k"]
git_provider="github"
publish_review=true
verbosity_level=2 # 0,1,2
publish_output=true
publish_output_progress=true
verbosity_level=0 # 0,1,2
use_extra_bad_extensions=false
[pr_reviewer]
require_focused_review=true
require_score_review=false
require_tests_review=true
require_security_review=true
num_code_suggestions=3
num_code_suggestions=0
inline_code_comments = true
ask_and_reflect=false
[pr_description]
publish_description_as_comment=false
[pr_questions]
[pr_code_suggestions]
num_code_suggestions=4
[pr_update_changelog]
push_changelog_changes=false
[github]
# The type of deployment to create. Valid values are 'app' or 'user'.
deployment_type = "user"
ratelimit_retries = 5
[gitlab]
# URL to the gitlab service
@ -32,3 +44,8 @@ magic_word = "AutoReview"
# Polling interval
polling_interval_seconds = 30
[local]
# LocalGitProvider settings - uncomment to use paths other than default
# description_path= "path/to/description.md"
# review_path= "path/to/review.md"

View File

@ -0,0 +1,434 @@
[bad_extensions]
default = [
'app',
'bin',
'bmp',
'bz2',
'class',
'csv',
'dat',
'db',
'dll',
'dylib',
'egg',
'eot',
'exe',
'gif',
'gitignore',
'glif',
'gradle',
'gz',
'ico',
'jar',
'jpeg',
'jpg',
'lo',
'lock',
'log',
'mp3',
'mp4',
'nar',
'o',
'ogg',
'otf',
'p',
'pdf',
'png',
'pickle',
'pkl',
'pyc',
'pyd',
'pyo',
'rkt',
'so',
'ss',
'svg',
'tar',
'tsv',
'ttf',
'war',
'webm',
'woff',
'woff2',
'xz',
'zip',
'zst',
'snap'
]
extra = [
'md',
'txt'
]
[language_extension_map_org]
ABAP = [".abap", ]
"AGS Script" = [".ash", ]
AMPL = [".ampl", ]
ANTLR = [".g4", ]
"API Blueprint" = [".apib", ]
APL = [".apl", ".dyalog", ]
ASP = [".asp", ".asax", ".ascx", ".ashx", ".asmx", ".aspx", ".axd", ]
ATS = [".dats", ".hats", ".sats", ]
ActionScript = [".as", ]
Ada = [".adb", ".ada", ".ads", ]
Agda = [".agda", ]
Alloy = [".als", ]
ApacheConf = [".apacheconf", ".vhost", ]
AppleScript = [".applescript", ".scpt", ]
Arc = [".arc", ]
Arduino = [".ino", ]
AsciiDoc = [".asciidoc", ".adoc", ]
AspectJ = [".aj", ]
Assembly = [".asm", ".a51", ".nasm", ]
Augeas = [".aug", ]
AutoHotkey = [".ahk", ".ahkl", ]
AutoIt = [".au3", ]
Awk = [".awk", ".auk", ".gawk", ".mawk", ".nawk", ]
Batchfile = [".bat", ".cmd", ]
Befunge = [".befunge", ]
Bison = [".bison", ]
BitBake = [".bb", ]
BlitzBasic = [".decls", ]
BlitzMax = [".bmx", ]
Bluespec = [".bsv", ]
Boo = [".boo", ]
Brainfuck = [".bf", ]
Brightscript = [".brs", ]
Bro = [".bro", ]
C = [".c", ".cats", ".h", ".idc", ".w", ]
"C#" = [".cs", ".cake", ".cshtml", ".csx", ]
"C++" = [".cpp", ".c++", ".cc", ".cp", ".cxx", ".h++", ".hh", ".hpp", ".hxx", ".inl", ".ipp", ".tcc", ".tpp", ".C", ".H", ]
C-ObjDump = [".c-objdump", ]
"C2hs Haskell" = [".chs", ]
CLIPS = [".clp", ]
CMake = [".cmake", ".cmake.in", ]
COBOL = [".cob", ".cbl", ".ccp", ".cobol", ".cpy", ]
CSS = [".css", ]
CSV = [".csv", ]
"Cap'n Proto" = [".capnp", ]
CartoCSS = [".mss", ]
Ceylon = [".ceylon", ]
Chapel = [".chpl", ]
ChucK = [".ck", ]
Cirru = [".cirru", ]
Clarion = [".clw", ]
Clean = [".icl", ".dcl", ]
Click = [".click", ]
Clojure = [".clj", ".boot", ".cl2", ".cljc", ".cljs", ".cljs.hl", ".cljscm", ".cljx", ".hic", ]
CoffeeScript = [".coffee", "._coffee", ".cjsx", ".cson", ".iced", ]
ColdFusion = [".cfm", ".cfml", ]
"ColdFusion CFC" = [".cfc", ]
"Common Lisp" = [".lisp", ".asd", ".lsp", ".ny", ".podsl", ".sexp", ]
"Component Pascal" = [".cps", ]
Coq = [".coq", ]
Cpp-ObjDump = [".cppobjdump", ".c++-objdump", ".c++objdump", ".cpp-objdump", ".cxx-objdump", ]
Creole = [".creole", ]
Crystal = [".cr", ]
Csound = [".csd", ]
Cucumber = [".feature", ]
Cuda = [".cu", ".cuh", ]
Cycript = [".cy", ]
Cython = [".pyx", ".pxd", ".pxi", ]
D = [".di", ]
D-ObjDump = [".d-objdump", ]
"DIGITAL Command Language" = [".com", ]
DM = [".dm", ]
"DNS Zone" = [".zone", ".arpa", ]
"Darcs Patch" = [".darcspatch", ".dpatch", ]
Dart = [".dart", ]
Diff = [".diff", ".patch", ]
Dockerfile = [".dockerfile", "Dockerfile", ]
Dogescript = [".djs", ]
Dylan = [".dylan", ".dyl", ".intr", ".lid", ]
E = [".E", ]
ECL = [".ecl", ".eclxml", ]
Eagle = [".sch", ".brd", ]
"Ecere Projects" = [".epj", ]
Eiffel = [".e", ]
Elixir = [".ex", ".exs", ]
Elm = [".elm", ]
"Emacs Lisp" = [".el", ".emacs", ".emacs.desktop", ]
EmberScript = [".em", ".emberscript", ]
Erlang = [".erl", ".escript", ".hrl", ".xrl", ".yrl", ]
"F#" = [".fs", ".fsi", ".fsx", ]
FLUX = [".flux", ]
FORTRAN = [".f90", ".f", ".f03", ".f08", ".f77", ".f95", ".for", ".fpp", ]
Factor = [".factor", ]
Fancy = [".fy", ".fancypack", ]
Fantom = [".fan", ]
Formatted = [".eam.fs", ]
Forth = [".fth", ".4th", ".forth", ".frt", ]
FreeMarker = [".ftl", ]
G-code = [".g", ".gco", ".gcode", ]
GAMS = [".gms", ]
GAP = [".gap", ".gi", ]
GAS = [".s", ]
GDScript = [".gd", ]
GLSL = [".glsl", ".fp", ".frag", ".frg", ".fsh", ".fshader", ".geo", ".geom", ".glslv", ".gshader", ".shader", ".vert", ".vrx", ".vsh", ".vshader", ]
Genshi = [".kid", ]
"Gentoo Ebuild" = [".ebuild", ]
"Gentoo Eclass" = [".eclass", ]
"Gettext Catalog" = [".po", ".pot", ]
Glyph = [".glf", ]
Gnuplot = [".gp", ".gnu", ".gnuplot", ".plot", ".plt", ]
Go = [".go", ]
Golo = [".golo", ]
Gosu = [".gst", ".gsx", ".vark", ]
Grace = [".grace", ]
Gradle = [".gradle", ]
"Grammatical Framework" = [".gf", ]
GraphQL = [".graphql", ]
"Graphviz (DOT)" = [".dot", ".gv", ]
Groff = [".man", ".1", ".1in", ".1m", ".1x", ".2", ".3", ".3in", ".3m", ".3qt", ".3x", ".4", ".5", ".6", ".7", ".8", ".9", ".me", ".rno", ".roff", ]
Groovy = [".groovy", ".grt", ".gtpl", ".gvy", ]
"Groovy Server Pages" = [".gsp", ]
HCL = [".hcl", ".tf", ]
HLSL = [".hlsl", ".fxh", ".hlsli", ]
HTML = [".html", ".htm", ".html.hl", ".xht", ".xhtml", ]
"HTML+Django" = [".mustache", ".jinja", ]
"HTML+EEX" = [".eex", ]
"HTML+ERB" = [".erb", ".erb.deface", ]
"HTML+PHP" = [".phtml", ]
HTTP = [".http", ]
Haml = [".haml", ".haml.deface", ]
Handlebars = [".handlebars", ".hbs", ]
Harbour = [".hb", ]
Haskell = [".hs", ".hsc", ]
Haxe = [".hx", ".hxsl", ]
Hy = [".hy", ]
IDL = [".dlm", ]
"IGOR Pro" = [".ipf", ]
INI = [".ini", ".cfg", ".prefs", ".properties", ]
"IRC log" = [".irclog", ".weechatlog", ]
Idris = [".idr", ".lidr", ]
"Inform 7" = [".ni", ".i7x", ]
"Inno Setup" = [".iss", ]
Io = [".io", ]
Ioke = [".ik", ]
Isabelle = [".thy", ]
J = [".ijs", ]
JFlex = [".flex", ".jflex", ]
JSON = [".json", ".geojson", ".lock", ".topojson", ]
JSON5 = [".json5", ]
JSONLD = [".jsonld", ]
JSONiq = [".jq", ]
JSX = [".jsx", ]
Jade = [".jade", ]
Jasmin = [".j", ]
Java = [".java", ]
"Java Server Pages" = [".jsp", ]
JavaScript = [".js", "._js", ".bones", ".es6", ".jake", ".jsb", ".jscad", ".jsfl", ".jsm", ".jss", ".njs", ".pac", ".sjs", ".ssjs", ".xsjs", ".xsjslib", ]
Julia = [".jl", ]
"Jupyter Notebook" = [".ipynb", ]
KRL = [".krl", ]
KiCad = [".kicad_pcb", ]
Kit = [".kit", ]
Kotlin = [".kt", ".ktm", ".kts", ]
LFE = [".lfe", ]
LLVM = [".ll", ]
LOLCODE = [".lol", ]
LSL = [".lsl", ".lslp", ]
LabVIEW = [".lvproj", ]
Lasso = [".lasso", ".las", ".lasso8", ".lasso9", ".ldml", ]
Latte = [".latte", ]
Lean = [".lean", ".hlean", ]
Less = [".less", ]
Lex = [".lex", ]
LilyPond = [".ly", ".ily", ]
"Linker Script" = [".ld", ".lds", ]
Liquid = [".liquid", ]
"Literate Agda" = [".lagda", ]
"Literate CoffeeScript" = [".litcoffee", ]
"Literate Haskell" = [".lhs", ]
LiveScript = [".ls", "._ls", ]
Logos = [".xm", ".x", ".xi", ]
Logtalk = [".lgt", ".logtalk", ]
LookML = [".lookml", ]
Lua = [".lua", ".nse", ".pd_lua", ".rbxs", ".wlua", ]
M = [".mumps", ]
M4 = [".m4", ]
MAXScript = [".mcr", ]
MTML = [".mtml", ]
MUF = [".muf", ]
Makefile = [".mak", ".mk", ".mkfile", "Makefile", ]
Mako = [".mako", ".mao", ]
Maple = [".mpl", ]
Markdown = [".md", ".markdown", ".mkd", ".mkdn", ".mkdown", ".ron", ]
Mask = [".mask", ]
Mathematica = [".mathematica", ".cdf", ".ma", ".mt", ".nb", ".nbp", ".wl", ".wlt", ]
Matlab = [".matlab", ]
Max = [".maxpat", ".maxhelp", ".maxproj", ".mxt", ".pat", ]
MediaWiki = [".mediawiki", ".wiki", ]
Metal = [".metal", ]
MiniD = [".minid", ]
Mirah = [".druby", ".duby", ".mir", ".mirah", ]
Modelica = [".mo", ]
"Module Management System" = [".mms", ".mmk", ]
Monkey = [".monkey", ]
MoonScript = [".moon", ]
Myghty = [".myt", ]
NSIS = [".nsi", ".nsh", ]
NetLinx = [".axs", ".axi", ]
"NetLinx+ERB" = [".axs.erb", ".axi.erb", ]
NetLogo = [".nlogo", ]
Nginx = [".nginxconf", ]
Nimrod = [".nim", ".nimrod", ]
Ninja = [".ninja", ]
Nit = [".nit", ]
Nix = [".nix", ]
Nu = [".nu", ]
NumPy = [".numpy", ".numpyw", ".numsc", ]
OCaml = [".ml", ".eliom", ".eliomi", ".ml4", ".mli", ".mll", ".mly", ]
ObjDump = [".objdump", ]
"Objective-C++" = [".mm", ]
Objective-J = [".sj", ]
Octave = [".oct", ]
Omgrofl = [".omgrofl", ]
Opa = [".opa", ]
Opal = [".opal", ]
OpenCL = [".cl", ".opencl", ]
"OpenEdge ABL" = [".p", ]
OpenSCAD = [".scad", ]
Org = [".org", ]
Ox = [".ox", ".oxh", ".oxo", ]
Oxygene = [".oxygene", ]
Oz = [".oz", ]
PAWN = [".pwn", ]
PHP = [".php", ".aw", ".ctp", ".php3", ".php4", ".php5", ".phps", ".phpt", ]
"POV-Ray SDL" = [".pov", ]
Pan = [".pan", ]
Papyrus = [".psc", ]
Parrot = [".parrot", ]
"Parrot Assembly" = [".pasm", ]
"Parrot Internal Representation" = [".pir", ]
Pascal = [".pas", ".dfm", ".dpr", ".lpr", ]
Perl = [".pl", ".al", ".perl", ".ph", ".plx", ".pm", ".psgi", ".t", ]
Perl6 = [".6pl", ".6pm", ".nqp", ".p6", ".p6l", ".p6m", ".pl6", ".pm6", ]
Pickle = [".pkl", ]
PigLatin = [".pig", ]
Pike = [".pike", ".pmod", ]
Pod = [".pod", ]
PogoScript = [".pogo", ]
Pony = [".pony", ]
PostScript = [".ps", ".eps", ]
PowerShell = [".ps1", ".psd1", ".psm1", ]
Processing = [".pde", ]
Prolog = [".prolog", ".yap", ]
"Propeller Spin" = [".spin", ]
"Protocol Buffer" = [".proto", ]
"Public Key" = [".pub", ]
"Pure Data" = [".pd", ]
PureBasic = [".pb", ".pbi", ]
PureScript = [".purs", ]
Python = [".py", ".bzl", ".gyp", ".lmi", ".pyde", ".pyp", ".pyt", ".pyw", ".tac", ".wsgi", ".xpy", ]
"Python traceback" = [".pytb", ]
QML = [".qml", ".qbs", ]
QMake = [".pri", ]
R = [".r", ".rd", ".rsx", ]
RAML = [".raml", ]
RDoc = [".rdoc", ]
REALbasic = [".rbbas", ".rbfrm", ".rbmnu", ".rbres", ".rbtbar", ".rbuistate", ]
RHTML = [".rhtml", ]
RMarkdown = [".rmd", ]
Racket = [".rkt", ".rktd", ".rktl", ".scrbl", ]
"Ragel in Ruby Host" = [".rl", ]
"Raw token data" = [".raw", ]
Rebol = [".reb", ".r2", ".r3", ".rebol", ]
Red = [".red", ".reds", ]
Redcode = [".cw", ]
"Ren'Py" = [".rpy", ]
RenderScript = [".rsh", ]
RobotFramework = [".robot", ]
Rouge = [".rg", ]
Ruby = [".rb", ".builder", ".gemspec", ".god", ".irbrc", ".jbuilder", ".mspec", ".podspec", ".rabl", ".rake", ".rbuild", ".rbw", ".rbx", ".ru", ".ruby", ".thor", ".watchr", ]
Rust = [".rs", ".rs.in", ]
SAS = [".sas", ]
SCSS = [".scss", ]
SMT = [".smt2", ".smt", ]
SPARQL = [".sparql", ".rq", ]
SQF = [".sqf", ".hqf", ]
SQL = [".pls", ".pck", ".pkb", ".pks", ".plb", ".plsql", ".sql", ".cql", ".ddl", ".prc", ".tab", ".udf", ".viw", ".db2", ]
STON = [".ston", ]
SVG = [".svg", ]
Sage = [".sage", ".sagews", ]
SaltStack = [".sls", ]
Sass = [".sass", ]
Scala = [".scala", ".sbt", ]
Scaml = [".scaml", ]
Scheme = [".scm", ".sld", ".sps", ".ss", ]
Scilab = [".sci", ".sce", ]
Self = [".self", ]
Shell = [".sh", ".bash", ".bats", ".command", ".ksh", ".sh.in", ".tmux", ".tool", ".zsh", ]
ShellSession = [".sh-session", ]
Shen = [".shen", ]
Slash = [".sl", ]
Slim = [".slim", ]
Smali = [".smali", ]
Smalltalk = [".st", ]
Smarty = [".tpl", ]
Solidity = [".sol", ]
SourcePawn = [".sp", ".sma", ]
Squirrel = [".nut", ]
Stan = [".stan", ]
"Standard ML" = [".ML", ".fun", ".sig", ".sml", ]
Stata = [".do", ".ado", ".doh", ".ihlp", ".mata", ".matah", ".sthlp", ]
Stylus = [".styl", ]
SuperCollider = [".scd", ]
Swift = [".swift", ]
SystemVerilog = [".sv", ".svh", ".vh", ]
TOML = [".toml", ]
TXL = [".txl", ]
Tcl = [".tcl", ".adp", ".tm", ]
Tcsh = [".tcsh", ".csh", ]
TeX = [".tex", ".aux", ".bbx", ".bib", ".cbx", ".dtx", ".ins", ".lbx", ".ltx", ".mkii", ".mkiv", ".mkvi", ".sty", ".toc", ]
Tea = [".tea", ]
Text = [".txt", ".no", ]
Textile = [".textile", ]
Thrift = [".thrift", ]
Turing = [".tu", ]
Turtle = [".ttl", ]
Twig = [".twig", ]
TypeScript = [".ts", ".tsx", ]
"Unified Parallel C" = [".upc", ]
"Unity3D Asset" = [".anim", ".asset", ".mat", ".meta", ".prefab", ".unity", ]
Uno = [".uno", ]
UnrealScript = [".uc", ]
UrWeb = [".ur", ".urs", ]
VCL = [".vcl", ]
VHDL = [".vhdl", ".vhd", ".vhf", ".vhi", ".vho", ".vhs", ".vht", ".vhw", ]
Vala = [".vala", ".vapi", ]
Verilog = [".veo", ]
VimL = [".vim", ]
"Visual Basic" = [".vb", ".bas", ".frm", ".frx", ".vba", ".vbhtml", ".vbs", ]
Volt = [".volt", ]
Vue = [".vue", ]
"Web Ontology Language" = [".owl", ]
WebAssembly = [".wat", ]
WebIDL = [".webidl", ]
X10 = [".x10", ]
XC = [".xc", ]
XML = [".xml", ".ant", ".axml", ".ccxml", ".clixml", ".cproject", ".csl", ".csproj", ".ct", ".dita", ".ditamap", ".ditaval", ".dll.config", ".dotsettings", ".filters", ".fsproj", ".fxml", ".glade", ".grxml", ".iml", ".ivy", ".jelly", ".jsproj", ".kml", ".launch", ".mdpolicy", ".mxml", ".nproj", ".nuspec", ".odd", ".osm", ".plist", ".props", ".ps1xml", ".psc1", ".pt", ".rdf", ".rss", ".scxml", ".srdf", ".storyboard", ".stTheme", ".sublime-snippet", ".targets", ".tmCommand", ".tml", ".tmLanguage", ".tmPreferences", ".tmSnippet", ".tmTheme", ".ui", ".urdf", ".ux", ".vbproj", ".vcxproj", ".vssettings", ".vxml", ".wsdl", ".wsf", ".wxi", ".wxl", ".wxs", ".x3d", ".xacro", ".xaml", ".xib", ".xlf", ".xliff", ".xmi", ".xml.dist", ".xproj", ".xsd", ".xul", ".zcml", ]
XPages = [".xsp-config", ".xsp.metadata", ]
XProc = [".xpl", ".xproc", ]
XQuery = [".xquery", ".xq", ".xql", ".xqm", ".xqy", ]
XS = [".xs", ]
XSLT = [".xslt", ".xsl", ]
Xojo = [".xojo_code", ".xojo_menu", ".xojo_report", ".xojo_script", ".xojo_toolbar", ".xojo_window", ]
Xtend = [".xtend", ]
YAML = [".yml", ".reek", ".rviz", ".sublime-syntax", ".syntax", ".yaml", ".yaml-tmlanguage", ]
YANG = [".yang", ]
Yacc = [".y", ".yacc", ".yy", ]
Zephir = [".zep", ]
Zig = [".zig", ]
Zimpl = [".zimpl", ".zmpl", ".zpl", ]
desktop = [".desktop", ".desktop.in", ]
eC = [".ec", ".eh", ]
edn = [".edn", ]
fish = [".fish", ]
mupad = [".mu", ]
nesC = [".nc", ]
ooc = [".ooc", ]
reStructuredText = [".rst", ".rest", ".rest.txt", ".rst.txt", ]
wisp = [".wisp", ]
xBase = [".prg", ".prw", ]

View File

@ -10,9 +10,9 @@ You must use the following JSON schema to format your answer:
"type": "string",
"description": "an informative title for the PR, describing its main theme"
},
"Type of PR": {
"PR Type": {
"type": "string",
"enum": ["Bug fix", "Tests", "Bug fix with tests", "Refactoring", "Enhancement", "Documentation", "Other"]
"description": possible values are: ["Bug fix", "Tests", "Bug fix with tests", "Refactoring", "Enhancement", "Documentation", "Other"]
},
"PR Description": {
"type": "string",

View File

@ -1,16 +1,17 @@
[pr_information_from_user_prompt]
system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests.
Given the PR Info and the PR Git Diff, generate 4 questions about the PR for the PR author.
Given the PR Info and the PR Git Diff, generate 3 short questions about the PR code for the PR author.
The goal of the questions is to help the language model understand the PR better, so the questions should be insightful, informative, non-trivial, and relevant to the PR.
Prefer yes\\no or multiple choice questions. If you have to ask open-ended questions, make sure they are not too difficult, and can be answered in a sentence or two.
You should prefer asking yes\\no questions, or multiple choice questions. Also add at least one open-ended question, but make sure they are not too difficult, and can be answered in a sentence or two.
Example output:
'
Questions to better understand the PR:
1. ...
2. ...
1) ...
2) ...
...
'
"""
user="""PR Info:

View File

@ -2,8 +2,11 @@
system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests.
Your task is to provide constructive and concise feedback for the PR, and also provide meaningful code suggestions to improve the new PR code (the '+' lines).
- Provide up to {{ num_code_suggestions }} code suggestions.
{%- if num_code_suggestions > 0 %}
- Try to focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices.
- Suggestions should focus on improving the new added code lines.
- Make sure not to provide suggestions repeating modifications already implemented in the new PR code (the '+' lines).
{%- endif %}
You must use the following JSON schema to format your answer:
```json
@ -17,12 +20,24 @@ You must use the following JSON schema to format your answer:
"type": "string",
"enum": ["Bug fix", "Tests", "Bug fix with tests", "Refactoring", "Enhancement", "Documentation", "Other"]
},
{%- if require_score %}
"Score": {
"type": "int",
"description": "Rate this PR on a scale of 0-100 (inclusive), where 0 means the worst possible PR code, and 100 means PR code of the highest quality, without any bugs or performance issues, that is ready to be merged immediately and run in production at scale."
},
{%- endif %}
{%- if require_tests %}
"Relevant tests added": {
"type": "string",
"description": "yes\\no question: does this PR have relevant tests ?"
},
{%- endif %}
{%- if question_str %}
"Insights from user's answer": {
"type": "string",
"description": "shortly summarize the insights you gained from the user's answers to the questions"
},
{%- endif %}
{%- if require_focused %}
"Focused PR": {
"type": "string",
@ -35,6 +50,7 @@ You must use the following JSON schema to format your answer:
"type": "string",
"description": "General suggestions and feedback for the contributors and maintainers of this PR. May include important suggestions for the overall structure, primary purpose, best practices, critical bugs, and other aspects of the PR. Explain your suggestions."
},
{%- if num_code_suggestions > 0 %}
"Code suggestions": {
"type": "array",
"maxItems": {{ num_code_suggestions }},
@ -54,6 +70,7 @@ You must use the following JSON schema to format your answer:
}
}
},
{%- endif %}
{%- if require_security %}
"Security concerns": {
"type": "string",
@ -72,6 +89,9 @@ Example output:
{
"Main theme": "xxx",
"Type of PR": "Bug fix",
{%- if require_score %}
"Score": 89,
{%- endif %}
{%- if require_tests %}
"Relevant tests added": "No",
{%- endif %}
@ -82,6 +102,7 @@ Example output:
"PR Feedback":
{
"General PR suggestions": "..., `xxx`...",
{%- if num_code_suggestions > 0 %}
"Code suggestions": [
{
"relevant file": "directory/xxx.py",
@ -90,8 +111,9 @@ Example output:
},
...
]
{%- if require_security %},
"Security concerns": "No, because ..."
{%- endif %}
{%- if require_security %}
"Security concerns": "No, because ..."
{%- endif %}
}
}
@ -108,6 +130,16 @@ Description: '{{description}}'
Main language: {{language}}
{%- endif %}
{%- if question_str %}
######
Here are questions to better understand the PR. Use the answers to provide better feedback.
{{question_str|trim}}
User answers:
{{answer_str|trim}}
######
{%- endif %}
The PR Git Diff:
```

View File

@ -0,0 +1,34 @@
[pr_update_changelog_prompt]
system="""You are a language model called CodiumAI-PR-Changelog-summarizer.
Your task is to update the CHANGELOG.md file of the project, to shortly summarize important changes introduced in this PR (the '+' lines).
- The output should match the existing CHANGELOG.md format, style and conventions, so it will look like a natural part of the file. For example, if previous changes were summarized in a single line, you should do the same.
- Don't repeat previous changes. Generate only new content, that is not already in the CHANGELOG.md file.
- Be general, and avoid specific details, files, etc. The output should be minimal, no more than 3-4 short lines. Ignore non-relevant subsections.
"""
user="""PR Info:
Title: '{{title}}'
Branch: '{{branch}}'
Description: '{{description}}'
{%- if language %}
Main language: {{language}}
{%- endif %}
The PR Diff:
```
{{diff}}
```
Current date:
```
{{today}}
```
The current CHANGELOG.md:
```
{{ changelog_file_str }}
```
Response:
"""

View File

@ -6,11 +6,11 @@ import textwrap
from jinja2 import Environment, StrictUndefined
from pr_agent.algo.ai_handler import AiHandler
from pr_agent.algo.pr_processing import get_pr_diff
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import convert_to_markdown, try_fix_json
from pr_agent.algo.utils import try_fix_json
from pr_agent.config_loader import settings
from pr_agent.git_providers import get_git_provider, BitbucketProvider
from pr_agent.git_providers import BitbucketProvider, get_git_provider
from pr_agent.git_providers.git_provider import get_main_pr_language
@ -42,28 +42,29 @@ class PRCodeSuggestions:
assert type(self.git_provider) != BitbucketProvider, "Bitbucket is not supported for now"
logging.info('Generating code suggestions for PR...')
if settings.config.publish_review:
if settings.config.publish_output:
self.git_provider.publish_comment("Preparing review...", is_temporary=True)
logging.info('Getting PR diff...')
# we are using extended hunk with line numbers for code suggestions
self.patches_diff = get_pr_diff(self.git_provider,
self.token_handler,
add_line_numbers_to_hunks=True,
disable_extra_lines=True)
logging.info('Getting AI prediction...')
self.prediction = await self._get_prediction()
await retry_with_fallback_models(self._prepare_prediction)
logging.info('Preparing PR review...')
data = self._prepare_pr_code_suggestions()
if settings.config.publish_review:
if settings.config.publish_output:
logging.info('Pushing PR review...')
self.git_provider.remove_initial_comment()
logging.info('Pushing inline code comments...')
self.push_inline_code_suggestions(data)
async def _prepare_prediction(self, model: str):
logging.info('Getting PR diff...')
# we are using extended hunk with line numbers for code suggestions
self.patches_diff = get_pr_diff(self.git_provider,
self.token_handler,
model,
add_line_numbers_to_hunks=True,
disable_extra_lines=True)
logging.info('Getting AI prediction...')
self.prediction = await self._get_prediction(model)
async def _get_prediction(self):
async def _get_prediction(self, model: str):
variables = copy.deepcopy(self.vars)
variables["diff"] = self.patches_diff # update diff
environment = Environment(undefined=StrictUndefined)
@ -72,7 +73,6 @@ class PRCodeSuggestions:
if settings.config.verbosity_level >= 2:
logging.info(f"\nSystem prompt:\n{system_prompt}")
logging.info(f"\nUser prompt:\n{user_prompt}")
model = settings.config.model
response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
system=system_prompt, user=user_prompt)
@ -80,7 +80,6 @@ class PRCodeSuggestions:
def _prepare_pr_code_suggestions(self) -> str:
review = self.prediction.strip()
data = None
try:
data = json.loads(review)
except json.decoder.JSONDecodeError:
@ -90,6 +89,7 @@ class PRCodeSuggestions:
return data
def push_inline_code_suggestions(self, data):
code_suggestions = []
for d in data['Code suggestions']:
if settings.config.verbosity_level >= 2:
logging.info(f"suggestion: {d}")
@ -98,30 +98,36 @@ class PRCodeSuggestions:
relevant_lines_start = int(relevant_lines_str.split('-')[0]) # absolute position
relevant_lines_end = int(relevant_lines_str.split('-')[-1])
content = d['suggestion content']
existing_code_snippet = d['existing code']
new_code_snippet = d['improved code']
if new_code_snippet:
try: # dedent code snippet
self.diff_files = self.git_provider.diff_files if self.git_provider.diff_files else self.git_provider.get_diff_files()
original_initial_line = None
for file in self.diff_files:
if file.filename.strip() == relevant_file:
original_initial_line = file.head_file.splitlines()[relevant_lines_start - 1]
break
if original_initial_line:
suggested_initial_line = new_code_snippet.splitlines()[0]
original_initial_spaces = len(original_initial_line) - len(original_initial_line.lstrip())
suggested_initial_spaces = len(suggested_initial_line) - len(suggested_initial_line.lstrip())
delta_spaces = original_initial_spaces - suggested_initial_spaces
if delta_spaces > 0:
new_code_snippet = textwrap.indent(new_code_snippet, delta_spaces * " ").rstrip('\n')
except Exception as e:
if settings.config.verbosity_level >= 2:
logging.info(f"Could not dedent code snippet for file {relevant_file}, error: {e}")
new_code_snippet = self.dedent_code(relevant_file, relevant_lines_start, new_code_snippet)
body = f"**Suggestion:** {content}\n```suggestion\n" + new_code_snippet + "\n```"
success = self.git_provider.publish_code_suggestion(body=body,
relevant_file=relevant_file,
relevant_lines_start=relevant_lines_start,
relevant_lines_end=relevant_lines_end)
code_suggestions.append({'body': body,'relevant_file': relevant_file,
'relevant_lines_start': relevant_lines_start,
'relevant_lines_end': relevant_lines_end})
self.git_provider.publish_code_suggestions(code_suggestions)
def dedent_code(self, relevant_file, relevant_lines_start, new_code_snippet):
    """
    Align the indentation of a suggested snippet with the line it is meant to replace.

    The first line of `new_code_snippet` is compared with line `relevant_lines_start` of the head version
    of `relevant_file`. When the original line is indented deeper, every line of the snippet is shifted
    right by the difference. Despite the name, indentation is only ever added, never removed.

    Args:
        relevant_file (str): File name, compared with the stripped `filename` of each diff file.
        relevant_lines_start (int): 1-based number of the first line being replaced.
        new_code_snippet (str): The suggested replacement code.

    Returns:
        str: The re-indented snippet, or the snippet unchanged when no adjustment applies or an error occurs.
    """
    try:
        # Line numbers are 1-based. Zero or a negative value would wrap around through negative
        # indexing and compare against an unrelated line at the end of the file.
        if relevant_lines_start < 1:
            return new_code_snippet

        # Use the provider's `diff_files` when it is set; otherwise call `get_diff_files()`.
        self.diff_files = self.git_provider.diff_files if self.git_provider.diff_files \
            else self.git_provider.get_diff_files()

        original_initial_line = None
        for file in self.diff_files:
            if file.filename.strip() == relevant_file:
                original_initial_line = file.head_file.splitlines()[relevant_lines_start - 1]
                break

        # An empty original line carries no indentation to align with, so it is skipped as well.
        if original_initial_line:
            suggested_initial_line = new_code_snippet.splitlines()[0]
            original_initial_spaces = len(original_initial_line) - len(original_initial_line.lstrip())
            suggested_initial_spaces = len(suggested_initial_line) - len(suggested_initial_line.lstrip())
            delta_spaces = original_initial_spaces - suggested_initial_spaces
            if delta_spaces > 0:
                new_code_snippet = textwrap.indent(new_code_snippet, delta_spaces * " ").rstrip('\n')
    except Exception as e:
        # Best effort: a failure here must not block publishing; the snippet is returned as it was.
        if settings.config.verbosity_level >= 2:
            logging.info(f"Could not dedent code snippet for file {relevant_file}, error: {e}")
    return new_code_snippet

View File

@ -1,13 +1,13 @@
import copy
import json
import logging
from typing import Tuple, List
from jinja2 import Environment, StrictUndefined
from pr_agent.algo.ai_handler import AiHandler
from pr_agent.algo.pr_processing import get_pr_diff
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import convert_to_markdown
from pr_agent.config_loader import settings
from pr_agent.git_providers import get_git_provider
from pr_agent.git_providers.git_provider import get_main_pr_language
@ -15,11 +15,22 @@ from pr_agent.git_providers.git_provider import get_main_pr_language
class PRDescription:
def __init__(self, pr_url: str):
"""
Initialize the PRDescription object with the necessary attributes and objects for generating a PR description using an AI model.
Args:
pr_url (str): The URL of the pull request.
"""
# Initialize the git provider and main PR language
self.git_provider = get_git_provider()(pr_url)
self.main_pr_language = get_main_pr_language(
self.git_provider.get_languages(), self.git_provider.get_files()
)
# Initialize the AI handler
self.ai_handler = AiHandler()
# Initialize the variables dictionary
self.vars = {
"title": self.git_provider.pr.title,
"branch": self.git_provider.get_pr_branch(),
@ -27,57 +38,135 @@ class PRDescription:
"language": self.main_pr_language,
"diff": "", # empty diff for initial calculation
}
self.token_handler = TokenHandler(self.git_provider.pr,
self.vars,
settings.pr_description_prompt.system,
settings.pr_description_prompt.user)
# Initialize the token handler
self.token_handler = TokenHandler(
self.git_provider.pr,
self.vars,
settings.pr_description_prompt.system,
settings.pr_description_prompt.user,
)
# Initialize patches_diff and prediction attributes
self.patches_diff = None
self.prediction = None
async def describe(self):
"""
Generates a PR description using an AI model and publishes it to the PR.
"""
logging.info('Generating a PR description...')
if settings.config.publish_review:
if settings.config.publish_output:
self.git_provider.publish_comment("Preparing pr description...", is_temporary=True)
logging.info('Getting PR diff...')
self.patches_diff = get_pr_diff(self.git_provider, self.token_handler)
logging.info('Getting AI prediction...')
self.prediction = await self._get_prediction()
await retry_with_fallback_models(self._prepare_prediction)
logging.info('Preparing answer...')
pr_title, pr_body = self._prepare_pr_answer()
if settings.config.publish_review:
pr_title, pr_body, pr_types, markdown_text = self._prepare_pr_answer()
if settings.config.publish_output:
logging.info('Pushing answer...')
self.git_provider.publish_description(pr_title, pr_body)
if settings.pr_description.publish_description_as_comment:
self.git_provider.publish_comment(markdown_text)
else:
self.git_provider.publish_description(pr_title, pr_body)
if self.git_provider.is_supported("get_labels"):
current_labels = self.git_provider.get_labels()
if current_labels is None:
current_labels = []
self.git_provider.publish_labels(pr_types + current_labels)
self.git_provider.remove_initial_comment()
return ""
async def _get_prediction(self):
async def _prepare_prediction(self, model: str) -> None:
"""
Prepare the AI prediction for the PR description based on the provided model.
Args:
model (str): The name of the model to be used for generating the prediction.
Returns:
None
Raises:
Any exceptions raised by the 'get_pr_diff' and '_get_prediction' functions.
"""
logging.info('Getting PR diff...')
self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)
logging.info('Getting AI prediction...')
self.prediction = await self._get_prediction(model)
async def _get_prediction(self, model: str) -> str:
"""
Generate an AI prediction for the PR description based on the provided model.
Args:
model (str): The name of the model to be used for generating the prediction.
Returns:
str: The generated AI prediction.
"""
variables = copy.deepcopy(self.vars)
variables["diff"] = self.patches_diff # update diff
environment = Environment(undefined=StrictUndefined)
system_prompt = environment.from_string(settings.pr_description_prompt.system).render(variables)
user_prompt = environment.from_string(settings.pr_description_prompt.user).render(variables)
if settings.config.verbosity_level >= 2:
logging.info(f"\nSystem prompt:\n{system_prompt}")
logging.info(f"\nUser prompt:\n{user_prompt}")
model = settings.config.model
response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
system=system_prompt, user=user_prompt)
response, finish_reason = await self.ai_handler.chat_completion(
model=model,
temperature=0.2,
system=system_prompt,
user=user_prompt
)
return response
def _prepare_pr_answer(self):
def _prepare_pr_answer(self) -> Tuple[str, str, List[str], str]:
"""
Prepare the PR description based on the AI prediction data.
Returns:
- title: a string containing the PR title.
- pr_body: a string containing the PR body in a markdown format.
- pr_types: a list of strings containing the PR types.
- markdown_text: a string containing the AI prediction data in a markdown format.
"""
# Load the AI prediction data into a dictionary
data = json.loads(self.prediction)
pr_body = ""
# for key, value in data.items():
# markdown_text += f"## {key}\n\n"
# markdown_text += f"{value}\n\n"
title = data['PR Title']
del data['PR Title']
# Initialization
markdown_text = pr_body = ""
pr_types = []
# Iterate over the dictionary items and append the key and value to 'markdown_text' in a markdown format
for key, value in data.items():
markdown_text += f"## {key}\n\n"
markdown_text += f"{value}\n\n"
# If the 'PR Type' key is present in the dictionary, split its value by comma and assign it to 'pr_types'
if 'PR Type' in data:
pr_types = data['PR Type'].split(',')
# Assign the value of the 'PR Title' key to 'title' variable and remove it from the dictionary
title = data.pop('PR Title')
# Iterate over the remaining dictionary items and append the key and value to 'pr_body' in a markdown format,
# except for the items containing the word 'walkthrough'
for key, value in data.items():
pr_body += f"{key}:\n"
if 'walkthrough' in key.lower():
pr_body += f"{value}\n"
else:
pr_body += f"**{value}**\n\n___\n"
if settings.config.verbosity_level >= 2:
logging.info(f"title:\n{title}\n{pr_body}")
return title, pr_body
return title, pr_body, pr_types, markdown_text

View File

@ -4,13 +4,15 @@ import logging
from jinja2 import Environment, StrictUndefined
from pr_agent.algo.ai_handler import AiHandler
from pr_agent.algo.pr_processing import get_pr_diff
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.config_loader import settings
from pr_agent.git_providers import get_git_provider
from pr_agent.git_providers.git_provider import get_main_pr_language
class PRInformationFromUser:
def __init__(self, pr_url: str):
self.git_provider = get_git_provider()(pr_url)
@ -21,7 +23,7 @@ class PRInformationFromUser:
self.vars = {
"title": self.git_provider.pr.title,
"branch": self.git_provider.get_pr_branch(),
"description": self.git_provider.get_description(),
"description": self.git_provider.get_pr_description(),
"language": self.main_pr_language,
"diff": "", # empty diff for initial calculation
}
@ -34,21 +36,24 @@ class PRInformationFromUser:
async def generate_questions(self):
logging.info('Generating question to the user...')
if settings.config.publish_review:
self.git_provider.publish_comment("Preparing answer...", is_temporary=True)
logging.info('Getting PR diff...')
self.patches_diff = get_pr_diff(self.git_provider, self.token_handler)
logging.info('Getting AI prediction...')
self.prediction = await self._get_prediction()
if settings.config.publish_output:
self.git_provider.publish_comment("Preparing questions...", is_temporary=True)
await retry_with_fallback_models(self._prepare_prediction)
logging.info('Preparing questions...')
pr_comment = self._prepare_pr_answer()
if settings.config.publish_review:
if settings.config.publish_output:
logging.info('Pushing questions...')
self.git_provider.publish_comment(pr_comment)
self.git_provider.remove_initial_comment()
return ""
async def _get_prediction(self):
async def _prepare_prediction(self, model):
logging.info('Getting PR diff...')
self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)
logging.info('Getting AI prediction...')
self.prediction = await self._get_prediction(model)
async def _get_prediction(self, model: str):
variables = copy.deepcopy(self.vars)
variables["diff"] = self.patches_diff # update diff
environment = Environment(undefined=StrictUndefined)
@ -57,7 +62,6 @@ class PRInformationFromUser:
if settings.config.verbosity_level >= 2:
logging.info(f"\nSystem prompt:\n{system_prompt}")
logging.info(f"\nUser prompt:\n{user_prompt}")
model = settings.config.model
response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
system=system_prompt, user=user_prompt)
return response
@ -66,6 +70,6 @@ class PRInformationFromUser:
model_output = self.prediction.strip()
if settings.config.verbosity_level >= 2:
logging.info(f"answer_str:\n{model_output}")
answer_str = f"{model_output}\n\n Please respond to the question above in the following format:\n\n" + \
f"/answer <question_id> <answer>\n\n" + f"Example:\n'\n/answer\n1. Yes, because ...\n2. No, because ...\n'"
answer_str = f"{model_output}\n\n Please respond to the questions above in the following format:\n\n" +\
"\n>/answer\n>1) ...\n>2) ...\n>...\n"
return answer_str

View File

@ -4,7 +4,7 @@ import logging
from jinja2 import Environment, StrictUndefined
from pr_agent.algo.ai_handler import AiHandler
from pr_agent.algo.pr_processing import get_pr_diff
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.config_loader import settings
from pr_agent.git_providers import get_git_provider
@ -12,7 +12,8 @@ from pr_agent.git_providers.git_provider import get_main_pr_language
class PRQuestions:
def __init__(self, pr_url: str, question_str: str):
def __init__(self, pr_url: str, args=None):
question_str = self.parse_args(args)
self.git_provider = get_git_provider()(pr_url)
self.main_pr_language = get_main_pr_language(
self.git_provider.get_languages(), self.git_provider.get_files()
@ -34,23 +35,33 @@ class PRQuestions:
self.patches_diff = None
self.prediction = None
def parse_args(self, args):
    """
    Build the question string from the command arguments.

    Args:
        args: Sequence of string tokens, or None.

    Returns:
        str: The tokens joined with single spaces, or "" when `args` is None or empty.
    """
    # Truthiness already covers both None and an empty sequence, so no separate length check is needed.
    return " ".join(args) if args else ""
async def answer(self):
logging.info('Answering a PR question...')
if settings.config.publish_review:
if settings.config.publish_output:
self.git_provider.publish_comment("Preparing answer...", is_temporary=True)
logging.info('Getting PR diff...')
self.patches_diff = get_pr_diff(self.git_provider, self.token_handler)
logging.info('Getting AI prediction...')
self.prediction = await self._get_prediction()
await retry_with_fallback_models(self._prepare_prediction)
logging.info('Preparing answer...')
pr_comment = self._prepare_pr_answer()
if settings.config.publish_review:
if settings.config.publish_output:
logging.info('Pushing answer...')
self.git_provider.publish_comment(pr_comment)
self.git_provider.remove_initial_comment()
return ""
async def _get_prediction(self):
async def _prepare_prediction(self, model: str):
logging.info('Getting PR diff...')
self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)
logging.info('Getting AI prediction...')
self.prediction = await self._get_prediction(model)
async def _get_prediction(self, model: str):
variables = copy.deepcopy(self.vars)
variables["diff"] = self.patches_diff # update diff
environment = Environment(undefined=StrictUndefined)
@ -59,7 +70,6 @@ class PRQuestions:
if settings.config.verbosity_level >= 2:
logging.info(f"\nSystem prompt:\n{system_prompt}")
logging.info(f"\nUser prompt:\n{user_prompt}")
model = settings.config.model
response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
system=system_prompt, user=user_prompt)
return response

View File

@ -1,100 +1,197 @@
import copy
import json
import logging
from collections import OrderedDict
from typing import Tuple, List
from jinja2 import Environment, StrictUndefined
from pr_agent.algo.ai_handler import AiHandler
from pr_agent.algo.pr_processing import get_pr_diff
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import convert_to_markdown, try_fix_json
from pr_agent.config_loader import settings
from pr_agent.git_providers import get_git_provider
from pr_agent.git_providers.git_provider import get_main_pr_language
from pr_agent.servers.help import bot_help_text, actions_help_text
from pr_agent.git_providers.git_provider import get_main_pr_language, IncrementalPR
from pr_agent.servers.help import actions_help_text, bot_help_text
class PRReviewer:
def __init__(self, pr_url: str, cli_mode=False):
"""
The PRReviewer class is responsible for reviewing a pull request and generating feedback using an AI model.
"""
def __init__(self, pr_url: str, cli_mode: bool = False, is_answer: bool = False, args: list = None):
"""
Initialize the PRReviewer object with the necessary attributes and objects to review a pull request.
self.git_provider = get_git_provider()(pr_url)
Args:
pr_url (str): The URL of the pull request to be reviewed.
cli_mode (bool, optional): Indicates whether the review is being done in command-line interface mode. Defaults to False.
is_answer (bool, optional): Indicates whether the review is being done in answer mode. Defaults to False.
args (list, optional): List of arguments passed to the PRReviewer class. Defaults to None.
"""
self.parse_args(args)
self.git_provider = get_git_provider()(pr_url, incremental=self.incremental)
self.main_language = get_main_pr_language(
self.git_provider.get_languages(), self.git_provider.get_files()
)
self.pr_url = pr_url
self.is_answer = is_answer
if self.is_answer and not self.git_provider.is_supported("get_issue_comments"):
raise Exception(f"Answer mode is not supported for {settings.config.git_provider} for now")
self.ai_handler = AiHandler()
self.patches_diff = None
self.prediction = None
self.cli_mode = cli_mode
answer_str, question_str = self._get_user_answers()
self.vars = {
"title": self.git_provider.pr.title,
"branch": self.git_provider.get_pr_branch(),
"description": self.git_provider.get_pr_description(),
"language": self.main_language,
"diff": "", # empty diff for initial calculation
"require_score": settings.pr_reviewer.require_score_review,
"require_tests": settings.pr_reviewer.require_tests_review,
"require_security": settings.pr_reviewer.require_security_review,
"require_focused": settings.pr_reviewer.require_focused_review,
'num_code_suggestions': settings.pr_reviewer.num_code_suggestions,
'question_str': question_str,
'answer_str': answer_str,
}
self.token_handler = TokenHandler(self.git_provider.pr,
self.vars,
settings.pr_review_prompt.system,
settings.pr_review_prompt.user)
async def review(self):
self.token_handler = TokenHandler(
self.git_provider.pr,
self.vars,
settings.pr_review_prompt.system,
settings.pr_review_prompt.user
)
def parse_args(self, args: List[str]) -> None:
    """
    Set the 'incremental' attribute from the arguments passed to the PRReviewer class.

    The review is marked incremental only when the first argument is exactly "-i".

    Args:
        args: A list of arguments passed to the PRReviewer class; may be None or empty.

    Returns:
        None
    """
    # Only the first argument is inspected; with no arguments the review is not incremental.
    first_arg = args[0] if args else None
    self.incremental = IncrementalPR(bool(first_arg == "-i"))
async def review(self) -> None:
"""
Review the pull request and generate feedback.
"""
logging.info('Reviewing PR...')
if settings.config.publish_review:
self.git_provider.publish_comment("Preparing review...", is_temporary=True)
logging.info('Getting PR diff...')
self.patches_diff = get_pr_diff(self.git_provider, self.token_handler)
logging.info('Getting AI prediction...')
self.prediction = await self._get_prediction()
if settings.config.publish_output:
self.git_provider.publish_comment("Preparing review...", is_temporary=True)
await retry_with_fallback_models(self._prepare_prediction)
logging.info('Preparing PR review...')
pr_comment = self._prepare_pr_review()
if settings.config.publish_review:
if settings.config.publish_output:
logging.info('Pushing PR review...')
self.git_provider.publish_comment(pr_comment)
self.git_provider.remove_initial_comment()
if settings.pr_reviewer.inline_code_comments:
logging.info('Pushing inline code comments...')
self._publish_inline_code_comments()
return ""
async def _get_prediction(self):
async def _prepare_prediction(self, model: str) -> None:
"""
Prepare the AI prediction for the pull request review.
Args:
model: A string representing the AI model to be used for the prediction.
Returns:
None
"""
logging.info('Getting PR diff...')
self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)
logging.info('Getting AI prediction...')
self.prediction = await self._get_prediction(model)
async def _get_prediction(self, model: str) -> str:
"""
Generate an AI prediction for the pull request review.
Args:
model: A string representing the AI model to be used for the prediction.
Returns:
A string representing the AI prediction for the pull request review.
"""
variables = copy.deepcopy(self.vars)
variables["diff"] = self.patches_diff # update diff
environment = Environment(undefined=StrictUndefined)
system_prompt = environment.from_string(settings.pr_review_prompt.system).render(variables)
user_prompt = environment.from_string(settings.pr_review_prompt.user).render(variables)
if settings.config.verbosity_level >= 2:
logging.info(f"\nSystem prompt:\n{system_prompt}")
logging.info(f"\nUser prompt:\n{user_prompt}")
model = settings.config.model
response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
system=system_prompt, user=user_prompt)
response, finish_reason = await self.ai_handler.chat_completion(
model=model,
temperature=0.2,
system=system_prompt,
user=user_prompt
)
return response
def _prepare_pr_review(self) -> str:
"""
Prepare the PR review by processing the AI prediction and generating a markdown-formatted text that summarizes the feedback.
"""
review = self.prediction.strip()
try:
data = json.loads(review)
except json.decoder.JSONDecodeError:
data = try_fix_json(review)
# reordering for nicer display
if 'PR Feedback' in data:
if 'Security concerns' in data['PR Feedback']:
val = data['PR Feedback']['Security concerns']
del data['PR Feedback']['Security concerns']
data['PR Analysis']['Security concerns'] = val
# Move 'Security concerns' key to 'PR Analysis' section for better display
if 'PR Feedback' in data and 'Security concerns' in data['PR Feedback']:
val = data['PR Feedback']['Security concerns']
del data['PR Feedback']['Security concerns']
data['PR Analysis']['Security concerns'] = val
if settings.config.git_provider == 'github' and settings.pr_reviewer.inline_code_comments:
del data['PR Feedback']['Code suggestions']
# Filter out code suggestions that can be submitted as inline comments
if settings.config.git_provider != 'bitbucket' and settings.pr_reviewer.inline_code_comments and 'Code suggestions' in data['PR Feedback']:
data['PR Feedback']['Code suggestions'] = [
d for d in data['PR Feedback']['Code suggestions']
if any(key not in d for key in ('relevant file', 'relevant line in file', 'suggestion content'))
]
if not data['PR Feedback']['Code suggestions']:
del data['PR Feedback']['Code suggestions']
# Add incremental review section
if self.incremental.is_incremental:
last_commit_url = f"{self.git_provider.get_pr_url()}/commits/{self.git_provider.incremental.first_new_commit_sha}"
data = OrderedDict(data)
data.update({'Incremental PR Review': {
"⏮️ Review for commits since previous PR-Agent review": f"Starting from commit {last_commit_url}"}})
data.move_to_end('Incremental PR Review', last=False)
markdown_text = convert_to_markdown(data)
user = self.git_provider.get_user_id()
# Add help text if not in CLI mode
if not self.cli_mode:
markdown_text += "\n### How to use\n"
if user and '[bot]' not in user:
@ -102,20 +199,64 @@ class PRReviewer:
else:
markdown_text += actions_help_text
# Log markdown response if verbosity level is high
if settings.config.verbosity_level >= 2:
logging.info(f"Markdown response:\n{markdown_text}")
return markdown_text
def _publish_inline_code_comments(self) -> None:
    """
    Publish the AI model's code suggestions as inline comments on the pull request.

    Does nothing when `settings.pr_reviewer.num_code_suggestions` is 0. The prediction is
    parsed as JSON, falling back to `try_fix_json` when it is not valid JSON. Suggestions
    missing a file, a line or content are skipped rather than raising.
    """
    if settings.pr_reviewer.num_code_suggestions == 0:
        return

    review = self.prediction.strip()
    try:
        data = json.loads(review)
    except json.decoder.JSONDecodeError:
        data = try_fix_json(review)

    # Comment objects built via create_inline_comment are collected here and published together.
    comments = []
    for suggestion in data.get('PR Feedback', {}).get('Code suggestions', []):
        relevant_file = suggestion.get('relevant file', '').strip()
        relevant_line_in_file = suggestion.get('relevant line in file', '').strip()
        content = suggestion.get('suggestion content', '')
        if not relevant_file or not relevant_line_in_file or not content:
            logging.info("Skipping inline comment with missing file/line/content")
            continue

        # Each suggestion goes through exactly one path: either it is queued for the batch
        # publish below, or it is published right away. It must not be published twice.
        if self.git_provider.is_supported("create_inline_comment"):
            comment = self.git_provider.create_inline_comment(content, relevant_file, relevant_line_in_file)
            if comment:
                comments.append(comment)
        else:
            self.git_provider.publish_inline_comment(content, relevant_file, relevant_line_in_file)

    if comments:
        self.git_provider.publish_inline_comments(comments)
def _get_user_answers(self) -> Tuple[str, str]:
"""
Retrieves the question and answer strings from the discussion messages related to a pull request.
Returns:
A tuple containing the question and answer strings.
"""
question_str = ""
answer_str = ""
if self.is_answer:
discussion_messages = self.git_provider.get_issue_comments()
for message in reversed(discussion_messages):
if "Questions to better understand the PR:" in message.body:
question_str = message.body
elif '/answer' in message.body:
answer_str = message.body
if answer_str and question_str:
break
return question_str, answer_str

View File

@ -0,0 +1,171 @@
import copy
import logging
from datetime import date
from time import sleep
from typing import Tuple
from jinja2 import Environment, StrictUndefined
from pr_agent.algo.ai_handler import AiHandler
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.config_loader import settings
from pr_agent.git_providers import get_git_provider, GithubProvider
from pr_agent.git_providers.git_provider import get_main_pr_language
# Number of leading lines of the existing CHANGELOG.md that are passed to the prompt.
CHANGELOG_LINES = 50


class PRUpdateChangelog:
    """
    Generate a changelog entry for a pull request from an AI prediction.

    The entry is either committed to CHANGELOG.md on the PR branch or published as a
    PR comment, depending on the '-commit' argument / the
    `pr_update_changelog.push_changelog_changes` setting.
    """

    def __init__(self, pr_url: str, cli_mode=False, args=None):
        """
        Args:
            pr_url: URL of the pull request to process.
            cli_mode: Stored on the instance as `self.cli_mode`.
            args: Optional command arguments; `args[0] == "-commit"` requests that the
                changelog be committed to the repository.
        """
        self.git_provider = get_git_provider()(pr_url)
        self.main_language = get_main_pr_language(
            self.git_provider.get_languages(), self.git_provider.get_files()
        )
        self.commit_changelog = self._parse_args(args, settings)
        # Sets self.changelog_file_str, and self.changelog_file when the file exists/was created.
        self._get_changlog_file()
        self.ai_handler = AiHandler()
        self.patches_diff = None
        self.prediction = None
        self.cli_mode = cli_mode
        self.vars = {
            "title": self.git_provider.pr.title,
            "branch": self.git_provider.get_pr_branch(),
            "description": self.git_provider.get_pr_description(),
            "language": self.main_language,
            "diff": "",  # empty diff for initial calculation
            "changelog_file_str": self.changelog_file_str,
            "today": date.today(),
        }
        self.token_handler = TokenHandler(self.git_provider.pr,
                                          self.vars,
                                          settings.pr_update_changelog_prompt.system,
                                          settings.pr_update_changelog_prompt.user)

    async def update_changelog(self):
        """
        Run the full flow: get the prediction, build the update, then commit or comment.

        Raises:
            AssertionError: if the git provider is not a GithubProvider.
        """
        # Raised explicitly (not via `assert`) so the check still runs under `python -O`;
        # the exception type is unchanged for existing callers.
        if not isinstance(self.git_provider, GithubProvider):
            raise AssertionError("Currently only Github is supported")

        logging.info('Updating the changelog...')
        if settings.config.publish_output:
            self.git_provider.publish_comment("Preparing changelog updates...", is_temporary=True)
        await retry_with_fallback_models(self._prepare_prediction)
        logging.info('Preparing PR changelog updates...')
        new_file_content, answer = self._prepare_changelog_update()
        if settings.config.publish_output:
            self.git_provider.remove_initial_comment()
            logging.info('Publishing changelog updates...')
            if self.commit_changelog:
                logging.info('Pushing PR changelog updates to repo...')
                self._push_changelog_update(new_file_content, answer)
            else:
                logging.info('Publishing PR changelog as comment...')
                self.git_provider.publish_comment(f"**Changelog updates:**\n\n{answer}")

    async def _prepare_prediction(self, model: str):
        """Fetch the PR diff and store the model's prediction on the instance."""
        logging.info('Getting PR diff...')
        self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)
        logging.info('Getting AI prediction...')
        self.prediction = await self._get_prediction(model)

    async def _get_prediction(self, model: str):
        """Render the system/user prompts with the current diff and return the model response."""
        variables = copy.deepcopy(self.vars)
        variables["diff"] = self.patches_diff  # update diff
        environment = Environment(undefined=StrictUndefined)
        system_prompt = environment.from_string(settings.pr_update_changelog_prompt.system).render(variables)
        user_prompt = environment.from_string(settings.pr_update_changelog_prompt.user).render(variables)
        if settings.config.verbosity_level >= 2:
            logging.info(f"\nSystem prompt:\n{system_prompt}")
            logging.info(f"\nUser prompt:\n{user_prompt}")
        response, _finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
                                                                         system=system_prompt, user=user_prompt)
        return response

    def _prepare_changelog_update(self) -> Tuple[str, str]:
        """
        Build the new CHANGELOG.md content and the text to show the user.

        Returns:
            (new_file_content, answer): the prediction prepended to the existing file
            content (or the prediction alone when there is none), and the prediction
            itself, with a commit hint appended when not committing.
        """
        answer = self.prediction.strip().strip("```").strip()
        # self.changelog_file is only set when the file was found or created.
        if hasattr(self, "changelog_file"):
            existing_content = self.changelog_file.decoded_content.decode()
        else:
            existing_content = ""
        if existing_content:
            new_file_content = answer + "\n\n" + existing_content
        else:
            new_file_content = answer

        if not self.commit_changelog:
            answer += "\n\n\n>to commit the new content to the CHANGELOG.md file, please type:" \
                      "\n>'/update_changelog -commit'\n"

        if settings.config.verbosity_level >= 2:
            logging.info(f"answer:\n{answer}")

        return new_file_content, answer

    def _push_changelog_update(self, new_file_content, answer):
        """
        Commit the new changelog content to the PR branch and annotate it with a review comment.

        If creating the review fails, the changelog text is published as a regular comment instead.
        """
        self.git_provider.repo_obj.update_file(path=self.changelog_file.path,
                                               message="Update CHANGELOG.md",
                                               content=new_file_content,
                                               sha=self.changelog_file.sha,
                                               branch=self.git_provider.get_pr_branch())
        review_comment = dict(body="CHANGELOG.md update",
                              path=self.changelog_file.path,
                              line=max(2, len(answer.splitlines())),
                              start_line=1)

        sleep(5)  # wait for the file to be updated
        last_commit_id = list(self.git_provider.pr.get_commits())[-1]
        try:
            self.git_provider.pr.create_review(commit=last_commit_id, comments=[review_comment])
        except Exception:
            # we can't create a review for some reason, let's just publish a comment
            logging.warning("Failed to create a review for the changelog update", exc_info=True)
            self.git_provider.publish_comment(f"**Changelog updates:**\n\n{answer}")

    def _get_default_changelog(self):
        """Return the example changelog layout used when no existing changelog text is available."""
        example_changelog = \
"""
Example:
## <current_date>
### Added
...
### Changed
...
### Fixed
...
"""
        return example_changelog

    def _parse_args(self, args, setting):
        """
        Decide whether the changelog should be committed.

        Args:
            args: Optional command arguments. When non-empty, only `args[0] == "-commit"`
                enables committing; the configured default is not consulted.
            setting: Settings object read for `pr_update_changelog.push_changelog_changes`
                when no arguments are given.

        Returns:
            True when the changelog should be committed to the repository.
        """
        if args and len(args) >= 1:
            try:
                return args[0] == "-commit"
            except (IndexError, KeyError, TypeError):
                # Non-indexable argument containers are treated as "no -commit flag".
                return False
        return setting.pr_update_changelog.push_changelog_changes

    def _get_changlog_file(self):
        """
        Load CHANGELOG.md from the PR branch into `self.changelog_file_str`.

        Only the first CHANGELOG_LINES lines are kept. If the file cannot be loaded and
        committing is enabled, an empty CHANGELOG.md is created on the PR branch. When
        no text is available, the default example changelog is used instead.
        """
        try:
            self.changelog_file = self.git_provider.repo_obj.get_contents("CHANGELOG.md",
                                                                          ref=self.git_provider.get_pr_branch())
            changelog_file_lines = self.changelog_file.decoded_content.decode().splitlines()
            self.changelog_file_str = "\n".join(changelog_file_lines[:CHANGELOG_LINES])
        except Exception:
            # Any failure to load the file is handled as "no changelog available".
            self.changelog_file_str = ""
            if self.commit_changelog:
                logging.info("No CHANGELOG.md file found in the repository. Creating one...")
                changelog_file = self.git_provider.repo_obj.create_file(path="CHANGELOG.md",
                                                                        message='add CHANGELOG.md',
                                                                        content="",
                                                                        branch=self.git_provider.get_pr_branch())
                self.changelog_file = changelog_file['content']

        if not self.changelog_file_str:
            self.changelog_file_str = self._get_default_changelog()

View File

@ -1,3 +1,63 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "pr_agent"
version = "0.0.1"
authors = [
{name = "Itamar Friedman", email = "itamar.f@codium.ai"},
]
maintainers = [
{name = "Ori Kotek", email = "ori.k@codium.ai"},
{name = "Tal Ridnik", email = "tal.r@codium.ai"},
{name = "Hussam Lawen", email = "hussam.l@codium.ai"},
{name = "Sagi Medina", email = "sagi.m@codium.ai"}
]
description = "CodiumAI PR-Agent is an open-source tool to automatically analyze a pull request and provide several types of feedback"
readme = "README.md"
requires-python = ">=3.9"
keywords = ["ai", "tool", "developer", "review", "agent"]
license = {file = "LICENSE", name = "Apache 2.0 License"}
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
]
dependencies = [
"dynaconf==3.1.12",
"fastapi==0.99.0",
"PyGithub==1.59.*",
"retry==0.9.2",
"openai==0.27.8",
"Jinja2==3.1.2",
"tiktoken==0.4.0",
"uvicorn==0.22.0",
"python-gitlab==3.15.0",
"pytest~=7.4.0",
"aiohttp~=3.8.4",
"atlassian-python-api==3.39.0",
"GitPython~=3.1.32",
]
[project.urls]
"Homepage" = "https://github.com/Codium-ai/pr-agent"
[tool.setuptools]
include-package-data = false
license-files = ["LICENSE"]
[tool.setuptools.packages.find]
where = ["."]
include = ["pr_agent"]
[project.scripts]
pr-agent = "pr_agent.cli:run"
[tool.ruff]
line-length = 120

View File

@ -1,12 +1 @@
dynaconf==3.1.12
fastapi==0.99.0
PyGithub==1.58.2
retry==0.9.2
openai==0.27.8
Jinja2==3.1.2
tiktoken==0.4.0
uvicorn==0.22.0
python-gitlab==3.15.0
pytest~=7.4.0
aiohttp~=3.8.4
atlassian-python-api==3.39.0
-e .

5
setup.py Normal file
View File

@ -0,0 +1,5 @@
# Minimal setup.py shim, kept for compatibility with legacy tools that still expect this file.
# setup() is called without arguments; presumably the project metadata is taken from
# pyproject.toml, as described in the setuptools guide below.
# see: https://setuptools.pypa.io/en/latest/userguide/pyproject_config.html
from setuptools import setup
setup()

View File

@ -1,13 +1,12 @@
# Generated by CodiumAI
from pr_agent.algo.utils import try_fix_json
import pytest
class TestTryFixJson:
# Tests that JSON with complete 'Code suggestions' section returns expected output
def test_incomplete_code_suggestions(self):
review = '{"PR Analysis": {"Main theme": "xxx", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...'
review = '{"PR Analysis": {"Main theme": "xxx", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...' # noqa: E501
expected_output = {
'PR Analysis': {
'Main theme': 'xxx',
@ -26,7 +25,7 @@ class TestTryFixJson:
assert try_fix_json(review) == expected_output
def test_incomplete_code_suggestions_new_line(self):
review = '{"PR Analysis": {"Main theme": "xxx", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n\t, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...'
review = '{"PR Analysis": {"Main theme": "xxx", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n\t, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...' # noqa: E501
expected_output = {
'PR Analysis': {
'Main theme': 'xxx',
@ -45,7 +44,7 @@ class TestTryFixJson:
assert try_fix_json(review) == expected_output
def test_incomplete_code_suggestions_many_close_brackets(self):
review = '{"PR Analysis": {"Main theme": "xxx", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy }, [}\n ,incomp.} ,..'
review = '{"PR Analysis": {"Main theme": "xxx", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy }, [}\n ,incomp.} ,..' # noqa: E501
expected_output = {
'PR Analysis': {
'Main theme': 'xxx',
@ -64,7 +63,7 @@ class TestTryFixJson:
assert try_fix_json(review) == expected_output
def test_incomplete_code_suggestions_relevant_file(self):
review = '{"PR Analysis": {"Main theme": "xxx", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.p'
review = '{"PR Analysis": {"Main theme": "xxx", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.p' # noqa: E501
expected_output = {
'PR Analysis': {
'Main theme': 'xxx',