Merge pull request #139 from Codium-ai/tr/changelog

Add feature to update CHANGELOG.md based on PR content
This commit is contained in:
mrT23
2023-07-27 09:04:48 +03:00
committed by GitHub
8 changed files with 232 additions and 4 deletions

6
CHANGELOG.md Normal file
View File

@ -0,0 +1,6 @@
## 2023-07-26
### Added
- New feature for updating the CHANGELOG.md based on the contents of a PR.
- Added support for this feature for the Github provider.
- New configuration settings and prompts for the changelog update feature.

View File

@ -81,6 +81,7 @@ CodiumAI `PR-Agent` is an open-source tool aiming to help developers review pull
| | Auto-Description | :white_check_mark: | :white_check_mark: | |
| | Improve Code | :white_check_mark: | :white_check_mark: | |
| | Reflect and Review | :white_check_mark: | | |
| | Update CHANGELOG.md | :white_check_mark: | | |
| | | | | |
| USAGE | CLI | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| | App / webhook | :white_check_mark: | :white_check_mark: | |
@ -98,6 +99,7 @@ Examples for invoking the different tools via the CLI:
- **Improve**: python cli.py --pr-url=<pr_url> improve
- **Ask**: python cli.py --pr-url=<pr_url> ask "Write me a poem about this PR"
- **Reflect**: python cli.py --pr-url=<pr_url> reflect
- **Update changelog**: python cli.py --pr-url=<pr_url> update_changelog
"<pr_url>" is the url of the relevant PR (for example: https://github.com/Codium-ai/pr-agent/pull/50).

View File

@ -6,6 +6,7 @@ from pr_agent.tools.pr_description import PRDescription
from pr_agent.tools.pr_information_from_user import PRInformationFromUser
from pr_agent.tools.pr_questions import PRQuestions
from pr_agent.tools.pr_reviewer import PRReviewer
from pr_agent.tools.pr_update_changelog import PRUpdateChangelog
class PRAgent:
@ -26,7 +27,9 @@ class PRAgent:
elif any(cmd == action for cmd in ["/improve", "/improve_code"]):
await PRCodeSuggestions(pr_url).suggest()
elif any(cmd == action for cmd in ["/ask", "/ask_question"]):
await PRQuestions(pr_url, args).answer()
await PRQuestions(pr_url, args=args).answer()
elif any(cmd == action for cmd in ["/update_changelog"]):
await PRUpdateChangelog(pr_url, args=args).update_changelog()
else:
return False

View File

@ -8,6 +8,7 @@ from pr_agent.tools.pr_description import PRDescription
from pr_agent.tools.pr_information_from_user import PRInformationFromUser
from pr_agent.tools.pr_questions import PRQuestions
from pr_agent.tools.pr_reviewer import PRReviewer
from pr_agent.tools.pr_update_changelog import PRUpdateChangelog
def run(args=None):
@ -27,13 +28,15 @@ ask / ask_question [question] - Ask a question about the PR.
describe / describe_pr - Modify the PR title and description based on the PR's contents.
improve / improve_code - Suggest improvements to the code in the PR as pull request comments ready to commit.
reflect - Ask the PR author questions about the PR.
update_changelog - Update the changelog based on the PR's contents.
""")
parser.add_argument('--pr_url', type=str, help='The URL of the PR to review', required=True)
parser.add_argument('command', type=str, help='The', choices=['review', 'review_pr',
'ask', 'ask_question',
'describe', 'describe_pr',
'improve', 'improve_code',
'reflect', 'review_after_reflect'],
'reflect', 'review_after_reflect',
'update_changelog'],
default='review')
parser.add_argument('rest', nargs=argparse.REMAINDER, default=[])
args = parser.parse_args(args)
@ -49,7 +52,8 @@ reflect - Ask the PR author questions about the PR.
'review': _handle_review_command,
'review_pr': _handle_review_command,
'reflect': _handle_reflect_command,
'review_after_reflect': _handle_review_after_reflect_command
'review_after_reflect': _handle_review_after_reflect_command,
'update_changelog': _handle_update_changelog,
}
if command in commands:
commands[command](args.pr_url, args.rest)
@ -96,6 +100,10 @@ def _handle_review_after_reflect_command(pr_url: str, rest: list):
reviewer = PRReviewer(pr_url, cli_mode=True, is_answer=True)
asyncio.run(reviewer.review())
def _handle_update_changelog(pr_url: str, rest: list):
print(f"Updating changlog for: {pr_url}")
reviewer = PRUpdateChangelog(pr_url, cli_mode=True, args=rest)
asyncio.run(reviewer.update_changelog())
if __name__ == '__main__':
run()

View File

@ -19,6 +19,7 @@ settings = Dynaconf(
"settings/pr_description_prompts.toml",
"settings/pr_code_suggestions_prompts.toml",
"settings/pr_information_from_user_prompts.toml",
"settings/pr_update_changelog.toml",
"settings_prod/.secrets.toml"
]]
)

View File

@ -1,6 +1,6 @@
[config]
model="gpt-4"
fallback-models=["gpt-3.5-turbo-16k", "gpt-3.5-turbo"]
fallback_models=["gpt-3.5-turbo-16k"]
git_provider="github"
publish_output=true
publish_output_progress=true
@ -24,6 +24,9 @@ publish_description_as_comment=false
[pr_code_suggestions]
num_code_suggestions=4
[pr_update_changelog]
push_changelog_changes=false
[github]
# The type of deployment to create. Valid values are 'app' or 'user'.
deployment_type = "user"

View File

@ -0,0 +1,34 @@
[pr_update_changelog_prompt]
system="""You are a language model called CodiumAI-PR-Changlog-summarizer.
Your task is to update the CHANGELOG.md file of the project, to shortly summarize important changes introduced in this PR (the '+' lines).
- The output should match the existing CHANGELOG.md format, style and conventions, so it will look like a natural part of the file. For example, if previous changes were summarized in a single line, you should do the same.
- Don't repeat previous changes. Generate only new content, that is not already in the CHANGELOG.md file.
- Be general, and avoid specific details, files, etc. The output should be minimal, no more than 3-4 short lines. Ignore non-relevant subsections.
"""
user="""PR Info:
Title: '{{title}}'
Branch: '{{branch}}'
Description: '{{description}}'
{%- if language %}
Main language: {{language}}
{%- endif %}
The PR Diff:
```
{{diff}}
```
Current date:
```
{{today}}
```
The current CHANGELOG.md:
```
{{ changelog_file_str }}
```
Response:
"""

View File

@ -0,0 +1,171 @@
import copy
import logging
from datetime import date
from time import sleep
from typing import Tuple
from jinja2 import Environment, StrictUndefined
from pr_agent.algo.ai_handler import AiHandler
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.config_loader import settings
from pr_agent.git_providers import get_git_provider, GithubProvider
from pr_agent.git_providers.git_provider import get_main_pr_language
CHANGELOG_LINES = 50
class PRUpdateChangelog:
def __init__(self, pr_url: str, cli_mode=False, args=None):
self.git_provider = get_git_provider()(pr_url)
self.main_language = get_main_pr_language(
self.git_provider.get_languages(), self.git_provider.get_files()
)
self.commit_changelog = self._parse_args(args, settings)
self._get_changlog_file() # self.changelog_file_str
self.ai_handler = AiHandler()
self.patches_diff = None
self.prediction = None
self.cli_mode = cli_mode
self.vars = {
"title": self.git_provider.pr.title,
"branch": self.git_provider.get_pr_branch(),
"description": self.git_provider.get_pr_description(),
"language": self.main_language,
"diff": "", # empty diff for initial calculation
"changelog_file_str": self.changelog_file_str,
"today": date.today(),
}
self.token_handler = TokenHandler(self.git_provider.pr,
self.vars,
settings.pr_update_changelog_prompt.system,
settings.pr_update_changelog_prompt.user)
async def update_changelog(self):
assert type(self.git_provider) == GithubProvider, "Currently only Github is supported"
logging.info('Updating the changelog...')
if settings.config.publish_output:
self.git_provider.publish_comment("Preparing changelog updates...", is_temporary=True)
await retry_with_fallback_models(self._prepare_prediction)
logging.info('Preparing PR changelog updates...')
new_file_content, answer = self._prepare_changelog_update()
if settings.config.publish_output:
self.git_provider.remove_initial_comment()
logging.info('Publishing changelog updates...')
if self.commit_changelog:
logging.info('Pushing PR changelog updates to repo...')
self._push_changelog_update(new_file_content, answer)
else:
logging.info('Publishing PR changelog as comment...')
self.git_provider.publish_comment(f"**Changelog updates:**\n\n{answer}")
async def _prepare_prediction(self, model: str):
logging.info('Getting PR diff...')
self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)
logging.info('Getting AI prediction...')
self.prediction = await self._get_prediction(model)
async def _get_prediction(self, model: str):
variables = copy.deepcopy(self.vars)
variables["diff"] = self.patches_diff # update diff
environment = Environment(undefined=StrictUndefined)
system_prompt = environment.from_string(settings.pr_update_changelog_prompt.system).render(variables)
user_prompt = environment.from_string(settings.pr_update_changelog_prompt.user).render(variables)
if settings.config.verbosity_level >= 2:
logging.info(f"\nSystem prompt:\n{system_prompt}")
logging.info(f"\nUser prompt:\n{user_prompt}")
response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
system=system_prompt, user=user_prompt)
return response
def _prepare_changelog_update(self) -> Tuple[str, str]:
answer = self.prediction.strip().strip("```").strip()
if hasattr(self, "changelog_file"):
existing_content = self.changelog_file.decoded_content.decode()
else:
existing_content = ""
if existing_content:
new_file_content = answer + "\n\n" + self.changelog_file.decoded_content.decode()
else:
new_file_content = answer
if not self.commit_changelog:
answer += "\n\n\n>to commit the new content to the CHANGELOG.md file, please type:" \
"\n>'/update_changelog -commit'\n"
if settings.config.verbosity_level >= 2:
logging.info(f"answer:\n{answer}")
return new_file_content, answer
def _push_changelog_update(self, new_file_content, answer):
self.git_provider.repo_obj.update_file(path=self.changelog_file.path,
message="Update CHANGELOG.md",
content=new_file_content,
sha=self.changelog_file.sha,
branch=self.git_provider.get_pr_branch())
d = dict(body="CHANGELOG.md update",
path=self.changelog_file.path,
line=max(2, len(answer.splitlines())),
start_line=1)
sleep(5) # wait for the file to be updated
last_commit_id = list(self.git_provider.pr.get_commits())[-1]
try:
self.git_provider.pr.create_review(commit=last_commit_id, comments=[d])
except:
# we can't create a review for some reason, let's just publish a comment
self.git_provider.publish_comment(f"**Changelog updates:**\n\n{answer}")
def _get_default_changelog(self):
example_changelog = \
"""
Example:
## <current_date>
### Added
...
### Changed
...
### Fixed
...
"""
return example_changelog
def _parse_args(self, args, setting):
commit_changelog = False
if args and len(args) >= 1:
try:
if args[0] == "-commit":
commit_changelog = True
except:
pass
else:
commit_changelog = setting.pr_update_changelog.push_changelog_changes
return commit_changelog
def _get_changlog_file(self):
try:
self.changelog_file = self.git_provider.repo_obj.get_contents("CHANGELOG.md",
ref=self.git_provider.get_pr_branch())
changelog_file_lines = self.changelog_file.decoded_content.decode().splitlines()
changelog_file_lines = changelog_file_lines[:CHANGELOG_LINES]
self.changelog_file_str = "\n".join(changelog_file_lines)
except:
self.changelog_file_str = ""
if self.commit_changelog:
logging.info("No CHANGELOG.md file found in the repository. Creating one...")
changelog_file = self.git_provider.repo_obj.create_file(path="CHANGELOG.md",
message='add CHANGELOG.md',
content="",
branch=self.git_provider.get_pr_branch())
self.changelog_file = changelog_file['content']
if not self.changelog_file_str:
self.changelog_file_str = self._get_default_changelog()