diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml new file mode 100644 index 00000000..960da61b --- /dev/null +++ b/.github/workflows/build-and-test.yaml @@ -0,0 +1,36 @@ +name: Build-and-test + +on: + push: + +jobs: + build-and-test: + runs-on: ubuntu-latest + + steps: + - id: checkout + uses: actions/checkout@v2 + + - id: dockerx + name: Setup Docker Buildx + uses: docker/setup-buildx-action@v2 + + - id: build + name: Build dev docker + uses: docker/build-push-action@v2 + with: + context: . + file: ./docker/Dockerfile + push: false + load: true + tags: codiumai/pr-agent:test + cache-from: type=gha,scope=dev + cache-to: type=gha,mode=max,scope=dev + target: test + + - id: test + name: Test dev docker + run: | + docker run --rm codiumai/pr-agent:test pytest -v + + diff --git a/.github/workflows/review.yaml b/.github/workflows/pr-agent-review.yaml similarity index 60% rename from .github/workflows/review.yaml rename to .github/workflows/pr-agent-review.yaml index e7612520..9dcf59b8 100644 --- a/.github/workflows/review.yaml +++ b/.github/workflows/pr-agent-review.yaml @@ -1,6 +1,17 @@ +# This workflow enables developers to call PR-Agents `/[actions]` in PR's comments and upon PR creation. 
+# Learn more at https://www.codium.ai/pr-agent/ +# This is v0.2 of this workflow file + +name: PR-Agent + on: pull_request: issue_comment: + +permissions: + issues: write + pull-requests: write + jobs: pr_agent_job: runs-on: ubuntu-latest diff --git a/README.md b/README.md index b8ab88c9..0ec00ec2 100644 --- a/README.md +++ b/README.md @@ -97,12 +97,12 @@ CodiumAI `PR-Agent` is an open-source tool aiming to help developers review pull | | Incremental PR Review | :white_check_mark: | | | Examples for invoking the different tools via the CLI: -- **Review**: python cli.py --pr-url= review -- **Describe**: python cli.py --pr-url= describe -- **Improve**: python cli.py --pr-url= improve -- **Ask**: python cli.py --pr-url= ask "Write me a poem about this PR" -- **Reflect**: python cli.py --pr-url= reflect -- **Update Changelog**: python cli.py --pr-url= update_changelog +- **Review**: python cli.py --pr_url= review +- **Describe**: python cli.py --pr_url= describe +- **Improve**: python cli.py --pr_url= improve +- **Ask**: python cli.py --pr_url= ask "Write me a poem about this PR" +- **Reflect**: python cli.py --pr_url= reflect +- **Update Changelog**: python cli.py --pr_url= update_changelog "" is the url of the relevant PR (for example: https://github.com/Codium-ai/pr-agent/pull/50). diff --git a/docker/Dockerfile b/docker/Dockerfile index 4a8b86d5..61ab74cf 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -4,17 +4,21 @@ WORKDIR /app ADD pyproject.toml . RUN pip install . && rm pyproject.toml ENV PYTHONPATH=/app -ADD pr_agent pr_agent FROM base as github_app +ADD pr_agent pr_agent CMD ["python", "pr_agent/servers/github_app.py"] FROM base as github_polling +ADD pr_agent pr_agent CMD ["python", "pr_agent/servers/github_polling.py"] FROM base as test ADD requirements-dev.txt . 
RUN pip install -r requirements-dev.txt && rm requirements-dev.txt +ADD pr_agent pr_agent +ADD tests tests FROM base as cli +ADD pr_agent pr_agent ENTRYPOINT ["python", "pr_agent/cli.py"] diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 8b319446..3a08a86d 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -11,7 +11,7 @@ from github import RateLimitExceededException from pr_agent.algo import MAX_TOKENS from pr_agent.algo.git_patch_processing import convert_to_hunks_with_lines_numbers, extend_patch, handle_patch_deletions from pr_agent.algo.language_handler import sort_files_by_main_languages -from pr_agent.algo.token_handler import TokenHandler +from pr_agent.algo.token_handler import TokenHandler, get_token_encoder from pr_agent.config_loader import get_settings from pr_agent.git_providers.git_provider import FilePatchInfo, GitProvider @@ -284,3 +284,30 @@ def find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo], absolute_position = start2 + delta - 1 break return position, absolute_position + + +def clip_tokens(text: str, max_tokens: int) -> str: + """ + Clip the number of tokens in a string to a maximum number of tokens. + + Args: + text (str): The string to clip. + max_tokens (int): The maximum number of tokens allowed in the string. + + Returns: + str: The clipped string. 
+ """ + # Heuristic: measure the text's average characters per token, then clip by character count + try: + encoder = get_token_encoder() + num_input_tokens = len(encoder.encode(text)) + if num_input_tokens <= max_tokens: + return text + num_chars = len(text) + chars_per_token = num_chars / num_input_tokens + num_output_chars = int(chars_per_token * max_tokens) + clipped_text = text[:num_output_chars] + return clipped_text + except Exception as e: + logging.warning(f"Failed to clip tokens: {e}") + return text \ No newline at end of file diff --git a/pr_agent/algo/token_handler.py b/pr_agent/algo/token_handler.py index 3686f521..f018a92b 100644 --- a/pr_agent/algo/token_handler.py +++ b/pr_agent/algo/token_handler.py @@ -4,6 +4,10 @@ from tiktoken import encoding_for_model, get_encoding from pr_agent.config_loader import get_settings +def get_token_encoder(): + return encoding_for_model(get_settings().config.model) if "gpt" in get_settings().config.model else get_encoding( + "cl100k_base") + class TokenHandler: """ A class for handling tokens in the context of a pull request. @@ -27,7 +31,7 @@ class TokenHandler: - system: The system string. - user: The user string. """ - self.encoder = encoding_for_model(get_settings().config.model) if "gpt" in get_settings().config.model else get_encoding("cl100k_base") + self.encoder = get_token_encoder() self.prompt_tokens = self._get_system_user_tokens(pr, self.encoder, vars, system, user) def _get_system_user_tokens(self, pr, encoder, vars: dict, system, user): diff --git a/pr_agent/cli.py b/pr_agent/cli.py index 8dd21b3f..0f871041 100644 --- a/pr_agent/cli.py +++ b/pr_agent/cli.py @@ -10,13 +10,13 @@ from pr_agent.config_loader import get_settings def run(inargs=None): parser = argparse.ArgumentParser(description='AI based pull request analyzer', usage= """\ -Usage: cli.py --pr-url []. +Usage: cli.py --pr-url= []. For example: -- cli.py --pr-url=... review -- cli.py --pr-url=... describe -- cli.py --pr-url=... 
improve -- cli.py --pr-url=... ask "write me a poem about this PR" -- cli.py --pr-url=... reflect +- cli.py --pr_url=... review +- cli.py --pr_url=... describe +- cli.py --pr_url=... improve +- cli.py --pr_url=... ask "write me a poem about this PR" +- cli.py --pr_url=... reflect Supported commands: review / review_pr - Add a review that includes a summary of the PR and specific suggestions for improvement. @@ -27,7 +27,7 @@ reflect - Ask the PR author questions about the PR. update_changelog - Update the changelog based on the PR's contents. To edit any configuration parameter from 'configuration.toml', just add -config_path=. -For example: '- cli.py --pr-url=... review --pr_reviewer.extra_instructions="focus on the file: ..."' +For example: 'python cli.py --pr_url=... review --pr_reviewer.extra_instructions="focus on the file: ..."' """) parser.add_argument('--pr_url', type=str, help='The URL of the PR to review', required=True) parser.add_argument('command', type=str, help='The', choices=commands, default='review') diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py index 122b0db3..07b92295 100644 --- a/pr_agent/git_providers/bitbucket_provider.py +++ b/pr_agent/git_providers/bitbucket_provider.py @@ -5,6 +5,7 @@ from urllib.parse import urlparse import requests from atlassian.bitbucket import Cloud +from ..algo.pr_processing import clip_tokens from ..config_loader import get_settings from .git_provider import FilePatchInfo @@ -81,6 +82,9 @@ class BitbucketProvider: return self.pr.source_branch def get_pr_description(self): + max_tokens = get_settings().get("CONFIG.MAX_DESCRIPTION_TOKENS", None) + if max_tokens: + return clip_tokens(self.pr.description, max_tokens) return self.pr.description def get_user_id(self): diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py index 8e161252..2a891938 100644 --- a/pr_agent/git_providers/git_provider.py +++ 
b/pr_agent/git_providers/git_provider.py @@ -97,6 +97,10 @@ class GitProvider(ABC): def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool: pass + @abstractmethod + def get_commit_messages(self): + pass + def get_main_pr_language(languages, files) -> str: """ Get the main language of the commit. Return an empty string if cannot determine. diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 38a0ba44..be0fa645 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -12,7 +12,7 @@ from starlette_context import context from .git_provider import FilePatchInfo, GitProvider, IncrementalPR from ..algo.language_handler import is_valid_file from ..algo.utils import load_large_diff -from ..algo.pr_processing import find_line_number_of_relevant_line_in_file +from ..algo.pr_processing import find_line_number_of_relevant_line_in_file, clip_tokens from ..config_loader import get_settings from ..servers.utils import RateLimitExceeded @@ -234,6 +234,9 @@ class GithubProvider(GitProvider): return self.pr.head.ref def get_pr_description(self): + max_tokens = get_settings().get("CONFIG.MAX_DESCRIPTION_TOKENS", None) + if max_tokens: + return clip_tokens(self.pr.body, max_tokens) return self.pr.body def get_user_id(self): @@ -375,19 +378,22 @@ class GithubProvider(GitProvider): logging.exception(f"Failed to get labels, error: {e}") return [] - def get_commit_messages(self) -> str: + def get_commit_messages(self): """ Retrieves the commit messages of a pull request. Returns: str: A string containing the commit messages of the pull request. """ + max_tokens = get_settings().get("CONFIG.MAX_COMMITS_TOKENS", None) try: commit_list = self.pr.get_commits() commit_messages = [commit.commit.message for commit in commit_list] commit_messages_str = "\n".join([f"{i + 1}. 
{message}" for i, message in enumerate(commit_messages)]) - except: + except Exception: commit_messages_str = "" + if max_tokens: + commit_messages_str = clip_tokens(commit_messages_str, max_tokens) return commit_messages_str def generate_link_to_relevant_line_number(self, suggestion) -> str: diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index a4d2d127..73a3a2f9 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -7,6 +7,7 @@ import gitlab from gitlab import GitlabGetError from ..algo.language_handler import is_valid_file +from ..algo.pr_processing import clip_tokens from ..algo.utils import load_large_diff from ..config_loader import get_settings from .git_provider import EDIT_TYPE, FilePatchInfo, GitProvider @@ -275,6 +276,9 @@ class GitLabProvider(GitProvider): return self.mr.source_branch def get_pr_description(self): + max_tokens = get_settings().get("CONFIG.MAX_DESCRIPTION_TOKENS", None) + if max_tokens: + return clip_tokens(self.mr.description, max_tokens) return self.mr.description def get_issue_comments(self): @@ -338,16 +342,19 @@ class GitLabProvider(GitProvider): def get_labels(self): return self.mr.labels - def get_commit_messages(self) -> str: + def get_commit_messages(self): """ Retrieves the commit messages of a pull request. Returns: str: A string containing the commit messages of the pull request. """ + max_tokens = get_settings().get("CONFIG.MAX_COMMITS_TOKENS", None) try: commit_messages_list = [commit['message'] for commit in self.mr.commits()._list] commit_messages_str = "\n".join([f"{i + 1}. 
{message}" for i, message in enumerate(commit_messages_list)]) - except: + except Exception: commit_messages_str = "" + if max_tokens: + commit_messages_str = clip_tokens(commit_messages_str, max_tokens) return commit_messages_str \ No newline at end of file diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 8334049d..0c502df9 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -8,6 +8,8 @@ verbosity_level=0 # 0,1,2 use_extra_bad_extensions=false use_repo_settings_file=true ai_timeout=180 +max_description_tokens = 500 +max_commits_tokens = 500 [pr_reviewer] # /review # require_focused_review=true diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index 7fdf6429..fd6479ae 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -10,7 +10,7 @@ from yaml import SafeLoader from pr_agent.algo.ai_handler import AiHandler from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models, \ - find_line_number_of_relevant_line_in_file + find_line_number_of_relevant_line_in_file, clip_tokens from pr_agent.algo.token_handler import TokenHandler from pr_agent.algo.utils import convert_to_markdown, try_fix_json, try_fix_yaml, load_yaml from pr_agent.config_loader import get_settings diff --git a/pyproject.toml b/pyproject.toml index 4ca0c0b6..2e8f2b5c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,8 @@ dependencies = [ "atlassian-python-api==3.39.0", "GitPython~=3.1.32", "starlette-context==0.3.6", - "litellm~=0.1.351" + "litellm~=0.1.351", + "PyYAML==6.0" ] [project.urls] diff --git a/requirements.txt b/requirements.txt index 07a33514..ebea2b71 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,4 +11,7 @@ pytest~=7.4.0 aiohttp~=3.8.4 atlassian-python-api==3.39.0 GitPython~=3.1.32 +litellm~=0.1.351 +PyYAML==6.0 +starlette-context==0.3.6 litellm~=0.1.351 \ No newline at end of file