From 02ecaa340fc330409288785a4c486174b86fe8c1 Mon Sep 17 00:00:00 2001 From: Patryk Kowalski Date: Mon, 24 Jul 2023 12:49:57 +0200 Subject: [PATCH 1/8] Local Git Provider Implementation --- pr_agent/algo/language_handler.py | 2 +- pr_agent/config_loader.py | 32 +++ pr_agent/git_providers/__init__.py | 2 + pr_agent/git_providers/local_git_provider.py | 201 +++++++++++++++++++ pr_agent/settings/configuration.toml | 5 + 5 files changed, 241 insertions(+), 1 deletion(-) create mode 100644 pr_agent/git_providers/local_git_provider.py diff --git a/pr_agent/algo/language_handler.py b/pr_agent/algo/language_handler.py index c1678eae..c81d7fd6 100644 --- a/pr_agent/algo/language_handler.py +++ b/pr_agent/algo/language_handler.py @@ -13,7 +13,7 @@ if settings.config.use_extra_bad_extensions: def filter_bad_extensions(files): - return [f for f in files if is_valid_file(f.filename)] + return [f for f in files if f.filename is not None and is_valid_file(f.filename)] def is_valid_file(filename): diff --git a/pr_agent/config_loader.py b/pr_agent/config_loader.py index 906d0aa1..6d9b443e 100644 --- a/pr_agent/config_loader.py +++ b/pr_agent/config_loader.py @@ -1,7 +1,11 @@ from os.path import abspath, dirname, join +from pathlib import Path +from typing import Optional from dynaconf import Dynaconf +PR_AGENT_TOML_KEY = 'pr-agent' + current_dir = dirname(abspath(__file__)) settings = Dynaconf( envvar_prefix=False, @@ -18,3 +22,31 @@ settings = Dynaconf( "settings_prod/.secrets.toml" ]] ) + + +# Add local configuration from pyproject.toml of the project being reviewed +def _find_repository_root() -> Path: + """ + Identify project root directory by recursively searching for the .git directory in the parent directories. + """ + cwd = Path.cwd().resolve() + no_way_up = False + while not no_way_up: + no_way_up = cwd == cwd.parent + if (cwd / ".git").is_dir(): + return cwd + cwd = cwd.parent + raise FileNotFoundError("Could not find the repository root directory") + + +def _find_pyproject() -> Optional[Path]: + """ + Search for file pyproject.toml in the repository root. + """ + pyproject = _find_repository_root() / "pyproject.toml" + return pyproject if pyproject.is_file() else None + + +pyproject_path = _find_pyproject() +if pyproject_path is not None: + settings.load_file(pyproject_path, env=f'tool.{PR_AGENT_TOML_KEY}') diff --git a/pr_agent/git_providers/__init__.py b/pr_agent/git_providers/__init__.py index 646876c5..421fcd5d 100644 --- a/pr_agent/git_providers/__init__.py +++ b/pr_agent/git_providers/__init__.py @@ -2,11 +2,13 @@ from pr_agent.config_loader import settings from pr_agent.git_providers.bitbucket_provider import BitbucketProvider from pr_agent.git_providers.github_provider import GithubProvider from pr_agent.git_providers.gitlab_provider import GitLabProvider +from pr_agent.git_providers.local_git_provider import LocalGitProvider _GIT_PROVIDERS = { 'github': GithubProvider, 'gitlab': GitLabProvider, 'bitbucket': BitbucketProvider, + 'local' : LocalGitProvider } def get_git_provider(): diff --git a/pr_agent/git_providers/local_git_provider.py b/pr_agent/git_providers/local_git_provider.py new file mode 100644 index 00000000..e9aa7bbb --- /dev/null +++ b/pr_agent/git_providers/local_git_provider.py @@ -0,0 +1,201 @@ +import logging +import uuid +from collections import Counter +from pathlib import Path +from typing import List + +from git import GitCommandError, Repo + +from pr_agent.config_loader import _find_repository_root, settings +from pr_agent.git_providers.git_provider import EDIT_TYPE, FilePatchInfo, GitProvider + + +class PullRequestMimic: + """ + This class mimics the PullRequest class from the PyGithub library for the LocalGitProvider. + """ + + def __init__(self, title: str, diff_files: List[FilePatchInfo]): + self.title = title + self.diff_files = diff_files + + +class LocalGitProvider(GitProvider): + """ + This class implements the GitProvider interface for local git repositories. + It mimics the PR functionality of the GitProvider interface, + but does not require a hosted git repository. + Instead of providing a PR url, the user provides a local branch path to generate a diff-patch. + For the MVP it only supports the /review and /describe capabilities. + """ + + def __init__(self, branch_name, incremental=False): + self.repo_path = _find_repository_root() + self.repo = Repo(self.repo_path) + self.head_branch_name = self.repo.head.ref.name + self.branch_name = branch_name + self.tmp_branch_name = f'pr_agent_{uuid.uuid4()}' + self.prepare_repo() + self.diff_files = None + self.pr = PullRequestMimic(self.get_pr_title(), self.get_diff_files()) + self.description_path = settings.get('local.description_path') \ + if settings.get('local.description_path') is not None else self.repo_path / 'description.md' + self.review_path = settings.get('local.review_path') \ + if settings.get('local.review_path') is not None else self.repo_path / 'review_path.md' + # inline code comments are not supported for local git repositories + settings.pr_reviewer.inline_code_comments = False + + def __del__(self): + logging.debug('Deleting temporary branch...') + self.repo.git.checkout(self.head_branch_name) # switch back to the original branch + # delete the temporary branch + if self.tmp_branch_name not in self.repo.heads: + return + try: + self.repo.delete_head(self.tmp_branch_name, force=True) + except GitCommandError as e: + raise ValueError( + 'Error while trying to delete the temporary branch.' + 'Ensure the branch exists.' + ) from e + + def prepare_repo(self): + """ + Prepare the repository for PR-mimic generation. + """ + logging.debug('Preparing repository for PR-mimic generation...') + if self.repo.is_dirty(): + raise ValueError('The repository is not in a clean state. Please check in all files.') + if self.tmp_branch_name in self.repo.heads: + self.repo.delete_head(self.tmp_branch_name, force=True) + self.repo.git.checkout('HEAD', b=self.tmp_branch_name) + + try: + logging.debug('Rebasing the temporary branch on the main branch...') + self.repo.git.rebase('main') + except GitCommandError as e: + raise ValueError('Error while rebasing. Resolve conflicts before retrying.') from e + + def is_supported(self, capability: str) -> bool: + # TODO implement + pass + + def get_diff_files(self) -> list[FilePatchInfo]: + diffs = self.repo.head.commit.diff( + self.repo.branches[self.branch_name].commit, + create_patch=True, + R=True + ) + diff_files = [] + for diff_item in diffs: + if diff_item.a_blob is not None: + original_file_content_str = diff_item.a_blob.data_stream.read().decode('utf-8') + else: + original_file_content_str = "" # empty file + if diff_item.b_blob is not None: + new_file_content_str = diff_item.b_blob.data_stream.read().decode('utf-8') + else: + new_file_content_str = "" # empty file + edit_type = EDIT_TYPE.MODIFIED + if diff_item.new_file: + edit_type = EDIT_TYPE.ADDED + elif diff_item.deleted_file: + edit_type = EDIT_TYPE.DELETED + elif diff_item.renamed_file: + edit_type = EDIT_TYPE.RENAMED + diff_files.append( + FilePatchInfo(original_file_content_str, + new_file_content_str, + diff_item.diff.decode('utf-8'), + diff_item.b_path, + edit_type=edit_type, + old_filename=None if diff_item.a_path == diff_item.b_path else diff_item.a_path + ) + ) + self.diff_files = diff_files + return diff_files + + def get_files(self) -> List[str]: + """ + Returns a list of files with changes in the diff. + """ + # Assert existence of specific branch + branch_names = [ref.name for ref in self.repo.branches] + if self.branch_name not in branch_names: + raise KeyError(f"Branch: {self.branch_name} does not exist") + branch = self.repo.branches[self.branch_name] + # Compare the two branches + diff_index = self.repo.head.commit.diff(branch.commit) + # Get the list of changed files + diff_files = [item.a_path for item in diff_index] + return diff_files + + def publish_description(self, pr_title: str, pr_body: str): + with open(self.description_path, "w") as file: + # Write the string to the file + file.write(pr_title + '\n' + pr_body) + + def publish_comment(self, pr_comment: str, is_temporary: bool = False): + with open(self.review_path, "w") as file: + # Write the string to the file + file.write(pr_comment) + + def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str): + raise NotImplementedError('Publishing inline comments is not implemented for the local git provider') + + def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str): + raise NotImplementedError('Creating inline comments is not implemented for the local git provider') + + def publish_inline_comments(self, comments: list[dict]): + raise NotImplementedError('Publishing inline comments is not implemented for the local git provider') + + def publish_code_suggestion(self, body: str, relevant_file: str, + relevant_lines_start: int, relevant_lines_end: int): + raise NotImplementedError('Publishing code suggestions is not implemented for the local git provider') + + def publish_code_suggestions(self, code_suggestions: list): + raise NotImplementedError('Publishing code suggestions is not implemented for the local git provider') + + def publish_labels(self, labels): + pass # Not applicable to the local git provider, but required by the interface + + def remove_initial_comment(self): + pass # Not applicable to the local git provider, but required by the interface + + def get_languages(self): + """ + Calculate percentage of languages in repository. Used for hunk prioritisation. + """ + # Get all files in repository + filepaths = [Path(item.path) for item in self.repo.tree().traverse() if item.type == 'blob'] + # Identify language by file extension and count + lang_count = Counter(ext.lstrip('.') for filepath in filepaths for ext in [filepath.suffix.lower()]) + # Convert counts to percentages + total_files = len(filepaths) + lang_percentage = {lang: count / total_files * 100 for lang, count in lang_count.items()} + return lang_percentage + + def get_pr_branch(self): + return self.repo.head + + def get_user_id(self): + return -1 # Not used anywhere for the local provider, but required by the interface + + def get_pr_description(self): + commits_diff = list(self.repo.iter_commits(self.branch_name + '..HEAD')) + # Get the commit messages and concatenate + commit_messages = " ".join([commit.message for commit in commits_diff]) + # TODO Handle the description better - maybe use gpt-3.5 summarisation here? + return commit_messages[:200] # Use max 200 characters + + def get_pr_title(self): + """ + Substitutes the branch-name as the PR-mimic title. + """ + return self.branch_name + + def get_issue_comments(self): + raise NotImplementedError('Getting issue comments is not implemented for the local git provider') + + def get_labels(self): + raise NotImplementedError('Getting labels is not implemented for the local git provider') diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 3861df13..fbf8ffec 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -40,3 +40,8 @@ magic_word = "AutoReview" # Polling interval polling_interval_seconds = 30 + +[local] +# LocalGitProvider settings - uncomment to use paths other than default +# description_path= "path/to/description.md" +# review_path= "path/to/review.md" \ No newline at end of file From 8e6518f0718db6f10fa25507d1528fb26e6b86ba Mon Sep 17 00:00:00 2001 From: Patryk Kowalski Date: Mon, 24 Jul 2023 14:28:37 +0200 Subject: [PATCH 2/8] Added GitPython to requirements. Changed default review path (aesthetics) --- pr_agent/git_providers/local_git_provider.py | 2 +- requirements.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pr_agent/git_providers/local_git_provider.py b/pr_agent/git_providers/local_git_provider.py index e9aa7bbb..57680b59 100644 --- a/pr_agent/git_providers/local_git_provider.py +++ b/pr_agent/git_providers/local_git_provider.py @@ -41,7 +41,7 @@ class LocalGitProvider(GitProvider): self.description_path = settings.get('local.description_path') \ if settings.get('local.description_path') is not None else self.repo_path / 'description.md' self.review_path = settings.get('local.review_path') \ - if settings.get('local.review_path') is not None else self.repo_path / 'review_path.md' + if settings.get('local.review_path') is not None else self.repo_path / 'review.md' # inline code comments are not supported for local git repositories settings.pr_reviewer.inline_code_comments = False diff --git a/requirements.txt b/requirements.txt index 399cd3ab..51dc6fee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,4 @@ python-gitlab==3.15.0 pytest~=7.4.0 aiohttp~=3.8.4 atlassian-python-api==3.39.0 +GitPython~=3.1.32 \ No newline at end of file From d4cbcc465c9f044eeb30d358cb253f1007bd5bd2 Mon Sep 17 00:00:00 2001 From: "Patryk Kowalski (deepsense.ai)" <139119482+patryk-kowalski-ds@users.noreply.github.com> Date: Mon, 24 Jul 2023 16:46:36 +0200 Subject: [PATCH 3/8] Update pr_agent/git_providers/local_git_provider.py Co-authored-by: Ori Kotek --- pr_agent/git_providers/local_git_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_agent/git_providers/local_git_provider.py b/pr_agent/git_providers/local_git_provider.py index 57680b59..90b1b194 100644 --- a/pr_agent/git_providers/local_git_provider.py +++ b/pr_agent/git_providers/local_git_provider.py @@ -35,7 +35,7 @@ class LocalGitProvider(GitProvider): self.head_branch_name = self.repo.head.ref.name self.branch_name = branch_name self.tmp_branch_name = f'pr_agent_{uuid.uuid4()}' - self.prepare_repo() + self._prepare_repo() self.diff_files = None self.pr = PullRequestMimic(self.get_pr_title(), self.get_diff_files()) self.description_path = settings.get('local.description_path') \ From c0610afe2a03a05a5f758f1b502ad96ea1b14b04 Mon Sep 17 00:00:00 2001 From: "Patryk Kowalski (deepsense.ai)" <139119482+patryk-kowalski-ds@users.noreply.github.com> Date: Mon, 24 Jul 2023 16:46:46 +0200 Subject: [PATCH 4/8] Update pr_agent/git_providers/local_git_provider.py Co-authored-by: Ori Kotek --- pr_agent/git_providers/local_git_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_agent/git_providers/local_git_provider.py b/pr_agent/git_providers/local_git_provider.py index 90b1b194..90d512a1 100644 --- a/pr_agent/git_providers/local_git_provider.py +++ b/pr_agent/git_providers/local_git_provider.py @@ -59,7 +59,7 @@ class LocalGitProvider(GitProvider): 'Ensure the branch exists.' ) from e - def prepare_repo(self): + def _prepare_repo(self): """ Prepare the repository for PR-mimic generation. """ From ca42a54bc364b68a8e4517504217cfd3954a3a93 Mon Sep 17 00:00:00 2001 From: "Patryk Kowalski (deepsense.ai)" <139119482+patryk-kowalski-ds@users.noreply.github.com> Date: Mon, 24 Jul 2023 16:47:05 +0200 Subject: [PATCH 5/8] Update pr_agent/git_providers/local_git_provider.py Co-authored-by: Ori Kotek --- pr_agent/git_providers/local_git_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_agent/git_providers/local_git_provider.py b/pr_agent/git_providers/local_git_provider.py index 90d512a1..86673016 100644 --- a/pr_agent/git_providers/local_git_provider.py +++ b/pr_agent/git_providers/local_git_provider.py @@ -65,7 +65,7 @@ class LocalGitProvider(GitProvider): """ logging.debug('Preparing repository for PR-mimic generation...') if self.repo.is_dirty(): - raise ValueError('The repository is not in a clean state. Please check in all files.') + raise ValueError('The repository is not in a clean state. Please commit or stash pending changes.') if self.tmp_branch_name in self.repo.heads: self.repo.delete_head(self.tmp_branch_name, force=True) self.repo.git.checkout('HEAD', b=self.tmp_branch_name) From 6170995665a4d72814608f9e359902d0b19448a8 Mon Sep 17 00:00:00 2001 From: Patryk Kowalski Date: Mon, 24 Jul 2023 16:58:11 +0200 Subject: [PATCH 6/8] replaced hardcoded main with actual target_branch name' --- pr_agent/git_providers/local_git_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_agent/git_providers/local_git_provider.py b/pr_agent/git_providers/local_git_provider.py index 86673016..35439b65 100644 --- a/pr_agent/git_providers/local_git_provider.py +++ b/pr_agent/git_providers/local_git_provider.py @@ -72,7 +72,7 @@ class LocalGitProvider(GitProvider): try: logging.debug('Rebasing the temporary branch on the main branch...') - self.repo.git.rebase('main') + self.repo.git.rebase(self.branch_name) except GitCommandError as e: raise ValueError('Error while rebasing. Resolve conflicts before retrying.') from e From 918549a4fc430a86f3826454b6978b37d2e244e1 Mon Sep 17 00:00:00 2001 From: Patryk Kowalski Date: Tue, 25 Jul 2023 12:35:39 +0200 Subject: [PATCH 7/8] Implementing 'is_supported' method --- pr_agent/git_providers/local_git_provider.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pr_agent/git_providers/local_git_provider.py b/pr_agent/git_providers/local_git_provider.py index 35439b65..7c157d81 100644 --- a/pr_agent/git_providers/local_git_provider.py +++ b/pr_agent/git_providers/local_git_provider.py @@ -77,8 +77,9 @@ class LocalGitProvider(GitProvider): raise ValueError('Error while rebasing. Resolve conflicts before retrying.') from e def is_supported(self, capability: str) -> bool: - # TODO implement - pass + if capability in ['get_issue_comments', 'create_inline_comment', 'publish_inline_comments', 'get_labels']: + return False + return True def get_diff_files(self) -> list[FilePatchInfo]: diffs = self.repo.head.commit.diff( From 0815e2024ccb5049102ab7922438fa97f74d2507 Mon Sep 17 00:00:00 2001 From: Patryk Kowalski Date: Tue, 25 Jul 2023 13:07:21 +0200 Subject: [PATCH 8/8] - Replaced two dot diff with three dot diff. Cleaned up obsolete code linked to double dot diff. - Moved target_branch_existence assertion to _prepare_repo method - Renamed branch_name -> target_branch_name - Simplified get_files method --- pr_agent/git_providers/local_git_provider.py | 50 +++++--------------- 1 file changed, 12 insertions(+), 38 deletions(-) diff --git a/pr_agent/git_providers/local_git_provider.py b/pr_agent/git_providers/local_git_provider.py index 7c157d81..4a7775ac 100644 --- a/pr_agent/git_providers/local_git_provider.py +++ b/pr_agent/git_providers/local_git_provider.py @@ -1,10 +1,9 @@ import logging -import uuid from collections import Counter from pathlib import Path from typing import List -from git import GitCommandError, Repo +from git import Repo from pr_agent.config_loader import _find_repository_root, settings from pr_agent.git_providers.git_provider import EDIT_TYPE, FilePatchInfo, GitProvider @@ -29,12 +28,11 @@ class LocalGitProvider(GitProvider): For the MVP it only supports the /review and /describe capabilities. """ - def __init__(self, branch_name, incremental=False): + def __init__(self, target_branch_name, incremental=False): self.repo_path = _find_repository_root() self.repo = Repo(self.repo_path) self.head_branch_name = self.repo.head.ref.name - self.branch_name = branch_name - self.tmp_branch_name = f'pr_agent_{uuid.uuid4()}' + self.target_branch_name = target_branch_name self._prepare_repo() self.diff_files = None self.pr = PullRequestMimic(self.get_pr_title(), self.get_diff_files()) @@ -45,20 +43,6 @@ class LocalGitProvider(GitProvider): # inline code comments are not supported for local git repositories settings.pr_reviewer.inline_code_comments = False - def __del__(self): - logging.debug('Deleting temporary branch...') - self.repo.git.checkout(self.head_branch_name) # switch back to the original branch - # delete the temporary branch - if self.tmp_branch_name not in self.repo.heads: - return - try: - self.repo.delete_head(self.tmp_branch_name, force=True) - except GitCommandError as e: - raise ValueError( - 'Error while trying to delete the temporary branch.' - 'Ensure the branch exists.' - ) from e - def _prepare_repo(self): """ Prepare the repository for PR-mimic generation. @@ -66,15 +50,8 @@ class LocalGitProvider(GitProvider): logging.debug('Preparing repository for PR-mimic generation...') if self.repo.is_dirty(): raise ValueError('The repository is not in a clean state. Please commit or stash pending changes.') - if self.tmp_branch_name in self.repo.heads: - self.repo.delete_head(self.tmp_branch_name, force=True) - self.repo.git.checkout('HEAD', b=self.tmp_branch_name) - - try: - logging.debug('Rebasing the temporary branch on the main branch...') - self.repo.git.rebase(self.branch_name) - except GitCommandError as e: - raise ValueError('Error while rebasing. Resolve conflicts before retrying.') from e + if self.target_branch_name not in self.repo.heads: + raise KeyError(f'Branch: {self.target_branch_name} does not exist') def is_supported(self, capability: str) -> bool: if capability in ['get_issue_comments', 'create_inline_comment', 'publish_inline_comments', 'get_labels']: @@ -83,7 +60,7 @@ class LocalGitProvider(GitProvider): def get_diff_files(self) -> list[FilePatchInfo]: diffs = self.repo.head.commit.diff( - self.repo.branches[self.branch_name].commit, + self.repo.merge_base(self.repo.head, self.repo.branches[self.target_branch_name]), create_patch=True, R=True ) @@ -120,13 +97,10 @@ class LocalGitProvider(GitProvider): """ Returns a list of files with changes in the diff. """ - # Assert existence of specific branch - branch_names = [ref.name for ref in self.repo.branches] - if self.branch_name not in branch_names: - raise KeyError(f"Branch: {self.branch_name} does not exist") - branch = self.repo.branches[self.branch_name] - # Compare the two branches - diff_index = self.repo.head.commit.diff(branch.commit) + diff_index = self.repo.head.commit.diff( + self.repo.merge_base(self.repo.head, self.repo.branches[self.target_branch_name]), + R=True + ) # Get the list of changed files diff_files = [item.a_path for item in diff_index] return diff_files @@ -183,7 +157,7 @@ class LocalGitProvider(GitProvider): return -1 # Not used anywhere for the local provider, but required by the interface def get_pr_description(self): - commits_diff = list(self.repo.iter_commits(self.branch_name + '..HEAD')) + commits_diff = list(self.repo.iter_commits(self.target_branch_name + '..HEAD')) # Get the commit messages and concatenate commit_messages = " ".join([commit.message for commit in commits_diff]) # TODO Handle the description better - maybe use gpt-3.5 summarisation here? @@ -193,7 +167,7 @@ class LocalGitProvider(GitProvider): """ Substitutes the branch-name as the PR-mimic title. """ - return self.branch_name + return self.target_branch_name def get_issue_comments(self): raise NotImplementedError('Getting issue comments is not implemented for the local git provider')