From ed8cf27b05a5326cdd0eddccd5d31eefa1444a64 Mon Sep 17 00:00:00 2001 From: Albert Achtenberg Date: Fri, 7 Jul 2023 15:02:40 +0300 Subject: [PATCH 01/17] working example --- pr_agent/git_providers/__init__.py | 6 ++- pr_agent/git_providers/git_provider.py | 38 +++++++++++++++ pr_agent/git_providers/github_provider.py | 22 ++++----- pr_agent/git_providers/gitlab_provider.py | 57 +++++++++++++++++++++++ pr_agent/settings/.secrets_template.toml | 7 +++ pr_agent/tools/pr_reviewer.py | 5 +- requirements.txt | 1 + 7 files changed, 118 insertions(+), 18 deletions(-) create mode 100644 pr_agent/git_providers/git_provider.py create mode 100644 pr_agent/git_providers/gitlab_provider.py diff --git a/pr_agent/git_providers/__init__.py b/pr_agent/git_providers/__init__.py index 54e52767..15f95ba7 100644 --- a/pr_agent/git_providers/__init__.py +++ b/pr_agent/git_providers/__init__.py @@ -1,15 +1,17 @@ from pr_agent.config_loader import settings from pr_agent.git_providers.github_provider import GithubProvider +from pr_agent.git_providers.gitlab_provider import GitLabProvider _GIT_PROVIDERS = { - 'github': GithubProvider + 'github': GithubProvider, + 'gitlab': GitLabProvider, } def get_git_provider(): try: provider_id = settings.config.git_provider except AttributeError as e: - raise ValueError("github_provider is a required attribute in the configuration file") from e + raise ValueError("git_provider is a required attribute in the configuration file") from e if provider_id not in _GIT_PROVIDERS: raise ValueError(f"Unknown git provider: {provider_id}") return _GIT_PROVIDERS[provider_id] diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py new file mode 100644 index 00000000..d3733b6d --- /dev/null +++ b/pr_agent/git_providers/git_provider.py @@ -0,0 +1,38 @@ + +from abc import ABC +from dataclasses import dataclass + + +@dataclass +class FilePatchInfo: + base_file: str + head_file: str + patch: str + filename: str + tokens: int = -1 + + +class GitProvider(ABC): + def get_diff_files(self) -> list[FilePatchInfo]: + pass + + def publish_comment(self, pr_comment: str, is_temporary: bool = False): + pass + + def remove_initial_comment(self): + pass + + def get_languages(self): + pass + + def get_main_pr_language(self) -> str: + pass + + def get_pr_branch(self): + pass + + def get_user_id(self): + pass + + def get_pr_description(): + pass \ No newline at end of file diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index a03d0bee..bd5576c4 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -1,25 +1,17 @@ import logging -from collections import namedtuple -from dataclasses import dataclass from datetime import datetime from typing import Optional, Tuple from urllib.parse import urlparse -from github import AppAuthentication, File, Github +from github import AppAuthentication, Github from pr_agent.config_loader import settings +from .git_provider import FilePatchInfo -@dataclass -class FilePatchInfo: - base_file: str - head_file: str - patch: str - filename: str - tokens: int = -1 class GithubProvider: - def __init__(self, pr_url: Optional[str] = None, installation_id: Optional[int] = None): - self.installation_id = installation_id + def __init__(self, pr_url: Optional[str] = None): + self.installation_id = settings.get("GITHUB.INSTALLATION_ID") self.github_client = self._get_github_client() self.repo = None self.pr_num = None @@ -65,7 +57,8 @@ class GithubProvider: return self.pr.body def get_languages(self): - return self._get_repo().get_languages() + languages = self._get_repo().get_languages() + return languages def get_main_pr_language(self) -> str: """ @@ -112,6 +105,9 @@ class GithubProvider: def get_pr_branch(self): return self.pr.head.ref + def get_pr_description(self): + return self.pr.body + def get_user_id(self): if not self.github_user_id: try: diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py new file mode 100644 index 00000000..54c27ad3 --- /dev/null +++ b/pr_agent/git_providers/gitlab_provider.py @@ -0,0 +1,57 @@ +import gitlab +from typing import Optional, Tuple + + +class GitLabProvider: + def __init__(self, merge_request_url: Optional[str] = None, personal_access_token: Optional[str] = None): + self.gl = gitlab.Gitlab('https://your.gitlab.com', private_token=personal_access_token) + self.project = None + self.mr_iid = None + self.mr = None + if merge_request_url: + self.set_merge_request(merge_request_url) + + def set_merge_request(self, merge_request_url: str): + self.project, self.mr_iid = self._parse_merge_request_url(merge_request_url) + self.mr = self._get_merge_request() + + def get_diff_files(self) -> list[FilePatchInfo]: + diffs = self.mr.diffs.list() + diff_files = [] + for diff in diffs: + # GitLab doesn't provide base and head files. Only diffs are available. + diff_files.append(FilePatchInfo("", "", diff['diff'], diff['new_path'])) + return diff_files + + def publish_comment(self, mr_comment: str): + self.mr.notes.create({'body': mr_comment}) + + def get_title(self): + return self.mr.title + + def get_description(self): + return self.mr.description + + def get_languages(self): + # GitLab does not have a direct equivalent to get_languages(). + # An alternative could be to manually parse all the repository files and determine the language from the file extensions. + raise NotImplementedError + + def get_main_pr_language(self) -> str: + # Similar issue as get_languages(). + raise NotImplementedError + + def get_pr_branch(self): + return self.mr.source_branch + + def get_notifications(self): + # GitLab doesn't provide a notifications API similar to GitHub's. + raise NotImplementedError + + @staticmethod + def _parse_merge_request_url(merge_request_url: str) -> Tuple[str, int]: + # This function will depend on your GitLab setup and URL structure + raise NotImplementedError + + def _get_merge_request(self): + return self.gl.projects.get(self.project).mergerequests.get(self.mr_iid) diff --git a/pr_agent/settings/.secrets_template.toml b/pr_agent/settings/.secrets_template.toml index 59f4625b..82812734 100644 --- a/pr_agent/settings/.secrets_template.toml +++ b/pr_agent/settings/.secrets_template.toml @@ -25,3 +25,10 @@ private_key = """\ """ app_id = 123456 # The GitHub App ID, replace with your own. webhook_secret = "" # Optional, may be commented out. + +[gitlab] +# The type of deployment to create. Valid values are 'app' or 'user'. +personal_access_token = "" + +# URL to the gitlab service +gitlab_url = "https://gitlab.com" \ No newline at end of file diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index b39a231e..214056a1 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -16,9 +16,8 @@ from pr_agent.git_providers import get_git_provider class PRReviewer: def __init__(self, pr_url: str, installation_id: Optional[int] = None, cli_mode=False): - self.git_provider = get_git_provider()(pr_url, installation_id) + self.git_provider = get_git_provider()(pr_url) self.main_language = self.git_provider.get_main_pr_language() - self.installation_id = installation_id self.ai_handler = AiHandler() self.patches_diff = None self.prediction = None @@ -26,7 +25,7 @@ class PRReviewer: self.vars = { "title": self.git_provider.pr.title, "branch": self.git_provider.get_pr_branch(), - "description": self.git_provider.pr.body, + "description": self.git_provider.get_pr_description(), "language": self.main_language, "diff": "", # empty diff for initial calculation "require_tests": settings.pr_reviewer.require_tests_review, diff --git a/requirements.txt b/requirements.txt index d7610000..4a6a6255 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,4 @@ openai==0.27.8 Jinja2==3.1.2 tiktoken==0.4.0 uvicorn==0.22.0 +python-gitlab==3.15.0 From 7ed12c2f8e9a88b3a909dacd5c9d362f2ea424b0 Mon Sep 17 00:00:00 2001 From: salberts Date: Fri, 7 Jul 2023 16:10:33 +0300 Subject: [PATCH 02/17] refactor --- pr_agent/git_providers/git_provider.py | 47 ++++++++-- pr_agent/git_providers/github_provider.py | 45 +--------- pr_agent/git_providers/gitlab_provider.py | 103 +++++++++++++++------- pr_agent/tools/pr_questions.py | 6 +- pr_agent/tools/pr_reviewer.py | 5 +- 5 files changed, 126 insertions(+), 80 deletions(-) diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py index d3733b6d..30f7f7cd 100644 --- a/pr_agent/git_providers/git_provider.py +++ b/pr_agent/git_providers/git_provider.py @@ -25,14 +25,51 @@ class GitProvider(ABC): def get_languages(self): pass - def get_main_pr_language(self) -> str: - pass - def get_pr_branch(self): pass def get_user_id(self): pass - def get_pr_description(): - pass \ No newline at end of file + def get_pr_description(self): + pass + + def get_main_pr_language(self, languages, files) -> str: + """ + Get the main language of the commit. Return an empty string if cannot determine. + """ + main_language_str = "" + try: + top_language = max(languages, key=languages.get).lower() + + # validate that the specific commit uses the main language + extension_list = [] + for file in files: + extension_list.append(file.filename.rsplit('.')[-1]) + + # get the most common extension + most_common_extension = max(set(extension_list), key=extension_list.count) + + # look for a match. TBD: add more languages, do this systematically + if most_common_extension == 'py' and top_language == 'python' or \ + most_common_extension == 'js' and top_language == 'javascript' or \ + most_common_extension == 'ts' and top_language == 'typescript' or \ + most_common_extension == 'go' and top_language == 'go' or \ + most_common_extension == 'java' and top_language == 'java' or \ + most_common_extension == 'c' and top_language == 'c' or \ + most_common_extension == 'cpp' and top_language == 'c++' or \ + most_common_extension == 'cs' and top_language == 'c#' or \ + most_common_extension == 'swift' and top_language == 'swift' or \ + most_common_extension == 'php' and top_language == 'php' or \ + most_common_extension == 'rb' and top_language == 'ruby' or \ + most_common_extension == 'rs' and top_language == 'rust' or \ + most_common_extension == 'scala' and top_language == 'scala' or \ + most_common_extension == 'kt' and top_language == 'kotlin' or \ + most_common_extension == 'pl' and top_language == 'perl' or \ + most_common_extension == 'swift' and top_language == 'swift': + main_language_str = top_language + + except Exception: + pass + + return main_language_str \ No newline at end of file diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index bd5576c4..0b65c539 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -24,6 +24,9 @@ class GithubProvider: self.repo, self.pr_num = self._parse_pr_url(pr_url) self.pr = self._get_pr() + def get_files(self): + return self.pr.get_files() + def get_diff_files(self) -> list[FilePatchInfo]: files = self.pr.get_files() diff_files = [] @@ -60,48 +63,6 @@ class GithubProvider: languages = self._get_repo().get_languages() return languages - def get_main_pr_language(self) -> str: - """ - Get the main language of the commit. Return an empty string if cannot determine. - """ - main_language_str = "" - try: - languages = self.get_languages() - top_language = max(languages, key=languages.get).lower() - - # validate that the specific commit uses the main language - extension_list = [] - files = self.pr.get_files() - for file in files: - extension_list.append(file.filename.rsplit('.')[-1]) - - # get the most common extension - most_common_extension = max(set(extension_list), key=extension_list.count) - - # look for a match. TBD: add more languages, do this systematically - if most_common_extension == 'py' and top_language == 'python' or \ - most_common_extension == 'js' and top_language == 'javascript' or \ - most_common_extension == 'ts' and top_language == 'typescript' or \ - most_common_extension == 'go' and top_language == 'go' or \ - most_common_extension == 'java' and top_language == 'java' or \ - most_common_extension == 'c' and top_language == 'c' or \ - most_common_extension == 'cpp' and top_language == 'c++' or \ - most_common_extension == 'cs' and top_language == 'c#' or \ - most_common_extension == 'swift' and top_language == 'swift' or \ - most_common_extension == 'php' and top_language == 'php' or \ - most_common_extension == 'rb' and top_language == 'ruby' or \ - most_common_extension == 'rs' and top_language == 'rust' or \ - most_common_extension == 'scala' and top_language == 'scala' or \ - most_common_extension == 'kt' and top_language == 'kotlin' or \ - most_common_extension == 'pl' and top_language == 'perl' or \ - most_common_extension == 'swift' and top_language == 'swift': - main_language_str = top_language - - except Exception: - pass - - return main_language_str - def get_pr_branch(self): return self.pr.head.ref diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index 54c27ad3..3c1c1e6e 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -1,30 +1,54 @@ +from urllib.parse import urlparse import gitlab from typing import Optional, Tuple +from pr_agent.config_loader import settings -class GitLabProvider: - def __init__(self, merge_request_url: Optional[str] = None, personal_access_token: Optional[str] = None): - self.gl = gitlab.Gitlab('https://your.gitlab.com', private_token=personal_access_token) - self.project = None - self.mr_iid = None +from .git_provider import FilePatchInfo, GitProvider + + +class GitLabProvider(GitProvider): + def __init__(self, merge_request_url: Optional[str] = None): + self.gl = gitlab.Gitlab( + settings.get("GITLAB.URL"), + private_token=settings.get("GITLAB.PERSONAL_ACCESS_TOKEN") + ) + + self.id_project = None + self.id_mr = None self.mr = None - if merge_request_url: - self.set_merge_request(merge_request_url) + self.temp_comments = [] + + self.set_merge_request(merge_request_url) + + @property + def pr(self): + return self.mr def set_merge_request(self, merge_request_url: str): - self.project, self.mr_iid = self._parse_merge_request_url(merge_request_url) + self.id_project, self.id_mr = self._parse_merge_request_url(merge_request_url) self.mr = self._get_merge_request() def get_diff_files(self) -> list[FilePatchInfo]: - diffs = self.mr.diffs.list() - diff_files = [] - for diff in diffs: - # GitLab doesn't provide base and head files. Only diffs are available. - diff_files.append(FilePatchInfo("", "", diff['diff'], diff['new_path'])) + diffs = self.mr.changes()['changes'] + diff_files = [FilePatchInfo("", "", diff['diff'], diff['new_path']) for diff in diffs] return diff_files - def publish_comment(self, mr_comment: str): - self.mr.notes.create({'body': mr_comment}) + def get_files(self): + return [change['new_path'] for change in self.mr.changes()['changes']] + + def publish_comment(self, mr_comment: str, is_temporary: bool = False): + comment = self.mr.notes.create({'body': mr_comment}) + print(comment) + if is_temporary: + self.temp_comments.append(comment) + + def remove_initial_comment(self): + try: + for comment in self.temp_comments: + comment.delete() + except Exception as e: + logging.exception(f"Failed to remove temp comments, error: {e}") def get_title(self): return self.mr.title @@ -33,25 +57,44 @@ class GitLabProvider: return self.mr.description def get_languages(self): - # GitLab does not have a direct equivalent to get_languages(). - # An alternative could be to manually parse all the repository files and determine the language from the file extensions. - raise NotImplementedError - - def get_main_pr_language(self) -> str: - # Similar issue as get_languages(). - raise NotImplementedError + languages = self.gl.projects.get(self.id_project).languages() + return languages def get_pr_branch(self): return self.mr.source_branch - def get_notifications(self): - # GitLab doesn't provide a notifications API similar to GitHub's. - raise NotImplementedError + def get_pr_description(self): + return self.mr.description - @staticmethod - def _parse_merge_request_url(merge_request_url: str) -> Tuple[str, int]: - # This function will depend on your GitLab setup and URL structure - raise NotImplementedError + def _parse_merge_request_url(self, merge_request_url: str) -> Tuple[int, int]: + parsed_url = urlparse(merge_request_url) + + path_parts = parsed_url.path.strip('/').split('/') + print(path_parts) + if path_parts[-2] != 'merge_requests': + raise ValueError("The provided URL does not appear to be a GitLab merge request URL") + + try: + mr_id = int(path_parts[-1]) + except ValueError as e: + raise ValueError("Unable to convert merge request ID to integer") from e + + # Either user or group + namespace, project_name = path_parts[:2] + + # Workaround for gitlab API limitation + project_ids = [ + project.get_id() for project in self.gl.projects.list(search=project_name, simple=True) + if project.path_with_namespace == f"{namespace}/{project_name}" + ] + + if len(project_ids) == 0: + raise ValueError(f"Unable to find project with name {project_name}") + elif len(project_ids) > 1: + raise ValueError(f"Multiple projects found with name {namespace}/{project_name}") + + return project_ids[0], mr_id def _get_merge_request(self): - return self.gl.projects.get(self.project).mergerequests.get(self.mr_iid) + mr = self.gl.projects.get(self.id_project).mergerequests.get(self.id_mr) + return mr diff --git a/pr_agent/tools/pr_questions.py b/pr_agent/tools/pr_questions.py index 51cef14f..4cadf2d9 100644 --- a/pr_agent/tools/pr_questions.py +++ b/pr_agent/tools/pr_questions.py @@ -14,7 +14,9 @@ from pr_agent.git_providers import get_git_provider class PRQuestions: def __init__(self, pr_url: str, question_str: str, installation_id: Optional[int] = None): self.git_provider = get_git_provider()(pr_url, installation_id) - self.main_pr_language = self.git_provider.get_main_pr_language() + self.main_pr_language = self.git_provider.get_main_pr_language( + self.git_provider.get_languages(), self.git_provider.get_files() + ) self.installation_id = installation_id self.ai_handler = AiHandler() self.question_str = question_str @@ -22,7 +24,7 @@ class PRQuestions: "title": self.git_provider.pr.title, "branch": self.git_provider.get_pr_branch(), "description": self.git_provider.pr.body, - "language": self.git_provider.get_main_pr_language(), + "language": self.main_pr_language, "diff": "", # empty diff for initial calculation "questions": self.question_str, } diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index 214056a1..418b2b9d 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -17,7 +17,10 @@ class PRReviewer: def __init__(self, pr_url: str, installation_id: Optional[int] = None, cli_mode=False): self.git_provider = get_git_provider()(pr_url) - self.main_language = self.git_provider.get_main_pr_language() + print(dir(self.git_provider.pr)) + self.main_language = self.git_provider.get_main_pr_language( + self.git_provider.get_languages(), self.git_provider.get_files() + ) self.ai_handler = AiHandler() self.patches_diff = None self.prediction = None From 1a940799368a7717210cd34eb64171132f1a198d Mon Sep 17 00:00:00 2001 From: salberts Date: Fri, 7 Jul 2023 16:15:51 +0300 Subject: [PATCH 03/17] style --- pr_agent/tools/pr_questions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_agent/tools/pr_questions.py b/pr_agent/tools/pr_questions.py index 4cadf2d9..5b8c503f 100644 --- a/pr_agent/tools/pr_questions.py +++ b/pr_agent/tools/pr_questions.py @@ -25,7 +25,7 @@ class PRQuestions: "branch": self.git_provider.get_pr_branch(), "description": self.git_provider.pr.body, "language": self.main_pr_language, - "diff": "", # empty diff for initial calculation + "diff": "", # empty diff for initial calculation "questions": self.question_str, } self.token_handler = TokenHandler(self.git_provider.pr, From 7d49e080fcea3d4c57963e23381dc3ea9076ce56 Mon Sep 17 00:00:00 2001 From: salberts Date: Fri, 7 Jul 2023 16:24:02 +0300 Subject: [PATCH 04/17] remove prints --- pr_agent/git_providers/gitlab_provider.py | 2 -- pr_agent/tools/pr_reviewer.py | 1 - 2 files changed, 3 deletions(-) diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index 3c1c1e6e..a6bdd0ec 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -39,7 +39,6 @@ class GitLabProvider(GitProvider): def publish_comment(self, mr_comment: str, is_temporary: bool = False): comment = self.mr.notes.create({'body': mr_comment}) - print(comment) if is_temporary: self.temp_comments.append(comment) @@ -70,7 +69,6 @@ class GitLabProvider(GitProvider): parsed_url = urlparse(merge_request_url) path_parts = parsed_url.path.strip('/').split('/') - print(path_parts) if path_parts[-2] != 'merge_requests': raise ValueError("The provided URL does not appear to be a GitLab merge request URL") diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index 418b2b9d..2c0b606f 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -17,7 +17,6 @@ class PRReviewer: def __init__(self, pr_url: str, installation_id: Optional[int] = None, cli_mode=False): self.git_provider = get_git_provider()(pr_url) - print(dir(self.git_provider.pr)) self.main_language = self.git_provider.get_main_pr_language( self.git_provider.get_languages(), self.git_provider.get_files() ) From 8b3ff7a632fd15f5960aa8155f8a87c71abec9de Mon Sep 17 00:00:00 2001 From: salberts Date: Fri, 7 Jul 2023 16:31:28 +0300 Subject: [PATCH 05/17] bugfix --- pr_agent/git_providers/git_provider.py | 68 +++++++++++++------------- pr_agent/tools/pr_questions.py | 3 +- pr_agent/tools/pr_reviewer.py | 5 +- 3 files changed, 39 insertions(+), 37 deletions(-) diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py index 30f7f7cd..b5d8df23 100644 --- a/pr_agent/git_providers/git_provider.py +++ b/pr_agent/git_providers/git_provider.py @@ -34,42 +34,42 @@ class GitProvider(ABC): def get_pr_description(self): pass - def get_main_pr_language(self, languages, files) -> str: - """ - Get the main language of the commit. Return an empty string if cannot determine. - """ - main_language_str = "" - try: - top_language = max(languages, key=languages.get).lower() +def get_main_pr_language(languages, files) -> str: + """ + Get the main language of the commit. Return an empty string if cannot determine. + """ + main_language_str = "" + try: + top_language = max(languages, key=languages.get).lower() - # validate that the specific commit uses the main language - extension_list = [] - for file in files: - extension_list.append(file.filename.rsplit('.')[-1]) + # validate that the specific commit uses the main language + extension_list = [] + for file in files: + extension_list.append(file.filename.rsplit('.')[-1]) - # get the most common extension - most_common_extension = max(set(extension_list), key=extension_list.count) + # get the most common extension + most_common_extension = max(set(extension_list), key=extension_list.count) - # look for a match. TBD: add more languages, do this systematically - if most_common_extension == 'py' and top_language == 'python' or \ - most_common_extension == 'js' and top_language == 'javascript' or \ - most_common_extension == 'ts' and top_language == 'typescript' or \ - most_common_extension == 'go' and top_language == 'go' or \ - most_common_extension == 'java' and top_language == 'java' or \ - most_common_extension == 'c' and top_language == 'c' or \ - most_common_extension == 'cpp' and top_language == 'c++' or \ - most_common_extension == 'cs' and top_language == 'c#' or \ - most_common_extension == 'swift' and top_language == 'swift' or \ - most_common_extension == 'php' and top_language == 'php' or \ - most_common_extension == 'rb' and top_language == 'ruby' or \ - most_common_extension == 'rs' and top_language == 'rust' or \ - most_common_extension == 'scala' and top_language == 'scala' or \ - most_common_extension == 'kt' and top_language == 'kotlin' or \ - most_common_extension == 'pl' and top_language == 'perl' or \ - most_common_extension == 'swift' and top_language == 'swift': - main_language_str = top_language + # look for a match. TBD: add more languages, do this systematically + if most_common_extension == 'py' and top_language == 'python' or \ + most_common_extension == 'js' and top_language == 'javascript' or \ + most_common_extension == 'ts' and top_language == 'typescript' or \ + most_common_extension == 'go' and top_language == 'go' or \ + most_common_extension == 'java' and top_language == 'java' or \ + most_common_extension == 'c' and top_language == 'c' or \ + most_common_extension == 'cpp' and top_language == 'c++' or \ + most_common_extension == 'cs' and top_language == 'c#' or \ + most_common_extension == 'swift' and top_language == 'swift' or \ + most_common_extension == 'php' and top_language == 'php' or \ + most_common_extension == 'rb' and top_language == 'ruby' or \ + most_common_extension == 'rs' and top_language == 'rust' or \ + most_common_extension == 'scala' and top_language == 'scala' or \ + most_common_extension == 'kt' and top_language == 'kotlin' or \ + most_common_extension == 'pl' and top_language == 'perl' or \ + most_common_extension == 'swift' and top_language == 'swift': + main_language_str = top_language - except Exception: - pass + except Exception: + pass - return main_language_str \ No newline at end of file + return main_language_str \ No newline at end of file diff --git a/pr_agent/tools/pr_questions.py b/pr_agent/tools/pr_questions.py index 5b8c503f..c36ce74e 100644 --- a/pr_agent/tools/pr_questions.py +++ b/pr_agent/tools/pr_questions.py @@ -9,12 +9,13 @@ from pr_agent.algo.pr_processing import get_pr_diff from pr_agent.algo.token_handler import TokenHandler from pr_agent.config_loader import settings from pr_agent.git_providers import get_git_provider +from pr_agent.git_providers.git_provider import get_main_pr_language class PRQuestions: def __init__(self, pr_url: str, question_str: str, installation_id: Optional[int] = None): self.git_provider = get_git_provider()(pr_url, installation_id) - self.main_pr_language = self.git_provider.get_main_pr_language( + self.main_pr_language = get_main_pr_language( self.git_provider.get_languages(), self.git_provider.get_files() ) self.installation_id = installation_id diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index 2c0b606f..ddc69e4d 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -10,14 +10,15 @@ from pr_agent.algo.pr_processing import get_pr_diff from pr_agent.algo.token_handler import TokenHandler from pr_agent.algo.utils import convert_to_markdown from pr_agent.config_loader import settings -from pr_agent.git_providers import get_git_provider +from pr_agent.git_providers import get_git_provider, +from pr_agent.git_provider import get_main_pr_language class PRReviewer: def __init__(self, pr_url: str, installation_id: Optional[int] = None, cli_mode=False): self.git_provider = get_git_provider()(pr_url) - self.main_language = self.git_provider.get_main_pr_language( + self.main_language = get_main_pr_language( self.git_provider.get_languages(), self.git_provider.get_files() ) self.ai_handler = AiHandler() From ee3cac98363c9636f57f82d533acd5ee8b9fad68 Mon Sep 17 00:00:00 2001 From: salberts Date: Fri, 7 Jul 2023 16:33:25 +0300 Subject: [PATCH 06/17] bugfix --- pr_agent/tools/pr_reviewer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index ddc69e4d..dd8d56d3 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -10,8 +10,8 @@ from pr_agent.algo.pr_processing import get_pr_diff from pr_agent.algo.token_handler import TokenHandler from pr_agent.algo.utils import convert_to_markdown from pr_agent.config_loader import settings -from pr_agent.git_providers import get_git_provider, -from pr_agent.git_provider import get_main_pr_language +from pr_agent.git_providers import get_git_provider +from pr_agent.git_providers.git_provider import get_main_pr_language class PRReviewer: From caff65613f9c1a2d7d33cd2a4f0d56e017a5ea02 Mon Sep 17 00:00:00 2001 From: salberts Date: Fri, 7 Jul 2023 16:36:56 +0300 Subject: [PATCH 07/17] docs --- pr_agent/git_providers/gitlab_provider.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index a6bdd0ec..b6b53eca 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -23,6 +23,7 @@ class GitLabProvider(GitProvider): @property def pr(self): + '''The GitLab terminology is merge request (MR) instead of pull request (PR)''' return self.mr def set_merge_request(self, merge_request_url: str): From e63a4f47ce6db3557664dd90f75abb09a1413d65 Mon Sep 17 00:00:00 2001 From: salberts Date: Fri, 7 Jul 2023 17:06:53 +0300 Subject: [PATCH 08/17] bugfixes --- pr_agent/tools/pr_questions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pr_agent/tools/pr_questions.py b/pr_agent/tools/pr_questions.py index c36ce74e..38cd68b8 100644 --- a/pr_agent/tools/pr_questions.py +++ b/pr_agent/tools/pr_questions.py @@ -14,7 +14,7 @@ from pr_agent.git_providers.git_provider import get_main_pr_language class PRQuestions: def __init__(self, pr_url: str, question_str: str, installation_id: Optional[int] = None): - self.git_provider = get_git_provider()(pr_url, installation_id) + self.git_provider = get_git_provider()(pr_url) self.main_pr_language = get_main_pr_language( self.git_provider.get_languages(), self.git_provider.get_files() ) @@ -24,7 +24,7 @@ class PRQuestions: self.vars = { "title": self.git_provider.pr.title, "branch": self.git_provider.get_pr_branch(), - "description": self.git_provider.pr.body, + "description": self.git_provider.get_description(), "language": self.main_pr_language, "diff": "", # empty diff for initial calculation "questions": self.question_str, From 75167c270062ec46bf4817d4e8b3fdabbdab346c Mon Sep 17 00:00:00 2001 From: salberts Date: Sat, 8 Jul 2023 08:52:11 +0300 Subject: [PATCH 09/17] add polling --- pr_agent/agent/pr_agent.py | 8 +-- pr_agent/config_loader.py | 1 + pr_agent/servers/gitlab_polling.py | 66 ++++++++++++++++++++++++ pr_agent/settings/.secrets_template.toml | 6 +-- pr_agent/settings/configuration.toml | 12 ++++- pr_agent/tools/pr_questions.py | 3 +- pr_agent/tools/pr_reviewer.py | 2 +- 7 files changed, 85 insertions(+), 13 deletions(-) create mode 100644 pr_agent/servers/gitlab_polling.py diff --git a/pr_agent/agent/pr_agent.py b/pr_agent/agent/pr_agent.py index 5958d15f..7e4a6c22 100644 --- a/pr_agent/agent/pr_agent.py +++ b/pr_agent/agent/pr_agent.py @@ -6,12 +6,12 @@ from pr_agent.tools.pr_reviewer import PRReviewer class PRAgent: - def __init__(self, installation_id: Optional[int] = None): - self.installation_id = installation_id + def __init__(self): + pass async def handle_request(self, pr_url, request): if 'please review' in request.lower() or 'review' == request.lower().strip() or len(request) == 0: - reviewer = PRReviewer(pr_url, self.installation_id) + reviewer = PRReviewer(pr_url) await reviewer.review() else: @@ -21,5 +21,5 @@ class PRAgent: question = re.split(r'(?i)answer', request)[1].strip() else: question = request - answerer = PRQuestions(pr_url, question, self.installation_id) + answerer = PRQuestions(pr_url, question) await answerer.answer() diff --git a/pr_agent/config_loader.py b/pr_agent/config_loader.py index 24dfdea7..550f743e 100644 --- a/pr_agent/config_loader.py +++ b/pr_agent/config_loader.py @@ -5,6 +5,7 @@ from dynaconf import Dynaconf current_dir = dirname(abspath(__file__)) settings = Dynaconf( envvar_prefix=False, + merge_enabled=True, settings_files=[join(current_dir, f) for f in [ "settings/.secrets.toml", "settings/configuration.toml", diff --git a/pr_agent/servers/gitlab_polling.py b/pr_agent/servers/gitlab_polling.py new file mode 100644 index 00000000..d86a7fe2 --- /dev/null +++ b/pr_agent/servers/gitlab_polling.py @@ -0,0 +1,66 @@ +import asyncio +import time +from urllib.parse import urlparse +import gitlab +from pr_agent.agent.pr_agent import PRAgent + +from pr_agent.config_loader import settings + + +gl = gitlab.Gitlab( + settings.get("GITLAB.URL"), + private_token=settings.get("GITLAB.PERSONAL_ACCESS_TOKEN") +) + +# Set the list of projects to monitor +projects_to_monitor = settings.get("GITLAB.PROJECTS_TO_MONITOR") +magic_word = settings.get("GITLAB.MAGIC_WORD") + +print(projects_to_monitor, magic_word, settings.get("GITLAB.PERSONAL_ACCESS_TOKEN")) +# Hold the previous seen comments +previous_comments = set() + + +def check_comments(): + print('Polling') + new_comments = {} + for project in projects_to_monitor: + project = gl.projects.get(project) + merge_requests = project.mergerequests.list(state='opened') + for mr in merge_requests: + notes = mr.notes.list(iterator=True) + for note in notes: + if note.id not in previous_comments and note.body.startswith(magic_word): + new_comments[note.id] = dict( + body=note.body[len(magic_word):], + project=project.name, + mr=mr + ) + previous_comments.add(note.id) + print(f"New comment in project {project.name}, merge request {mr.title}: {note.body}") + + return new_comments + + +def handle_new_comments(new_comments): + print('Handling new comments') + agent = PRAgent() + for _, comment in new_comments.items(): + print(f"Handling comment: {comment['body']}") + asyncio.run(agent.handle_request(comment['mr'].web_url, comment['body'])) + + +def run(): + + # Initial run to populate previous_comments + check_comments() + + # Run the check every minute + while True: + # time.sleep(60) + new_comments = check_comments() + if new_comments: + handle_new_comments(new_comments) + +if __name__ == '__main__': + run() diff --git a/pr_agent/settings/.secrets_template.toml b/pr_agent/settings/.secrets_template.toml index 82812734..9ed9ed15 100644 --- a/pr_agent/settings/.secrets_template.toml +++ b/pr_agent/settings/.secrets_template.toml @@ -27,8 +27,4 @@ app_id = 123456 # The GitHub App ID, replace with your own. webhook_secret = "" # Optional, may be commented out. [gitlab] -# The type of deployment to create. Valid values are 'app' or 'user'. -personal_access_token = "" - -# URL to the gitlab service -gitlab_url = "https://gitlab.com" \ No newline at end of file +personal_access_token = "" \ No newline at end of file diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 7368b1eb..086b96bb 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -12,4 +12,14 @@ extended_code_suggestions=false num_code_suggestions=4 -[pr_questions] \ No newline at end of file +[pr_questions] + +[gitlab] +# URL to the gitlab service +gitlab_url = "https://gitlab.com" + +# Polling +projects_to_monitor = [47494341] + +# Polling trigger +magic_word = "MagicRound" \ No newline at end of file diff --git a/pr_agent/tools/pr_questions.py b/pr_agent/tools/pr_questions.py index 38cd68b8..eb8e6bcb 100644 --- a/pr_agent/tools/pr_questions.py +++ b/pr_agent/tools/pr_questions.py @@ -13,12 +13,11 @@ from pr_agent.git_providers.git_provider import get_main_pr_language class PRQuestions: - def __init__(self, pr_url: str, question_str: str, installation_id: Optional[int] = None): + def __init__(self, pr_url: str, question_str: str): self.git_provider = get_git_provider()(pr_url) self.main_pr_language = get_main_pr_language( self.git_provider.get_languages(), self.git_provider.get_files() ) - self.installation_id = installation_id self.ai_handler = AiHandler() self.question_str = question_str self.vars = { diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index dd8d56d3..b43f274d 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -15,7 +15,7 @@ from pr_agent.git_providers.git_provider import get_main_pr_language class PRReviewer: - def __init__(self, pr_url: str, installation_id: Optional[int] = None, cli_mode=False): + def __init__(self, pr_url: str, cli_mode=False): self.git_provider = get_git_provider()(pr_url) self.main_language = get_main_pr_language( From d4adcb3c22df262bde9377328783e37e71d8f81e Mon Sep 17 00:00:00 2001 From: salberts Date: Sat, 8 Jul 2023 10:26:41 +0300 Subject: [PATCH 10/17] Configurable polling interval --- pr_agent/servers/gitlab_polling.py | 2 +- pr_agent/settings/configuration.toml | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pr_agent/servers/gitlab_polling.py b/pr_agent/servers/gitlab_polling.py index d86a7fe2..529fae14 100644 --- a/pr_agent/servers/gitlab_polling.py +++ b/pr_agent/servers/gitlab_polling.py @@ -57,7 +57,7 @@ def run(): # Run the check every minute while True: - # time.sleep(60) + time.sleep(settings.get("GITLAB.POLLING_INTERVAL_SECONDS")) new_comments = check_comments() if new_comments: handle_new_comments(new_comments) diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 086b96bb..230b86e0 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -22,4 +22,7 @@ gitlab_url = "https://gitlab.com" projects_to_monitor = [47494341] # Polling trigger -magic_word = "MagicRound" \ No newline at end of file +magic_word = "MagicRound" + +# Polling interval +polling_interval_seconds = 60 \ No newline at end of file From 365559405fd6522d27323f8493bb963d06ca9ade Mon Sep 17 00:00:00 2001 From: salberts Date: Sat, 8 Jul 2023 11:46:41 +0300 Subject: [PATCH 11/17] Simplify gitlab project access --- pr_agent/git_providers/gitlab_provider.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index b6b53eca..2082fa33 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -78,21 +78,8 @@ class GitLabProvider(GitProvider): except ValueError as e: raise ValueError("Unable to convert merge request ID to integer") from e - # Either user or group - namespace, project_name = path_parts[:2] - - # Workaround for gitlab API limitation - project_ids = [ - project.get_id() for project in self.gl.projects.list(search=project_name, simple=True) - if project.path_with_namespace == f"{namespace}/{project_name}" - ] - - if len(project_ids) == 0: - raise ValueError(f"Unable to find project with name {project_name}") - elif len(project_ids) > 1: - raise ValueError(f"Multiple projects found with name {namespace}/{project_name}") - - return project_ids[0], mr_id + # Gitlab supports access by both project numeric ID as well as 'namespace/project_name' + return "/".join(path_parts[:2]), mr_id def _get_merge_request(self): mr = self.gl.projects.get(self.id_project).mergerequests.get(self.id_mr) From 9695d96799256720db445138e2d7f99a48609501 Mon Sep 17 00:00:00 2001 From: salberts Date: Sat, 8 Jul 2023 11:49:11 +0300 Subject: [PATCH 12/17] Simplify project identification --- pr_agent/settings/configuration.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 230b86e0..6a9d0694 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -18,11 +18,11 @@ num_code_suggestions=4 # URL to the gitlab service gitlab_url = "https://gitlab.com" -# Polling -projects_to_monitor = [47494341] +# Polling (either proheheject id or namespace/project_name) syntax can be used +projects_to_monitor = [47494341, 'my_user/my_project', 'my_group/my_project'] # Polling trigger -magic_word = "MagicRound" +magic_word = "AutoReview" # Polling interval polling_interval_seconds = 60 \ No newline at end of file From fb4badd160a3a31e0f4185c3f49698592ef960e2 Mon Sep 17 00:00:00 2001 From: salberts Date: Sat, 8 Jul 2023 12:14:32 +0300 Subject: [PATCH 13/17] changes --- pr_agent/servers/gitlab_polling.py | 5 ++--- pr_agent/settings/configuration.toml | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pr_agent/servers/gitlab_polling.py b/pr_agent/servers/gitlab_polling.py index 529fae14..4409d176 100644 --- a/pr_agent/servers/gitlab_polling.py +++ b/pr_agent/servers/gitlab_polling.py @@ -16,7 +16,6 @@ gl = gitlab.Gitlab( projects_to_monitor = settings.get("GITLAB.PROJECTS_TO_MONITOR") magic_word = settings.get("GITLAB.MAGIC_WORD") -print(projects_to_monitor, magic_word, settings.get("GITLAB.PERSONAL_ACCESS_TOKEN")) # Hold the previous seen comments previous_comments = set() @@ -28,7 +27,7 @@ def check_comments(): project = gl.projects.get(project) merge_requests = project.mergerequests.list(state='opened') for mr in merge_requests: - notes = mr.notes.list(iterator=True) + notes = mr.notes.list(get_all=True) for note in notes: if note.id not in previous_comments and note.body.startswith(magic_word): new_comments[note.id] = dict( @@ -51,7 +50,7 @@ def handle_new_comments(new_comments): def run(): - + assert settings.get('CONFIG.GIT_PROVIDER') == 'gitlab', 'This script is only for GitLab' # Initial run to populate previous_comments check_comments() diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 6a9d0694..9d5f1642 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -19,10 +19,10 @@ num_code_suggestions=4 gitlab_url = "https://gitlab.com" # Polling (either proheheject id or namespace/project_name) syntax can be used -projects_to_monitor = [47494341, 'my_user/my_project', 'my_group/my_project'] +projects_to_monitor = ['nuclai/algo', 'nuclai/pr-agent-test'] # Polling trigger magic_word = "AutoReview" # Polling interval -polling_interval_seconds = 60 \ No newline at end of file +polling_interval_seconds = 300 \ No newline at end of file From 567475c18c91b9780e44b2f117f50ca985935f13 Mon Sep 17 00:00:00 2001 From: Albert <1929813+Xyand@users.noreply.github.com> Date: Sat, 8 Jul 2023 15:29:05 +0300 Subject: [PATCH 14/17] Update pr_agent/settings/.secrets_template.toml Co-authored-by: Sergii Kovalev --- pr_agent/settings/.secrets_template.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_agent/settings/.secrets_template.toml b/pr_agent/settings/.secrets_template.toml index 82812734..fed6776f 100644 --- a/pr_agent/settings/.secrets_template.toml +++ b/pr_agent/settings/.secrets_template.toml @@ -27,7 +27,7 @@ app_id = 123456 # The GitHub App ID, replace with your own. webhook_secret = "" # Optional, may be commented out. [gitlab] -# The type of deployment to create. Valid values are 'app' or 'user'. +# Gitlab personal access token personal_access_token = "" # URL to the gitlab service From 301622216f82d19a708efe2a000df9fec23c1b3e Mon Sep 17 00:00:00 2001 From: mrT23 Date: Tue, 11 Jul 2023 08:50:28 +0300 Subject: [PATCH 15/17] Focused PR update --- README.md | 6 +++--- pr_agent/algo/utils.py | 2 +- pr_agent/settings/configuration.toml | 2 +- pr_agent/settings/pr_reviewer_prompts.toml | 10 +++++----- pr_agent/tools/pr_reviewer.py | 2 +- requirements.txt | 1 + tests/unit/test_convert_to_markdown.py | 10 ++++------ tests/unit/test_parse_code_suggestion.py | 6 +++--- 8 files changed, 19 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 084bd670..5b1e17af 100644 --- a/README.md +++ b/README.md @@ -186,7 +186,7 @@ Here is a quick overview of the different sub-tools of PR Reviewer: - PR description and title - PR type classification - Is the PR covered by relevant tests - - Is the PR minimal and focused + - Is this a focused PR - Are there security concerns - PR Feedback - General PR suggestions @@ -202,7 +202,7 @@ This is how a typical output of the PR Reviewer looks like: - ๐Ÿ” **Description and title:** Yes - ๐Ÿ“Œ **Type of PR:** Enhancement - ๐Ÿงช **Relevant tests added:** No -- โœจ **Minimal and focused:** Yes, the PR is focused on adding two new handlers for language extension and token counting. +- โœจ **Focused PR:** Yes, the PR is focused on adding two new handlers for language extension and token counting. - ๐Ÿ”’ **Security concerns:** No, the PR does not introduce possible security concerns or issues. #### PR Feedback @@ -245,7 +245,7 @@ The different tools and sub-tools used by CodiumAI pr-agent are easily configura You can enable/disable the different PR Reviewer sub-sections with the following flags: ``` -require_minimal_and_focused_review=true +require_focused_review=true require_tests_review=true require_security_review=true ``` diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 70994fd8..045144d2 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -12,7 +12,7 @@ def convert_to_markdown(output_data: dict) -> str: "Type of PR": "๐Ÿ“Œ", "Relevant tests added": "๐Ÿงช", "Unrelated changes": "โš ๏ธ", - "Minimal and focused": "โœจ", + "Focused PR": "โœจ", "Security concerns": "๐Ÿ”’", "General PR suggestions": "๐Ÿ’ก", "Code suggestions": "๐Ÿค–" diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 7368b1eb..50b34cff 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -5,7 +5,7 @@ publish_review=true verbosity_level=0 # 0,1,2 [pr_reviewer] -require_minimal_and_focused_review=true +require_focused_review=true require_tests_review=true require_security_review=true extended_code_suggestions=false diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index 3b3549f0..678c7520 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -30,10 +30,10 @@ You must use the following JSON schema to format your answer: "description": "yes\\no question: does this PR have relevant tests ?" }, {%- endif %} -{%- if require_minimal_and_focused %} - "Minimal and focused": { +{%- if require_focused %} + "Focused PR": { "type": "string", - "description": "is this PR as minimal and focused as possible, with all code changes centered around a single coherent theme, described in the PR description and title ?" Make sure to explain your answer" + "description": "Is this a focused PR, in the sense that it has a clear and coherent title and description, and all PR code diff changes are properly derived from the title and description? Explain your response." } }, {%- endif %} @@ -106,8 +106,8 @@ Example output: {%- if require_tests %} "Relevant tests added": "No", {%- endif %} -{%- if require_minimal_and_focused %} - "Minimal and focused": "yes\\no, because ..." +{%- if require_focused %} + "Focused PR": "yes\\no, because ..." {%- endif %} }, "PR Feedback": diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index b39a231e..97d527c1 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -31,7 +31,7 @@ class PRReviewer: "diff": "", # empty diff for initial calculation "require_tests": settings.pr_reviewer.require_tests_review, "require_security": settings.pr_reviewer.require_security_review, - "require_minimal_and_focused": settings.pr_reviewer.require_minimal_and_focused_review, + "require_focused": settings.pr_reviewer.require_focused_review, 'extended_code_suggestions': settings.pr_reviewer.extended_code_suggestions, 'num_code_suggestions': settings.pr_reviewer.num_code_suggestions, } diff --git a/requirements.txt b/requirements.txt index d7610000..e7c6d4c1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,4 @@ openai==0.27.8 Jinja2==3.1.2 tiktoken==0.4.0 uvicorn==0.22.0 +pytest==7.4.0 \ No newline at end of file diff --git a/tests/unit/test_convert_to_markdown.py b/tests/unit/test_convert_to_markdown.py index 05c84a77..08a49f76 100644 --- a/tests/unit/test_convert_to_markdown.py +++ b/tests/unit/test_convert_to_markdown.py @@ -1,6 +1,6 @@ # Generated by CodiumAI from pr_agent.algo.utils import convert_to_markdown - +import pytest """ Code Analysis @@ -50,7 +50,7 @@ class TestConvertToMarkdown: 'Type of PR': 'Test type', 'Relevant tests added': 'no', 'Unrelated changes': 'n/a', # won't be included in the output - 'Minimal and focused': 'Yes', + 'Focused PR': 'Yes', 'General PR suggestions': 'general suggestion...', 'Code suggestions': [ { @@ -74,12 +74,11 @@ class TestConvertToMarkdown: - ๐Ÿ” **Description and title:** Test description - ๐Ÿ“Œ **Type of PR:** Test type - ๐Ÿงช **Relevant tests added:** no -- โœจ **Minimal and focused:** Yes +- โœจ **Focused PR:** Yes - ๐Ÿ’ก **General PR suggestions:** general suggestion... - ๐Ÿค– **Code suggestions:** -- **suggestion 1:** - **Code example:** - **Before:** ``` @@ -90,7 +89,6 @@ class TestConvertToMarkdown: Code after ``` -- **suggestion 2:** - **Code example:** - **Before:** ``` @@ -116,7 +114,7 @@ class TestConvertToMarkdown: 'Type of PR': {}, 'Relevant tests added': {}, 'Unrelated changes': {}, - 'Minimal and focused': {}, + 'Focused PR': {}, 'General PR suggestions': {}, 'Code suggestions': {} } diff --git a/tests/unit/test_parse_code_suggestion.py b/tests/unit/test_parse_code_suggestion.py index a0da856d..082fed77 100644 --- a/tests/unit/test_parse_code_suggestion.py +++ b/tests/unit/test_parse_code_suggestion.py @@ -47,7 +47,7 @@ class TestParseCodeSuggestion: "Suggestion number": "one", "Description": "This is a suggestion" } - expected_output = "- **suggestion one:**\n - **Description:** This is a suggestion\n\n" + expected_output = " **Description:** This is a suggestion\n\n" assert parse_code_suggestion(input_data) == expected_output # Tests that function returns correct output when 'before' or 'after' key has a non-string value @@ -70,7 +70,7 @@ class TestParseCodeSuggestion: 'before': 'Before 1', 'after': 'After 1' } - expected_output = "- **suggestion 1:**\n - **suggestion:** Suggestion 1\n - **description:** Description 1\n - **before:** Before 1\n - **after:** After 1\n\n" # noqa: E501 + expected_output = " **suggestion:** Suggestion 1\n **description:** Description 1\n **before:** Before 1\n **after:** After 1\n\n" assert parse_code_suggestion(code_suggestions) == expected_output # Tests that function returns correct output when input dictionary has 'code example' key @@ -84,5 +84,5 @@ class TestParseCodeSuggestion: 'after': 'After 2' } } - expected_output = "- **suggestion 2:**\n - **suggestion:** Suggestion 2\n - **description:** Description 2\n - **code example:**\n - **before:**\n ```\n Before 2\n ```\n - **after:**\n ```\n After 2\n ```\n\n" # noqa: E501 + expected_output = " **suggestion:** Suggestion 2\n **description:** Description 2\n - **code example:**\n - **before:**\n ```\n Before 2\n ```\n - **after:**\n ```\n After 2\n ```\n\n" assert parse_code_suggestion(code_suggestions) == expected_output From 38db65831ee48c51c7e325879f0749c9fb57004b Mon Sep 17 00:00:00 2001 From: Ilan Chemla Date: Tue, 11 Jul 2023 15:01:52 +0300 Subject: [PATCH 16/17] Fix secrets filename extension in README --- README.md | 8 ++++---- pr_agent/settings/.secrets_template.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 084bd670..a00989a9 100644 --- a/README.md +++ b/README.md @@ -86,8 +86,8 @@ pip install -r requirements.txt 3. Copy the secrets template file and fill in your OpenAI key and your GitHub user token: ``` -cp pr_agent/settings/.secrets_template.toml pr_agent/settings/.secrets -# Edit .secrets file +cp pr_agent/settings/.secrets_template.toml pr_agent/settings/.secrets.toml +# Edit .secrets.toml file ``` 4. Run the appropriate Python scripts from the scripts folder: @@ -147,8 +147,8 @@ git clone https://github.com/Codium-ai/pr-agent.git - Copy your app's webhook secret to the webhook_secret field. ``` -cp pr_agent/settings/.secrets_template.toml pr_agent/settings/.secrets -# Edit .secrets file +cp pr_agent/settings/.secrets_template.toml pr_agent/settings/.secrets.toml +# Edit .secrets.toml file ``` 6. Build a Docker image for the app and optionally push it to a Docker repository. We'll use Dockerhub as an example: diff --git a/pr_agent/settings/.secrets_template.toml b/pr_agent/settings/.secrets_template.toml index 59f4625b..420ad00e 100644 --- a/pr_agent/settings/.secrets_template.toml +++ b/pr_agent/settings/.secrets_template.toml @@ -1,5 +1,5 @@ # QUICKSTART: -# Copy this file to .secrets in the same folder. +# Copy this file to .secrets.toml in the same folder. # The minimum workable settings - set openai.key to your API key. # Set github.deployment_type to "user" and github.user_token to your GitHub personal access token. # This will allow you to run the CLI scripts in the scripts/ folder and the github_polling server. From b2d952cafacde80684f387d3a033a73435b84478 Mon Sep 17 00:00:00 2001 From: Ori Kotek Date: Tue, 11 Jul 2023 16:55:09 +0300 Subject: [PATCH 17/17] 1. Move deployment_type to configuration.toml 2. Lint 3. Inject GitHub app installation ID into GitHub provider using the settings mechanism. --- pr_agent/agent/pr_agent.py | 1 - pr_agent/algo/language_handler.py | 2 +- pr_agent/git_providers/git_provider.py | 13 ++++++++++--- pr_agent/git_providers/github_provider.py | 1 + pr_agent/git_providers/gitlab_provider.py | 22 ++++++++++++++-------- pr_agent/servers/github_app.py | 7 ++++--- pr_agent/servers/github_polling.py | 3 ++- pr_agent/servers/gitlab_polling.py | 5 ++--- pr_agent/settings/.secrets_template.toml | 5 ----- pr_agent/settings/configuration.toml | 11 +++++++---- pr_agent/tools/pr_questions.py | 1 - pr_agent/tools/pr_reviewer.py | 1 - requirements.txt | 2 ++ tests/unit/test_convert_to_markdown.py | 2 +- tests/unit/test_language_handler.py | 12 +++++++----- tests/unit/test_parse_code_suggestion.py | 4 ++-- 16 files changed, 53 insertions(+), 39 deletions(-) diff --git a/pr_agent/agent/pr_agent.py b/pr_agent/agent/pr_agent.py index 7e4a6c22..44815f56 100644 --- a/pr_agent/agent/pr_agent.py +++ b/pr_agent/agent/pr_agent.py @@ -1,5 +1,4 @@ import re -from typing import Optional from pr_agent.tools.pr_questions import PRQuestions from pr_agent.tools.pr_reviewer import PRReviewer diff --git a/pr_agent/algo/language_handler.py b/pr_agent/algo/language_handler.py index efc038ca..db99d20a 100644 --- a/pr_agent/algo/language_handler.py +++ b/pr_agent/algo/language_handler.py @@ -93,7 +93,7 @@ def sort_files_by_main_languages(languages: Dict, files: list): for ext in main_extensions: main_extensions_flat.extend(ext) - for extensions, lang in zip(main_extensions, languages_sorted_list): + for extensions, lang in zip(main_extensions, languages_sorted_list): # noqa: B905 tmp = [] for file in files_filtered: extension_str = f".{file.filename.split('.')[-1]}" diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py index b5d8df23..a30df90b 100644 --- a/pr_agent/git_providers/git_provider.py +++ b/pr_agent/git_providers/git_provider.py @@ -1,5 +1,4 @@ - -from abc import ABC +from abc import ABC, abstractmethod from dataclasses import dataclass @@ -13,27 +12,35 @@ class FilePatchInfo: class GitProvider(ABC): + @abstractmethod def get_diff_files(self) -> list[FilePatchInfo]: pass + @abstractmethod def publish_comment(self, pr_comment: str, is_temporary: bool = False): pass + @abstractmethod def remove_initial_comment(self): pass + @abstractmethod def get_languages(self): pass + @abstractmethod def get_pr_branch(self): pass + @abstractmethod def get_user_id(self): pass + @abstractmethod def get_pr_description(self): pass + def get_main_pr_language(languages, files) -> str: """ Get the main language of the commit. Return an empty string if cannot determine. @@ -72,4 +79,4 @@ def get_main_pr_language(languages, files) -> str: except Exception: pass - return main_language_str \ No newline at end of file + return main_language_str diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 0b65c539..ecc624c7 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -6,6 +6,7 @@ from urllib.parse import urlparse from github import AppAuthentication, Github from pr_agent.config_loader import settings + from .git_provider import FilePatchInfo diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index 2082fa33..e9279a82 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -1,6 +1,8 @@ -from urllib.parse import urlparse -import gitlab +import logging from typing import Optional, Tuple +from urllib.parse import urlparse + +import gitlab from pr_agent.config_loader import settings @@ -9,24 +11,28 @@ from .git_provider import FilePatchInfo, GitProvider class GitLabProvider(GitProvider): def __init__(self, merge_request_url: Optional[str] = None): + gitlab_url = settings.get("GITLAB.URL", None) + if not gitlab_url: + raise ValueError("GitLab URL is not set in the config file") + gitlab_access_token = settings.get("GITLAB.PERSONAL_ACCESS_TOKEN", None) + if not gitlab_access_token: + raise ValueError("GitLab personal access token is not set in the config file") self.gl = gitlab.Gitlab( - settings.get("GITLAB.URL"), - private_token=settings.get("GITLAB.PERSONAL_ACCESS_TOKEN") + gitlab_url, + gitlab_access_token ) - self.id_project = None self.id_mr = None self.mr = None self.temp_comments = [] - - self.set_merge_request(merge_request_url) + self._set_merge_request(merge_request_url) @property def pr(self): '''The GitLab terminology is merge request (MR) instead of pull request (PR)''' return self.mr - def set_merge_request(self, merge_request_url: str): + def _set_merge_request(self, merge_request_url: str): self.id_project, self.id_mr = self._parse_merge_request_url(merge_request_url) self.mr = self._get_merge_request() diff --git a/pr_agent/servers/github_app.py b/pr_agent/servers/github_app.py index 6dc5782b..52425651 100644 --- a/pr_agent/servers/github_app.py +++ b/pr_agent/servers/github_app.py @@ -35,7 +35,8 @@ async def handle_github_webhooks(request: Request, response: Response): async def handle_request(body): action = body.get("action", None) installation_id = body.get("installation", {}).get("id", None) - agent = PRAgent(installation_id) + settings.set("GITHUB.INSTALLATION_ID", installation_id) + agent = PRAgent() if action == 'created': if "comment" not in body: return {} @@ -66,8 +67,8 @@ async def root(): def start(): - if settings.get("GITHUB.DEPLOYMENT_TYPE", "user") != "app": - raise Exception("Please set deployment type to app in .secrets.toml file") + # Override the deployment type to app + settings.set("GITHUB.DEPLOYMENT_TYPE", "app") app = FastAPI() app.include_router(router) diff --git a/pr_agent/servers/github_polling.py b/pr_agent/servers/github_polling.py index 06293fd6..e8cc4223 100644 --- a/pr_agent/servers/github_polling.py +++ b/pr_agent/servers/github_polling.py @@ -76,7 +76,8 @@ async def polling_loop(): if comment['user']['login'] == user_id: continue comment_body = comment['body'] if 'body' in comment else '' - commenter_github_user = comment['user']['login'] if 'user' in comment else '' + commenter_github_user = comment['user']['login'] \ + if 'user' in comment else '' logging.info(f"Commenter: {commenter_github_user}\nComment: {comment_body}") user_tag = "@" + user_id if user_tag not in comment_body: diff --git a/pr_agent/servers/gitlab_polling.py b/pr_agent/servers/gitlab_polling.py index 4409d176..c240310e 100644 --- a/pr_agent/servers/gitlab_polling.py +++ b/pr_agent/servers/gitlab_polling.py @@ -1,12 +1,11 @@ import asyncio import time -from urllib.parse import urlparse + import gitlab + from pr_agent.agent.pr_agent import PRAgent - from pr_agent.config_loader import settings - gl = gitlab.Gitlab( settings.get("GITLAB.URL"), private_token=settings.get("GITLAB.PERSONAL_ACCESS_TOKEN") diff --git a/pr_agent/settings/.secrets_template.toml b/pr_agent/settings/.secrets_template.toml index 7ce3d52f..eb2f9b76 100644 --- a/pr_agent/settings/.secrets_template.toml +++ b/pr_agent/settings/.secrets_template.toml @@ -11,9 +11,6 @@ key = "" # Acquire through https://platform.openai.com org = "" # Optional, may be commented out. [github] -# The type of deployment to create. Valid values are 'app' or 'user'. -deployment_type = "user" - # ---- Set the following only for deployment type == "user" user_token = "" # A GitHub personal access token with 'repo' scope. @@ -30,5 +27,3 @@ webhook_secret = "" # Optional, may be commented out. # Gitlab personal access token personal_access_token = "" -# URL to the gitlab service -gitlab_url = "https://gitlab.com" diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 6bb6db97..f0a646d1 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -11,18 +11,21 @@ require_security_review=true extended_code_suggestions=false num_code_suggestions=4 - [pr_questions] +[github] +# The type of deployment to create. Valid values are 'app' or 'user'. +deployment_type = "user" + [gitlab] # URL to the gitlab service gitlab_url = "https://gitlab.com" -# Polling (either proheheject id or namespace/project_name) syntax can be used -projects_to_monitor = ['nuclai/algo', 'nuclai/pr-agent-test'] +# Polling (either project id or namespace/project_name) syntax can be used +projects_to_monitor = ['org_name/repo_name'] # Polling trigger magic_word = "AutoReview" # Polling interval -polling_interval_seconds = 300 \ No newline at end of file +polling_interval_seconds = 30 diff --git a/pr_agent/tools/pr_questions.py b/pr_agent/tools/pr_questions.py index eb8e6bcb..1c26cf99 100644 --- a/pr_agent/tools/pr_questions.py +++ b/pr_agent/tools/pr_questions.py @@ -1,6 +1,5 @@ import copy import logging -from typing import Optional from jinja2 import Environment, StrictUndefined diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index f0a8c485..504548b1 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -1,7 +1,6 @@ import copy import json import logging -from typing import Optional from jinja2 import Environment, StrictUndefined diff --git a/requirements.txt b/requirements.txt index 4a6a6255..64134909 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,5 @@ Jinja2==3.1.2 tiktoken==0.4.0 uvicorn==0.22.0 python-gitlab==3.15.0 +pytest~=7.4.0 +aiohttp~=3.8.4 diff --git a/tests/unit/test_convert_to_markdown.py b/tests/unit/test_convert_to_markdown.py index 08a49f76..bfd9c9b5 100644 --- a/tests/unit/test_convert_to_markdown.py +++ b/tests/unit/test_convert_to_markdown.py @@ -1,6 +1,6 @@ # Generated by CodiumAI from pr_agent.algo.utils import convert_to_markdown -import pytest + """ Code Analysis diff --git a/tests/unit/test_language_handler.py b/tests/unit/test_language_handler.py index 5807b09b..875ec1a7 100644 --- a/tests/unit/test_language_handler.py +++ b/tests/unit/test_language_handler.py @@ -1,15 +1,15 @@ # Generated by CodiumAI + from pr_agent.algo.language_handler import sort_files_by_main_languages - -import pytest - """ Code Analysis Objective: -The objective of the function is to sort a list of files by their main language, putting the files that are in the main language first and the rest of the files after. It takes in a dictionary of languages and their sizes, and a list of files. +The objective of the function is to sort a list of files by their main language, putting the files that are in the main +language first and the rest of the files after. It takes in a dictionary of languages and their sizes, and a list of +files. Inputs: - languages: a dictionary containing the languages and their sizes @@ -33,6 +33,8 @@ Additional aspects: - The function uses the filter_bad_extensions function to filter out files with bad extensions - The function uses a rest_files dictionary to store the files that do not belong to any of the main extensions """ + + class TestSortFilesByMainLanguages: # Tests that files are sorted by main language, with files in main language first and the rest after def test_happy_path_sort_files_by_main_languages(self): @@ -118,4 +120,4 @@ class TestSortFilesByMainLanguages: {'language': 'C++', 'files': [files[2], files[7]]}, {'language': 'Other', 'files': []} ] - assert sort_files_by_main_languages(languages, files) == expected_output \ No newline at end of file + assert sort_files_by_main_languages(languages, files) == expected_output diff --git a/tests/unit/test_parse_code_suggestion.py b/tests/unit/test_parse_code_suggestion.py index 082fed77..87e3cac8 100644 --- a/tests/unit/test_parse_code_suggestion.py +++ b/tests/unit/test_parse_code_suggestion.py @@ -70,7 +70,7 @@ class TestParseCodeSuggestion: 'before': 'Before 1', 'after': 'After 1' } - expected_output = " **suggestion:** Suggestion 1\n **description:** Description 1\n **before:** Before 1\n **after:** After 1\n\n" + expected_output = " **suggestion:** Suggestion 1\n **description:** Description 1\n **before:** Before 1\n **after:** After 1\n\n" # noqa: E501 assert parse_code_suggestion(code_suggestions) == expected_output # Tests that function returns correct output when input dictionary has 'code example' key @@ -84,5 +84,5 @@ class TestParseCodeSuggestion: 'after': 'After 2' } } - expected_output = " **suggestion:** Suggestion 2\n **description:** Description 2\n - **code example:**\n - **before:**\n ```\n Before 2\n ```\n - **after:**\n ```\n After 2\n ```\n\n" + expected_output = " **suggestion:** Suggestion 2\n **description:** Description 2\n - **code example:**\n - **before:**\n ```\n Before 2\n ```\n - **after:**\n ```\n After 2\n ```\n\n" # noqa: E501 assert parse_code_suggestion(code_suggestions) == expected_output