diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py index 56b9f711..fb8d24f9 100644 --- a/pr_agent/git_providers/bitbucket_provider.py +++ b/pr_agent/git_providers/bitbucket_provider.py @@ -14,8 +14,7 @@ from .git_provider import FilePatchInfo, GitProvider class BitbucketProvider(GitProvider): def __init__( - self, pr_url: Optional[str] = None, incremental: Optional[bool] = False - ): + self, pr_url: Optional[str] = None, incremental: Optional[bool] = False): s = requests.Session() try: bearer = context.get("bitbucket_bearer_token", None) @@ -32,12 +31,15 @@ class BitbucketProvider(GitProvider): self.repo = None self.pr_num = None self.pr = None + self.feature = None + self.issue_num = None + self.issue_name = None self.temp_comments = [] self.incremental = incremental - if pr_url: + if pr_url and 'pull' in pr_url: self.set_pr(pr_url) - self.bitbucket_comment_api_url = self.pr._BitbucketBase__data["links"]["comments"]["href"] - self.bitbucket_pull_request_api_url = self.pr._BitbucketBase__data["links"]['self']['href'] + self.bitbucket_comment_api_url = self.pr._BitbucketBase__data["links"]["comments"]["href"] + self.bitbucket_pull_request_api_url = self.pr._BitbucketBase__data["links"]['self']['href'] def get_repo_settings(self): try: @@ -228,6 +230,27 @@ class BitbucketProvider(GitProvider): raise ValueError("Unable to convert PR number to integer") from e return workspace_slug, repo_slug, pr_number + + @staticmethod + def _parse_issue_url(issue_url: str) -> Tuple[str, int]: + parsed_url = urlparse(issue_url) + + if "bitbucket.org" not in parsed_url.netloc: + raise ValueError("The provided URL is not a valid Bitbucket URL") + + path_parts = parsed_url.path.strip('/').split('/') + if len(path_parts) < 5 or path_parts[2] != "issues": + raise ValueError("The provided URL does not appear to be a Bitbucket issue URL") + + workspace_slug = path_parts[0] + repo_slug = path_parts[1] + try: + issue_number = int(path_parts[3]) + except ValueError as e: + raise ValueError("Unable to convert issue number to integer") from e + + return workspace_slug, repo_slug, issue_number + def _get_repo(self): if self.repo is None: @@ -263,3 +286,68 @@ class BitbucketProvider(GitProvider): # bitbucket does not support labels def get_labels(self): pass + + def get_issue(self, workspace_slug, repo_name, original_issue_number): + issue = self.bitbucket_client.repositories.get(workspace_slug, repo_name).issues.get(original_issue_number) + return issue + + def get_issue_url(self, issue): + return issue._BitbucketBase__data['links']['html']['href'] + + def get_issue_body(self, issue): + return issue.content['raw'] + + def get_issue_number(self, issue): + return issue.id + + def get_issue_comment_body(self, comment): + return comment['content']['raw'] + + def get_issue_comment_user(self, comment): + return comment['user']['display_name'] + + def get_issue_created_at(self, issue): + return str(issue.created_on) + + def get_username(self, issue, workspace_slug): + return workspace_slug + + + def get_repo_issues(self, repo_obj): + return repo_obj._Repository__issues.each() + + + def get_issues_comments(self, workspace_slug, repo_name, original_issue_number): + import requests + + url = f"https://api.bitbucket.org/2.0/repositories/{workspace_slug}/{repo_name}/issues/{original_issue_number}/comments" + + payload = {} + headers = {} + + response = requests.request("GET", url, headers=headers, data=payload) + return response.json()['values'] + + def create_issue_comment(self, similar_issues_str, workspace_slug, repo_name, original_issue_number): + url = f"https://api.bitbucket.org/2.0/repositories/{workspace_slug}/{repo_name}/issues/{original_issue_number}/comments" + payload = json.dumps({ + "content": { + "raw": similar_issues_str + } + }) + headers = { + 'Authorization': f'Bearer {get_settings().get("BITBUCKET.BEARER_TOKEN", None)}', + 'Content-Type': 'application/json' + } + + response = requests.request("POST", url, headers=headers, data=payload) + + def get_repo_obj(self, workspace_slug, repo_name): + return self.bitbucket_client.repositories.get(workspace_slug, repo_name) + + def get_repo_name_for_indexing(self, repo_obj): + return repo_obj._BitbucketBase__data['full_name'].lower().replace('/', '-').replace('_/', '-') + + def check_if_issue_pull_request(self, issue): + return False + diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index e5f62eb3..4a7ca48b 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -336,8 +336,9 @@ class GithubProvider(GitProvider): issue_number = int(path_parts[3]) except ValueError as e: raise ValueError("Unable to convert issue number to integer") from e + workspace_slug = None - return repo_name, issue_number + return workspace_slug, repo_name, issue_number def _get_github_client(self): deployment_type = get_settings().get("GITHUB.DEPLOYMENT_TYPE", "user") @@ -454,3 +455,56 @@ class GithubProvider(GitProvider): return pr_id except: return "" + + def get_repo_issues(self, repo_obj): + return list(repo_obj.get_issues(state='all')) + + def get_issues_comments(self, workspace_slug, repo_name, original_issue_number): + return self.repo_obj.get_issue(original_issue_number) + + def get_issue_url(self, issue): + return issue.html_url + + def create_issue_comment(self, similar_issues_str, workspace_slug, repo_name, original_issue_number): + try: + issue = self.repo_obj.get_issue(original_issue_number) + issue.create_comment(similar_issues_str) + except Exception as e: + logging.exception(f"Failed to create issue comment, error: {e}") + + def get_issue_body(self, issue): + return issue.body + + def get_issue_number(self, issue): + return issue.number + + def get_issues_comments(self, workspace_slug, repo_name, original_issue_number): + issue = self.repo_obj.get_issue(original_issue_number) + return list(issue.get_comments()) + + def get_issue_body(self, issue): + return issue.body + + def get_username(self, issue, workspace_slug): + return issue.user.login + + def get_issue_created_at(self, issue): + return str(issue.created_at) + + def get_issue_comment_body(self, comment): + return comment.body + + def get_issue(self, workspace_slug, repo_name, original_issue_number): + return self.repo_obj.get_issue(original_issue_number) + + def get_repo_obj(self, workspace_slug, repo_name): + return self.github_client.get_repo(repo_name) + + def get_repo_name_for_indexing(self, repo_obj): + return repo_obj.full_name.lower().replace('/', '-').replace('_/', '-') + + def check_if_issue_pull_request(self, issue): + if issue.pull_request: + return True + return False + diff --git a/pr_agent/tools/pr_similar_issue.py b/pr_agent/tools/pr_similar_issue.py index d7b6a799..9987c08a 100644 --- a/pr_agent/tools/pr_similar_issue.py +++ b/pr_agent/tools/pr_similar_issue.py @@ -19,29 +19,27 @@ MODEL = "text-embedding-ada-002" class PRSimilarIssue: def __init__(self, issue_url: str, args: list = None): - if get_settings().config.git_provider != "github": - raise Exception("Only github is supported for similar issue tool") self.cli_mode = get_settings().CONFIG.CLI_MODE self.max_issues_to_scan = get_settings().pr_similar_issue.max_issues_to_scan self.issue_url = issue_url self.git_provider = get_git_provider()() - repo_name, issue_number = self.git_provider._parse_issue_url(issue_url.split('=')[-1]) - self.git_provider.repo = repo_name - self.git_provider.repo_obj = self.git_provider.github_client.get_repo(repo_name) + self.workspace_slug, self.repo_name, self.issue_number = self.git_provider._parse_issue_url(issue_url.split('=')[-1]) + self.git_provider.repo = self.repo_name + self.git_provider.repo_obj = self.git_provider.get_repo_obj(self.workspace_slug, self.repo_name) self.token_handler = TokenHandler() repo_obj = self.git_provider.repo_obj - repo_name_for_index = self.repo_name_for_index = repo_obj.full_name.lower().replace('/', '-').replace('_/', '-') + repo_name_for_index = self.repo_name_for_index = self.git_provider.get_repo_name_for_indexing(repo_obj) index_name = self.index_name = "codium-ai-pr-agent-issues" # assuming pinecone api key and environment are set in secrets file try: - api_key = get_settings().pinecone.api_key - environment = get_settings().pinecone.environment + api_key = get_settings().github.api_key + environment = get_settings().github.environment except Exception: if not self.cli_mode: - repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) - issue_main = self.git_provider.repo_obj.get_issue(original_issue_number) + workspace_slug, repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) + issue_main = self.git_provider.get_issue(workspace_slug, repo_name, original_issue_number) issue_main.create_comment("Please set pinecone api key and environment in secrets file") raise Exception("Please set pinecone api key and environment in secrets file") @@ -65,19 +63,21 @@ class PRSimilarIssue: logging.info('Indexing the entire repo...') logging.info('Getting issues...') - issues = list(repo_obj.get_issues(state='all')) + issues = self.git_provider.get_repo_issues(repo_obj) logging.info('Done') self._update_index_with_issues(issues, repo_name_for_index, upsert=upsert) else: # update index if needed pinecone_index = pinecone.Index(index_name=index_name) issues_to_update = [] - issues_paginated_list = repo_obj.get_issues(state='all') + issues_paginated_list = [] + issues_paginated_list = self.git_provider.get_repo_issues(repo_obj) counter = 1 for issue in issues_paginated_list: - if issue.pull_request: + issue_pull_request = self.git_provider.check_if_issue_pull_request(issue) + if issue_pull_request: continue issue_str, comments, number = self._process_issue(issue) - issue_key = f"issue_{number}" + issue_key = f"issue_{number}" id = issue_key + "." + "issue" res = pinecone_index.fetch([id]).to_dict() is_new_issue = True @@ -99,8 +99,8 @@ class PRSimilarIssue: async def run(self): logging.info('Getting issue...') - repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) - issue_main = self.git_provider.repo_obj.get_issue(original_issue_number) + workspace_slug, repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) + issue_main = self.git_provider.get_issue(workspace_slug, repo_name, original_issue_number) issue_str, comments, number = self._process_issue(issue_main) openai.api_key = get_settings().openai.key logging.info('Done') @@ -132,25 +132,23 @@ class PRSimilarIssue: logging.info('Publishing response...') similar_issues_str = "### Similar Issues\n___\n\n" for i, issue_number_similar in enumerate(relevant_issues_number_list): - issue = self.git_provider.repo_obj.get_issue(issue_number_similar) + issue = self.git_provider.get_issue(workspace_slug, repo_name, issue_number_similar) title = issue.title - url = issue.html_url - if relevant_comment_number_list[i] != -1: - url = list(issue.get_comments())[relevant_comment_number_list[i]].html_url + url = self.git_provider.get_issue_url(issue) similar_issues_str += f"{i + 1}. **[{title}]({url})** (score={score_list[i]})\n\n" if get_settings().config.publish_output: - response = issue_main.create_comment(similar_issues_str) + response = self.git_provider.create_issue_comment(similar_issues_str, workspace_slug, repo_name, original_issue_number) logging.info(similar_issues_str) logging.info('Done') def _process_issue(self, issue): header = issue.title - body = issue.body - number = issue.number + body = self.git_provider.get_issue_body(issue) + number = self.git_provider.get_issue_number(issue) if get_settings().pr_similar_issue.skip_comments: comments = [] else: - comments = list(issue.get_comments()) + comments = self.git_provider.get_issues_comments(self.workspace_slug, self.repo_name, self.issue_number) issue_str = f"Issue Header: \"{header}\"\n\nIssue Body:\n{body}" return issue_str, comments, number @@ -158,7 +156,7 @@ class PRSimilarIssue: logging.info('Processing issues...') corpus = Corpus() example_issue_record = Record( - id=f"example_issue_{repo_name_for_index}", + id=str([issue.number for issue in issues_list]), text="example_issue", metadata=Metadata(repo=repo_name_for_index) ) @@ -166,7 +164,9 @@ class PRSimilarIssue: counter = 0 for issue in issues_list: - if issue.pull_request: + + issue_pull_request = self.git_provider.check_if_issue_pull_request(issue) + if issue_pull_request: continue counter += 1 @@ -178,8 +178,8 @@ class PRSimilarIssue: issue_str, comments, number = self._process_issue(issue) issue_key = f"issue_{number}" - username = issue.user.login - created_at = str(issue.created_at) + username = self.git_provider.get_username(issue, self.workspace_slug) + created_at = self.git_provider.get_issue_created_at(issue) if len(issue_str) < 8000 or \ self.token_handler.count_tokens(issue_str) < MAX_TOKENS[MODEL]: # fast reject first issue_record = Record( @@ -193,7 +193,7 @@ class PRSimilarIssue: corpus.append(issue_record) if comments: for j, comment in enumerate(comments): - comment_body = comment.body + comment_body = self.git_provider.get_issue_comment_body(comment) num_words_comment = len(comment_body.split()) if num_words_comment < 10 or not isinstance(comment_body, str): continue @@ -233,8 +233,8 @@ class PRSimilarIssue: ds = Dataset.from_pandas(df, meta) logging.info('Done') - api_key = get_settings().pinecone.api_key - environment = get_settings().pinecone.environment + api_key = get_settings().github.api_key + environment = get_settings().github.environment if not upsert: logging.info('Creating index from scratch...') ds.to_pinecone_index(self.index_name, api_key=api_key, environment=environment)