bitbucket similar issue

This commit is contained in:
sarbjitgrewal
2023-10-17 11:32:37 +05:30
parent 91afd29aef
commit 4cc9ab5bc6
3 changed files with 179 additions and 37 deletions

View File

@ -14,8 +14,7 @@ from .git_provider import FilePatchInfo, GitProvider
class BitbucketProvider(GitProvider): class BitbucketProvider(GitProvider):
def __init__( def __init__(
self, pr_url: Optional[str] = None, incremental: Optional[bool] = False self, pr_url: Optional[str] = None, incremental: Optional[bool] = False):
):
s = requests.Session() s = requests.Session()
try: try:
bearer = context.get("bitbucket_bearer_token", None) bearer = context.get("bitbucket_bearer_token", None)
@ -32,9 +31,12 @@ class BitbucketProvider(GitProvider):
self.repo = None self.repo = None
self.pr_num = None self.pr_num = None
self.pr = None self.pr = None
self.feature = None
self.issue_num = None
self.issue_name = None
self.temp_comments = [] self.temp_comments = []
self.incremental = incremental self.incremental = incremental
if pr_url: if pr_url and 'pull' in pr_url:
self.set_pr(pr_url) self.set_pr(pr_url)
self.bitbucket_comment_api_url = self.pr._BitbucketBase__data["links"]["comments"]["href"] self.bitbucket_comment_api_url = self.pr._BitbucketBase__data["links"]["comments"]["href"]
self.bitbucket_pull_request_api_url = self.pr._BitbucketBase__data["links"]['self']['href'] self.bitbucket_pull_request_api_url = self.pr._BitbucketBase__data["links"]['self']['href']
@ -229,6 +231,27 @@ class BitbucketProvider(GitProvider):
return workspace_slug, repo_slug, pr_number return workspace_slug, repo_slug, pr_number
@staticmethod
def _parse_issue_url(issue_url: str) -> Tuple[str, str, int]:
    """Split a Bitbucket issue URL into its identifying parts.

    The path is expected to look like
    ``/<workspace>/<repo>/issues/<number>[/<anything>]``.

    Args:
        issue_url: Full URL of the issue.

    Returns:
        ``(workspace_slug, repo_slug, issue_number)``.

    Raises:
        ValueError: if the host is not bitbucket.org, the path is not an
            issue path, or the issue number is not an integer.
    """
    parsed_url = urlparse(issue_url)
    if "bitbucket.org" not in parsed_url.netloc:
        raise ValueError("The provided URL is not a valid Bitbucket URL")

    path_parts = parsed_url.path.strip('/').split('/')
    # Only segments 0..3 are read below, so a fifth segment (e.g. a title
    # slug after the number) is optional rather than required.
    if len(path_parts) < 4 or path_parts[2] != "issues":
        raise ValueError("The provided URL does not appear to be a Bitbucket issue URL")

    workspace_slug, repo_slug = path_parts[0], path_parts[1]
    try:
        issue_number = int(path_parts[3])
    except ValueError as e:
        raise ValueError("Unable to convert issue number to integer") from e

    return workspace_slug, repo_slug, issue_number
def _get_repo(self): def _get_repo(self):
if self.repo is None: if self.repo is None:
self.repo = self.bitbucket_client.workspaces.get( self.repo = self.bitbucket_client.workspaces.get(
@ -263,3 +286,68 @@ class BitbucketProvider(GitProvider):
# bitbucket does not support labels
def get_labels(self):
    """No-op placeholder; always yields ``None``."""
    return None
def get_issue(self, workspace_slug, repo_name, original_issue_number):
    """Look up one issue of ``workspace_slug/repo_name`` through the Bitbucket client."""
    repository = self.bitbucket_client.repositories.get(workspace_slug, repo_name)
    return repository.issues.get(original_issue_number)
def get_issue_url(self, issue):
    """Return the ``links.html.href`` entry from the issue's raw payload."""
    links = issue._BitbucketBase__data['links']
    return links['html']['href']
def get_issue_body(self, issue):
    """Return the ``raw`` entry of the issue's ``content`` mapping."""
    content = issue.content
    return content['raw']
def get_issue_number(self, issue):
    """Return the issue's ``id`` attribute, which serves as its number here."""
    issue_id = issue.id
    return issue_id
def get_issue_comment_body(self, comment):
    """Return the raw text of a comment given as a ``content.raw`` mapping."""
    content = comment['content']
    return content['raw']
def get_issue_comment_user(self, comment):
    """Return the ``display_name`` of the comment's ``user`` entry."""
    author = comment['user']
    return author['display_name']
def get_issue_created_at(self, issue):
    """Return the issue's ``created_on`` value converted to ``str``."""
    created_on = issue.created_on
    return str(created_on)
def get_username(self, issue, workspace_slug):
    """Return the name recorded as the issue's user.

    This implementation ignores ``issue`` and simply hands back
    ``workspace_slug``.
    """
    username = workspace_slug
    return username
def get_repo_issues(self, repo_obj):
    """Return every issue of ``repo_obj`` as a list.

    The result of ``each()`` is materialised so callers can iterate it more
    than once (the indexing code walks the collection twice) and so the
    return type matches the GitHub provider, which also returns a list.
    """
    return list(repo_obj._Repository__issues.each())
def get_issues_comments(self, workspace_slug, repo_name, original_issue_number):
    """Fetch all comments of one issue from the Bitbucket Cloud REST API.

    Args:
        workspace_slug: Workspace that owns the repository.
        repo_name: Repository slug.
        original_issue_number: Number of the issue whose comments are wanted.

    Returns:
        A list of comment dicts, gathered from every result page.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    url = f"https://api.bitbucket.org/2.0/repositories/{workspace_slug}/{repo_name}/issues/{original_issue_number}/comments"
    headers = {}
    # Authenticate only when a token is configured, so anonymous access to
    # public repositories keeps working as before.
    token = get_settings().get("BITBUCKET.BEARER_TOKEN", None)
    if token:
        headers['Authorization'] = f'Bearer {token}'

    comments = []
    while url:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        page = response.json()
        comments.extend(page.get('values', []))
        # Paginated responses carry a "next" link until the last page.
        url = page.get('next')
    return comments
def create_issue_comment(self, similar_issues_str, workspace_slug, repo_name, original_issue_number):
    """Post ``similar_issues_str`` as a comment on a Bitbucket issue.

    Publishing is best-effort: transport errors and error status codes are
    logged rather than raised, as the GitHub provider does.

    Args:
        similar_issues_str: Raw text of the comment.
        workspace_slug: Workspace that owns the repository.
        repo_name: Repository slug.
        original_issue_number: Number of the issue to comment on.
    """
    import logging

    url = f"https://api.bitbucket.org/2.0/repositories/{workspace_slug}/{repo_name}/issues/{original_issue_number}/comments"
    payload = json.dumps({
        "content": {
            "raw": similar_issues_str
        }
    })
    headers = {
        'Authorization': f'Bearer {get_settings().get("BITBUCKET.BEARER_TOKEN", None)}',
        'Content-Type': 'application/json'
    }
    try:
        response = requests.request("POST", url, headers=headers, data=payload, timeout=30)
        # Without this check a 4xx/5xx answer would pass unnoticed.
        response.raise_for_status()
    except requests.RequestException as e:
        logging.exception(f"Failed to create issue comment, error: {e}")
def get_repo_obj(self, workspace_slug, repo_name):
    """Return the client's repository object for ``workspace_slug/repo_name``."""
    repositories = self.bitbucket_client.repositories
    return repositories.get(workspace_slug, repo_name)
def get_repo_name_for_indexing(self, repo_obj):
    """Derive the index key from the repository's ``full_name``.

    The name is lower-cased and every ``/`` becomes ``-``.
    """
    full_name = repo_obj._BitbucketBase__data['full_name']
    lowered = full_name.lower()
    # NOTE: after the first replace no '/' is left, so the '_/' replacement
    # cannot match; it is kept to mirror the established expression.
    dashed = lowered.replace('/', '-')
    return dashed.replace('_/', '-')
def check_if_issue_pull_request(self, issue):
    """Report whether ``issue`` is a pull request; this provider always says no."""
    is_pull_request = False
    return is_pull_request

View File

@ -336,8 +336,9 @@ class GithubProvider(GitProvider):
issue_number = int(path_parts[3]) issue_number = int(path_parts[3])
except ValueError as e: except ValueError as e:
raise ValueError("Unable to convert issue number to integer") from e raise ValueError("Unable to convert issue number to integer") from e
workspace_slug = None
return repo_name, issue_number return workspace_slug, repo_name, issue_number
def _get_github_client(self): def _get_github_client(self):
deployment_type = get_settings().get("GITHUB.DEPLOYMENT_TYPE", "user") deployment_type = get_settings().get("GITHUB.DEPLOYMENT_TYPE", "user")
@ -454,3 +455,56 @@ class GithubProvider(GitProvider):
return pr_id return pr_id
except: except:
return "" return ""
def get_repo_issues(self, repo_obj):
    """Return all issues of ``repo_obj`` (any state) as a list."""
    every_issue = repo_obj.get_issues(state='all')
    return list(every_issue)
def get_issues_comments(self, workspace_slug, repo_name, original_issue_number):
    """Return the comments of issue ``original_issue_number`` as a list.

    ``workspace_slug`` and ``repo_name`` are not used; the issue is looked
    up on ``self.repo_obj``. The body returns the comments (not the issue
    object) so it agrees with the definition of the same name further down
    in this class, which is the one Python keeps.
    """
    issue = self.repo_obj.get_issue(original_issue_number)
    return list(issue.get_comments())
def get_issue_url(self, issue):
    """Return the issue's ``html_url`` attribute."""
    html_url = issue.html_url
    return html_url
def create_issue_comment(self, similar_issues_str, workspace_slug, repo_name, original_issue_number):
    """Post ``similar_issues_str`` on the given issue of ``self.repo_obj``.

    Any exception raised while looking up the issue or posting is logged
    and swallowed. ``workspace_slug`` and ``repo_name`` are not used.
    """
    try:
        target = self.repo_obj.get_issue(original_issue_number)
        target.create_comment(similar_issues_str)
    except Exception as e:
        logging.exception(f"Failed to create issue comment, error: {e}")
def get_issue_body(self, issue):
    """Return the issue's ``body`` attribute."""
    body = issue.body
    return body
def get_issue_number(self, issue):
    """Return the issue's ``number`` attribute."""
    number = issue.number
    return number
def get_issues_comments(self, workspace_slug, repo_name, original_issue_number):
    """Return all comments of issue ``original_issue_number`` as a list.

    The issue is resolved on ``self.repo_obj``; ``workspace_slug`` and
    ``repo_name`` are not used.
    """
    comments = self.repo_obj.get_issue(original_issue_number).get_comments()
    return list(comments)
def get_issue_body(self, issue):
    """Return ``issue.body``.

    NOTE(review): the same method is already defined earlier in this class
    with an identical body; one of the two can be dropped.
    """
    text = issue.body
    return text
def get_username(self, issue, workspace_slug):
    """Return the login of the issue's ``user``; ``workspace_slug`` is not used."""
    author = issue.user
    return author.login
def get_issue_created_at(self, issue):
    """Return the issue's ``created_at`` value converted to ``str``."""
    created_at = issue.created_at
    return str(created_at)
def get_issue_comment_body(self, comment):
    """Return the comment's ``body`` attribute."""
    text = comment.body
    return text
def get_issue(self, workspace_slug, repo_name, original_issue_number):
    """Fetch issue ``original_issue_number`` from ``self.repo_obj``.

    ``workspace_slug`` and ``repo_name`` are not used.
    """
    repo = self.repo_obj
    return repo.get_issue(original_issue_number)
def get_repo_obj(self, workspace_slug, repo_name):
    """Return the client's repository object for ``repo_name``.

    ``workspace_slug`` is not used.
    """
    client = self.github_client
    return client.get_repo(repo_name)
def get_repo_name_for_indexing(self, repo_obj):
    """Derive the index key from ``repo_obj.full_name``.

    The name is lower-cased and every ``/`` becomes ``-``.
    """
    lowered = repo_obj.full_name.lower()
    # NOTE: after the first replace no '/' is left, so the '_/' replacement
    # cannot match; it is kept to mirror the established expression.
    dashed = lowered.replace('/', '-')
    return dashed.replace('_/', '-')
def check_if_issue_pull_request(self, issue):
    """Return ``True`` when ``issue.pull_request`` is truthy, else ``False``."""
    return bool(issue.pull_request)

View File

@ -19,29 +19,27 @@ MODEL = "text-embedding-ada-002"
class PRSimilarIssue: class PRSimilarIssue:
def __init__(self, issue_url: str, args: list = None): def __init__(self, issue_url: str, args: list = None):
if get_settings().config.git_provider != "github":
raise Exception("Only github is supported for similar issue tool")
self.cli_mode = get_settings().CONFIG.CLI_MODE self.cli_mode = get_settings().CONFIG.CLI_MODE
self.max_issues_to_scan = get_settings().pr_similar_issue.max_issues_to_scan self.max_issues_to_scan = get_settings().pr_similar_issue.max_issues_to_scan
self.issue_url = issue_url self.issue_url = issue_url
self.git_provider = get_git_provider()() self.git_provider = get_git_provider()()
repo_name, issue_number = self.git_provider._parse_issue_url(issue_url.split('=')[-1]) self.workspace_slug, self.repo_name, self.issue_number = self.git_provider._parse_issue_url(issue_url.split('=')[-1])
self.git_provider.repo = repo_name self.git_provider.repo = self.repo_name
self.git_provider.repo_obj = self.git_provider.github_client.get_repo(repo_name) self.git_provider.repo_obj = self.git_provider.get_repo_obj(self.workspace_slug, self.repo_name)
self.token_handler = TokenHandler() self.token_handler = TokenHandler()
repo_obj = self.git_provider.repo_obj repo_obj = self.git_provider.repo_obj
repo_name_for_index = self.repo_name_for_index = repo_obj.full_name.lower().replace('/', '-').replace('_/', '-') repo_name_for_index = self.repo_name_for_index = self.git_provider.get_repo_name_for_indexing(repo_obj)
index_name = self.index_name = "codium-ai-pr-agent-issues" index_name = self.index_name = "codium-ai-pr-agent-issues"
# assuming pinecone api key and environment are set in secrets file # assuming pinecone api key and environment are set in secrets file
try: try:
api_key = get_settings().pinecone.api_key api_key = get_settings().github.api_key
environment = get_settings().pinecone.environment environment = get_settings().github.environment
except Exception: except Exception:
if not self.cli_mode: if not self.cli_mode:
repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) workspace_slug, repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1])
issue_main = self.git_provider.repo_obj.get_issue(original_issue_number) issue_main = self.git_provider.get_issue(workspace_slug, repo_name, original_issue_number)
issue_main.create_comment("Please set pinecone api key and environment in secrets file") issue_main.create_comment("Please set pinecone api key and environment in secrets file")
raise Exception("Please set pinecone api key and environment in secrets file") raise Exception("Please set pinecone api key and environment in secrets file")
@ -65,16 +63,18 @@ class PRSimilarIssue:
logging.info('Indexing the entire repo...') logging.info('Indexing the entire repo...')
logging.info('Getting issues...') logging.info('Getting issues...')
issues = list(repo_obj.get_issues(state='all')) issues = self.git_provider.get_repo_issues(repo_obj)
logging.info('Done') logging.info('Done')
self._update_index_with_issues(issues, repo_name_for_index, upsert=upsert) self._update_index_with_issues(issues, repo_name_for_index, upsert=upsert)
else: # update index if needed else: # update index if needed
pinecone_index = pinecone.Index(index_name=index_name) pinecone_index = pinecone.Index(index_name=index_name)
issues_to_update = [] issues_to_update = []
issues_paginated_list = repo_obj.get_issues(state='all') issues_paginated_list = []
issues_paginated_list = self.git_provider.get_repo_issues(repo_obj)
counter = 1 counter = 1
for issue in issues_paginated_list: for issue in issues_paginated_list:
if issue.pull_request: issue_pull_request = self.git_provider.check_if_issue_pull_request(issue)
if issue_pull_request:
continue continue
issue_str, comments, number = self._process_issue(issue) issue_str, comments, number = self._process_issue(issue)
issue_key = f"issue_{number}" issue_key = f"issue_{number}"
@ -99,8 +99,8 @@ class PRSimilarIssue:
async def run(self): async def run(self):
logging.info('Getting issue...') logging.info('Getting issue...')
repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) workspace_slug, repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1])
issue_main = self.git_provider.repo_obj.get_issue(original_issue_number) issue_main = self.git_provider.get_issue(workspace_slug, repo_name, original_issue_number)
issue_str, comments, number = self._process_issue(issue_main) issue_str, comments, number = self._process_issue(issue_main)
openai.api_key = get_settings().openai.key openai.api_key = get_settings().openai.key
logging.info('Done') logging.info('Done')
@ -132,25 +132,23 @@ class PRSimilarIssue:
logging.info('Publishing response...') logging.info('Publishing response...')
similar_issues_str = "### Similar Issues\n___\n\n" similar_issues_str = "### Similar Issues\n___\n\n"
for i, issue_number_similar in enumerate(relevant_issues_number_list): for i, issue_number_similar in enumerate(relevant_issues_number_list):
issue = self.git_provider.repo_obj.get_issue(issue_number_similar) issue = self.git_provider.get_issue(workspace_slug, repo_name, issue_number_similar)
title = issue.title title = issue.title
url = issue.html_url url = self.git_provider.get_issue_url(issue)
if relevant_comment_number_list[i] != -1:
url = list(issue.get_comments())[relevant_comment_number_list[i]].html_url
similar_issues_str += f"{i + 1}. **[{title}]({url})** (score={score_list[i]})\n\n" similar_issues_str += f"{i + 1}. **[{title}]({url})** (score={score_list[i]})\n\n"
if get_settings().config.publish_output: if get_settings().config.publish_output:
response = issue_main.create_comment(similar_issues_str) response = self.git_provider.create_issue_comment(similar_issues_str, workspace_slug, repo_name, original_issue_number)
logging.info(similar_issues_str) logging.info(similar_issues_str)
logging.info('Done') logging.info('Done')
def _process_issue(self, issue):
    """Build the text representation of one issue and collect its comments.

    Args:
        issue: Provider-specific issue object; it is read through
            ``self.git_provider`` accessors, plus its ``title`` attribute.

    Returns:
        ``(issue_str, comments, number)``. ``comments`` is an empty list
        when the ``pr_similar_issue.skip_comments`` setting is on.
    """
    header = issue.title
    body = self.git_provider.get_issue_body(issue)
    number = self.git_provider.get_issue_number(issue)
    if get_settings().pr_similar_issue.skip_comments:
        comments = []
    else:
        # Use the number of the issue being processed. self.issue_number is
        # the issue the tool was started on, which would attach the same
        # comments to every issue.
        comments = self.git_provider.get_issues_comments(self.workspace_slug, self.repo_name, number)
    issue_str = f"Issue Header: \"{header}\"\n\nIssue Body:\n{body}"
    return issue_str, comments, number
@ -158,7 +156,7 @@ class PRSimilarIssue:
logging.info('Processing issues...') logging.info('Processing issues...')
corpus = Corpus() corpus = Corpus()
example_issue_record = Record( example_issue_record = Record(
id=f"example_issue_{repo_name_for_index}", id=str([issue.number for issue in issues_list]),
text="example_issue", text="example_issue",
metadata=Metadata(repo=repo_name_for_index) metadata=Metadata(repo=repo_name_for_index)
) )
@ -166,7 +164,9 @@ class PRSimilarIssue:
counter = 0 counter = 0
for issue in issues_list: for issue in issues_list:
if issue.pull_request:
issue_pull_request = self.git_provider.check_if_issue_pull_request(issue)
if issue_pull_request:
continue continue
counter += 1 counter += 1
@ -178,8 +178,8 @@ class PRSimilarIssue:
issue_str, comments, number = self._process_issue(issue) issue_str, comments, number = self._process_issue(issue)
issue_key = f"issue_{number}" issue_key = f"issue_{number}"
username = issue.user.login username = self.git_provider.get_username(issue, self.workspace_slug)
created_at = str(issue.created_at) created_at = self.git_provider.get_issue_created_at(issue)
if len(issue_str) < 8000 or \ if len(issue_str) < 8000 or \
self.token_handler.count_tokens(issue_str) < MAX_TOKENS[MODEL]: # fast reject first self.token_handler.count_tokens(issue_str) < MAX_TOKENS[MODEL]: # fast reject first
issue_record = Record( issue_record = Record(
@ -193,7 +193,7 @@ class PRSimilarIssue:
corpus.append(issue_record) corpus.append(issue_record)
if comments: if comments:
for j, comment in enumerate(comments): for j, comment in enumerate(comments):
comment_body = comment.body comment_body = self.git_provider.get_issue_comment_body(comment)
num_words_comment = len(comment_body.split()) num_words_comment = len(comment_body.split())
if num_words_comment < 10 or not isinstance(comment_body, str): if num_words_comment < 10 or not isinstance(comment_body, str):
continue continue
@ -233,8 +233,8 @@ class PRSimilarIssue:
ds = Dataset.from_pandas(df, meta) ds = Dataset.from_pandas(df, meta)
logging.info('Done') logging.info('Done')
api_key = get_settings().pinecone.api_key api_key = get_settings().github.api_key
environment = get_settings().pinecone.environment environment = get_settings().github.environment
if not upsert: if not upsert:
logging.info('Creating index from scratch...') logging.info('Creating index from scratch...')
ds.to_pinecone_index(self.index_name, api_key=api_key, environment=environment) ds.to_pinecone_index(self.index_name, api_key=api_key, environment=environment)