bitbucket similar issue

This commit is contained in:
sarbjitgrewal
2023-10-17 11:32:37 +05:30
parent 91afd29aef
commit 4cc9ab5bc6
3 changed files with 179 additions and 37 deletions

View File

@ -14,8 +14,7 @@ from .git_provider import FilePatchInfo, GitProvider
class BitbucketProvider(GitProvider):
def __init__(
self, pr_url: Optional[str] = None, incremental: Optional[bool] = False
):
self, pr_url: Optional[str] = None, incremental: Optional[bool] = False):
s = requests.Session()
try:
bearer = context.get("bitbucket_bearer_token", None)
@ -32,12 +31,15 @@ class BitbucketProvider(GitProvider):
self.repo = None
self.pr_num = None
self.pr = None
self.feature = None
self.issue_num = None
self.issue_name = None
self.temp_comments = []
self.incremental = incremental
if pr_url:
if pr_url and 'pull' in pr_url:
self.set_pr(pr_url)
self.bitbucket_comment_api_url = self.pr._BitbucketBase__data["links"]["comments"]["href"]
self.bitbucket_pull_request_api_url = self.pr._BitbucketBase__data["links"]['self']['href']
self.bitbucket_comment_api_url = self.pr._BitbucketBase__data["links"]["comments"]["href"]
self.bitbucket_pull_request_api_url = self.pr._BitbucketBase__data["links"]['self']['href']
def get_repo_settings(self):
try:
@ -229,6 +231,27 @@ class BitbucketProvider(GitProvider):
return workspace_slug, repo_slug, pr_number
@staticmethod
def _parse_issue_url(issue_url: str) -> Tuple[str, str, int]:
    """Split a Bitbucket issue URL into its identifying parts.

    Args:
        issue_url: URL whose path looks like
            ``/<workspace>/<repo>/issues/<number>[/<title-slug>]``.

    Returns:
        A ``(workspace_slug, repo_slug, issue_number)`` tuple.

    Raises:
        ValueError: if the host is not bitbucket.org, the path is not an
            issue path, or the issue number is not an integer.
    """
    parsed_url = urlparse(issue_url)

    if "bitbucket.org" not in parsed_url.netloc:
        raise ValueError("The provided URL is not a valid Bitbucket URL")

    path_parts = parsed_url.path.strip('/').split('/')

    # Only the first four segments are read below; the trailing title slug
    # is optional, so it must not be required by the length check.
    if len(path_parts) < 4 or path_parts[2] != "issues":
        raise ValueError("The provided URL does not appear to be a Bitbucket issue URL")

    workspace_slug = path_parts[0]
    repo_slug = path_parts[1]

    try:
        issue_number = int(path_parts[3])
    except ValueError as e:
        raise ValueError("Unable to convert issue number to integer") from e

    return workspace_slug, repo_slug, issue_number
def _get_repo(self):
if self.repo is None:
self.repo = self.bitbucket_client.workspaces.get(
@ -263,3 +286,68 @@ class BitbucketProvider(GitProvider):
# bitbucket does not support labels
def get_labels(self):
    """Return nothing: this provider has no labels to report."""
    return None
def get_issue(self, workspace_slug, repo_name, original_issue_number):
    """Look up a single issue of ``workspace_slug/repo_name`` by its number."""
    repository = self.bitbucket_client.repositories.get(workspace_slug, repo_name)
    return repository.issues.get(original_issue_number)
def get_issue_url(self, issue):
    """Return the HTML link stored in the issue's raw data (``links.html.href``)."""
    links = issue._BitbucketBase__data['links']
    html_link = links['html']
    return html_link['href']
def get_issue_body(self, issue):
    """Return the ``raw`` entry of the issue's ``content`` mapping."""
    content = issue.content
    return content['raw']
def get_issue_number(self, issue):
    """Return the issue's ``id`` attribute, used as the issue number."""
    issue_id = issue.id
    return issue_id
def get_issue_comment_body(self, comment):
    """Return the raw text of a comment given as a mapping (``content.raw``)."""
    content = comment['content']
    return content['raw']
def get_issue_comment_user(self, comment):
    """Return the display name of the comment's author (``user.display_name``)."""
    author = comment['user']
    return author['display_name']
def get_issue_created_at(self, issue):
    """Return the issue's ``created_on`` value converted to a string."""
    return f"{issue.created_on!s}"
def get_username(self, issue, workspace_slug):
    """Return the workspace slug as the username; ``issue`` is not consulted."""
    username = workspace_slug
    return username
def get_repo_issues(self, repo_obj):
    """Return the result of ``each()`` on the repository's private issues collection."""
    issues_api = repo_obj._Repository__issues
    return issues_api.each()
def get_issues_comments(self, workspace_slug, repo_name, original_issue_number):
    """Fetch all comments of a Bitbucket issue through the REST API.

    Args:
        workspace_slug: Workspace that owns the repository.
        repo_name: Repository slug.
        original_issue_number: Number of the issue whose comments are wanted.

    Returns:
        A list with the entries of every page's ``values`` array.

    Raises:
        requests.HTTPError: if Bitbucket answers with an error status.
    """
    url = f"https://api.bitbucket.org/2.0/repositories/{workspace_slug}/{repo_name}/issues/{original_issue_number}/comments"

    headers = {}
    # Send the same bearer token create_issue_comment uses, but only when one
    # is configured, so public repositories keep working without credentials.
    bearer_token = get_settings().get("BITBUCKET.BEARER_TOKEN", None)
    if bearer_token:
        headers['Authorization'] = f'Bearer {bearer_token}'

    comments = []
    # Paginated responses carry a "next" link until the last page is reached.
    while url:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        page = response.json()
        comments.extend(page['values'])
        url = page.get('next')
    return comments
def create_issue_comment(self, similar_issues_str, workspace_slug, repo_name, original_issue_number):
    """Post ``similar_issues_str`` as a new comment on a Bitbucket issue.

    Failures (network errors or an error status from Bitbucket) are logged
    instead of raised, matching the GitHub provider's method of the same name.

    Args:
        similar_issues_str: Comment text, sent as the ``content.raw`` field.
        workspace_slug: Workspace that owns the repository.
        repo_name: Repository slug.
        original_issue_number: Number of the issue to comment on.
    """
    import logging

    url = f"https://api.bitbucket.org/2.0/repositories/{workspace_slug}/{repo_name}/issues/{original_issue_number}/comments"
    payload = json.dumps({
        "content": {
            "raw": similar_issues_str
        }
    })
    headers = {
        'Authorization': f'Bearer {get_settings().get("BITBUCKET.BEARER_TOKEN", None)}',
        'Content-Type': 'application/json'
    }

    try:
        response = requests.post(url, headers=headers, data=payload, timeout=30)
        # Without this check a 4xx/5xx answer would pass unnoticed.
        response.raise_for_status()
    except requests.RequestException as e:
        logging.exception(f"Failed to create issue comment, error: {e}")
def get_repo_obj(self, workspace_slug, repo_name):
    """Fetch the repository object for ``workspace_slug/repo_name`` from the client."""
    repositories = self.bitbucket_client.repositories
    return repositories.get(workspace_slug, repo_name)
def get_repo_name_for_indexing(self, repo_obj):
    """Derive the index namespace from the repository's full name.

    The ``full_name`` entry of the repository's raw data is lower-cased and
    every ``/`` is turned into ``-``.
    """
    full_name = repo_obj._BitbucketBase__data['full_name']
    # A further .replace('_/', '-') used to follow; it could never match once
    # every '/' was already replaced, so dropping it leaves the result as is.
    return full_name.lower().replace('/', '-')
def check_if_issue_pull_request(self, issue):
    """Always answer ``False``; ``issue`` is not inspected."""
    is_pull_request = False
    return is_pull_request

View File

@ -336,8 +336,9 @@ class GithubProvider(GitProvider):
issue_number = int(path_parts[3])
except ValueError as e:
raise ValueError("Unable to convert issue number to integer") from e
workspace_slug = None
return repo_name, issue_number
return workspace_slug, repo_name, issue_number
def _get_github_client(self):
deployment_type = get_settings().get("GITHUB.DEPLOYMENT_TYPE", "user")
@ -454,3 +455,56 @@ class GithubProvider(GitProvider):
return pr_id
except:
return ""
def get_repo_issues(self, repo_obj):
    """Return every issue of the repository, in any state, as a list."""
    all_issues = repo_obj.get_issues(state='all')
    return [*all_issues]
def get_issues_comments(self, workspace_slug, repo_name, original_issue_number):
    """Return the comments of issue ``original_issue_number`` as a list.

    ``workspace_slug`` and ``repo_name`` are accepted for signature parity
    with the other providers; the lookup goes through ``self.repo_obj``.
    """
    issue = self.repo_obj.get_issue(original_issue_number)
    return list(issue.get_comments())

def get_issue_url(self, issue):
    """Return the issue's ``html_url``."""
    return issue.html_url

def create_issue_comment(self, similar_issues_str, workspace_slug, repo_name, original_issue_number):
    """Post ``similar_issues_str`` as a comment on the given issue.

    Any failure is logged rather than raised.
    """
    try:
        issue = self.repo_obj.get_issue(original_issue_number)
        issue.create_comment(similar_issues_str)
    except Exception as e:
        logging.exception(f"Failed to create issue comment, error: {e}")

def get_issue_body(self, issue):
    """Return the issue's ``body``."""
    return issue.body

def get_issue_number(self, issue):
    """Return the issue's ``number``."""
    return issue.number
def get_username(self, issue, workspace_slug):
    """Return the login of the issue's author; ``workspace_slug`` is unused."""
    author = issue.user
    return author.login
def get_issue_created_at(self, issue):
    """Return the issue's ``created_at`` value converted to a string."""
    return f"{issue.created_at!s}"
def get_issue_comment_body(self, comment):
    """Return the comment's ``body`` attribute."""
    text = comment.body
    return text
def get_issue(self, workspace_slug, repo_name, original_issue_number):
    """Fetch an issue by number from ``self.repo_obj``.

    ``workspace_slug`` and ``repo_name`` are not used by this lookup.
    """
    repository = self.repo_obj
    return repository.get_issue(original_issue_number)
def get_repo_obj(self, workspace_slug, repo_name):
    """Fetch the repository named ``repo_name`` from the GitHub client.

    ``workspace_slug`` is not used by this lookup.
    """
    client = self.github_client
    return client.get_repo(repo_name)
def get_repo_name_for_indexing(self, repo_obj):
    """Derive the index namespace from the repository's ``full_name``.

    The name is lower-cased and every ``/`` is turned into ``-``.
    """
    # A further .replace('_/', '-') used to follow; it could never match once
    # every '/' was already replaced, so dropping it leaves the result as is.
    return repo_obj.full_name.lower().replace('/', '-')
def check_if_issue_pull_request(self, issue):
    """Tell whether the issue carries a truthy ``pull_request`` attribute."""
    return bool(issue.pull_request)

View File

@ -19,29 +19,27 @@ MODEL = "text-embedding-ada-002"
class PRSimilarIssue:
def __init__(self, issue_url: str, args: list = None):
if get_settings().config.git_provider != "github":
raise Exception("Only github is supported for similar issue tool")
self.cli_mode = get_settings().CONFIG.CLI_MODE
self.max_issues_to_scan = get_settings().pr_similar_issue.max_issues_to_scan
self.issue_url = issue_url
self.git_provider = get_git_provider()()
repo_name, issue_number = self.git_provider._parse_issue_url(issue_url.split('=')[-1])
self.git_provider.repo = repo_name
self.git_provider.repo_obj = self.git_provider.github_client.get_repo(repo_name)
self.workspace_slug, self.repo_name, self.issue_number = self.git_provider._parse_issue_url(issue_url.split('=')[-1])
self.git_provider.repo = self.repo_name
self.git_provider.repo_obj = self.git_provider.get_repo_obj(self.workspace_slug, self.repo_name)
self.token_handler = TokenHandler()
repo_obj = self.git_provider.repo_obj
repo_name_for_index = self.repo_name_for_index = repo_obj.full_name.lower().replace('/', '-').replace('_/', '-')
repo_name_for_index = self.repo_name_for_index = self.git_provider.get_repo_name_for_indexing(repo_obj)
index_name = self.index_name = "codium-ai-pr-agent-issues"
# assuming pinecone api key and environment are set in secrets file
try:
api_key = get_settings().pinecone.api_key
environment = get_settings().pinecone.environment
api_key = get_settings().github.api_key
environment = get_settings().github.environment
except Exception:
if not self.cli_mode:
repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1])
issue_main = self.git_provider.repo_obj.get_issue(original_issue_number)
workspace_slug, repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1])
issue_main = self.git_provider.get_issue(workspace_slug, repo_name, original_issue_number)
issue_main.create_comment("Please set pinecone api key and environment in secrets file")
raise Exception("Please set pinecone api key and environment in secrets file")
@ -65,16 +63,18 @@ class PRSimilarIssue:
logging.info('Indexing the entire repo...')
logging.info('Getting issues...')
issues = list(repo_obj.get_issues(state='all'))
issues = self.git_provider.get_repo_issues(repo_obj)
logging.info('Done')
self._update_index_with_issues(issues, repo_name_for_index, upsert=upsert)
else: # update index if needed
pinecone_index = pinecone.Index(index_name=index_name)
issues_to_update = []
issues_paginated_list = repo_obj.get_issues(state='all')
issues_paginated_list = []
issues_paginated_list = self.git_provider.get_repo_issues(repo_obj)
counter = 1
for issue in issues_paginated_list:
if issue.pull_request:
issue_pull_request = self.git_provider.check_if_issue_pull_request(issue)
if issue_pull_request:
continue
issue_str, comments, number = self._process_issue(issue)
issue_key = f"issue_{number}"
@ -99,8 +99,8 @@ class PRSimilarIssue:
async def run(self):
logging.info('Getting issue...')
repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1])
issue_main = self.git_provider.repo_obj.get_issue(original_issue_number)
workspace_slug, repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1])
issue_main = self.git_provider.get_issue(workspace_slug, repo_name, original_issue_number)
issue_str, comments, number = self._process_issue(issue_main)
openai.api_key = get_settings().openai.key
logging.info('Done')
@ -132,25 +132,23 @@ class PRSimilarIssue:
logging.info('Publishing response...')
similar_issues_str = "### Similar Issues\n___\n\n"
for i, issue_number_similar in enumerate(relevant_issues_number_list):
issue = self.git_provider.repo_obj.get_issue(issue_number_similar)
issue = self.git_provider.get_issue(workspace_slug, repo_name, issue_number_similar)
title = issue.title
url = issue.html_url
if relevant_comment_number_list[i] != -1:
url = list(issue.get_comments())[relevant_comment_number_list[i]].html_url
url = self.git_provider.get_issue_url(issue)
similar_issues_str += f"{i + 1}. **[{title}]({url})** (score={score_list[i]})\n\n"
if get_settings().config.publish_output:
response = issue_main.create_comment(similar_issues_str)
response = self.git_provider.create_issue_comment(similar_issues_str, workspace_slug, repo_name, original_issue_number)
logging.info(similar_issues_str)
logging.info('Done')
def _process_issue(self, issue):
    """Build the text form of an issue and collect its comments.

    All provider-specific access goes through ``self.git_provider`` so the
    same code serves every supported git provider.

    Args:
        issue: Issue object as returned by the active git provider.

    Returns:
        A ``(issue_str, comments, number)`` tuple; ``comments`` is empty when
        the ``pr_similar_issue.skip_comments`` setting is enabled.
    """
    header = issue.title
    body = self.git_provider.get_issue_body(issue)
    number = self.git_provider.get_issue_number(issue)
    if get_settings().pr_similar_issue.skip_comments:
        comments = []
    else:
        # Ask for the comments of the issue being processed, not of the issue
        # the tool was invoked on (self.issue_number).
        comments = self.git_provider.get_issues_comments(self.workspace_slug, self.repo_name, number)
    issue_str = f"Issue Header: \"{header}\"\n\nIssue Body:\n{body}"
    return issue_str, comments, number
@ -158,7 +156,7 @@ class PRSimilarIssue:
logging.info('Processing issues...')
corpus = Corpus()
example_issue_record = Record(
id=f"example_issue_{repo_name_for_index}",
id=str([issue.number for issue in issues_list]),
text="example_issue",
metadata=Metadata(repo=repo_name_for_index)
)
@ -166,7 +164,9 @@ class PRSimilarIssue:
counter = 0
for issue in issues_list:
if issue.pull_request:
issue_pull_request = self.git_provider.check_if_issue_pull_request(issue)
if issue_pull_request:
continue
counter += 1
@ -178,8 +178,8 @@ class PRSimilarIssue:
issue_str, comments, number = self._process_issue(issue)
issue_key = f"issue_{number}"
username = issue.user.login
created_at = str(issue.created_at)
username = self.git_provider.get_username(issue, self.workspace_slug)
created_at = self.git_provider.get_issue_created_at(issue)
if len(issue_str) < 8000 or \
self.token_handler.count_tokens(issue_str) < MAX_TOKENS[MODEL]: # fast reject first
issue_record = Record(
@ -193,7 +193,7 @@ class PRSimilarIssue:
corpus.append(issue_record)
if comments:
for j, comment in enumerate(comments):
comment_body = comment.body
comment_body = self.git_provider.get_issue_comment_body(comment)
num_words_comment = len(comment_body.split())
if num_words_comment < 10 or not isinstance(comment_body, str):
continue
@ -233,8 +233,8 @@ class PRSimilarIssue:
ds = Dataset.from_pandas(df, meta)
logging.info('Done')
api_key = get_settings().pinecone.api_key
environment = get_settings().pinecone.environment
api_key = get_settings().github.api_key
environment = get_settings().github.environment
if not upsert:
logging.info('Creating index from scratch...')
ds.to_pinecone_index(self.index_name, api_key=api_key, environment=environment)