Compare commits

...

12 Commits

4 changed files with 202 additions and 40 deletions

View File

@ -113,13 +113,13 @@ See the [Release notes](./RELEASE_NOTES.md) for updates on the latest changes.
| | ⮑ Extended | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | :white_check_mark: | | | ⮑ Extended | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | :white_check_mark: |
| | Reflect and Review | :white_check_mark: | :white_check_mark: | :white_check_mark: | | :white_check_mark: | :white_check_mark: | | | Reflect and Review | :white_check_mark: | :white_check_mark: | :white_check_mark: | | :white_check_mark: | :white_check_mark: |
| | Update CHANGELOG.md | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | | | | Update CHANGELOG.md | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | |
| | Find similar issue | :white_check_mark: | | | | | | | | Find similar issue | :white_check_mark: | | :white_check_mark: | | | |
| | Add Documentation | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | :white_check_mark: | | | Add Documentation | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | :white_check_mark: |
| | | | | | | | | | | | | | | |
| USAGE | CLI | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | USAGE | CLI | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |
| | App / webhook | :white_check_mark: | :white_check_mark: | | | | | | App / webhook | :white_check_mark: | :white_check_mark: | | | |
| | Tagging bot | :white_check_mark: | | | | | | | Tagging bot | :white_check_mark: | | | | |
| | Actions | :white_check_mark: | | | | | | | Actions | :white_check_mark: | | :white_check_mark: | | |
| | Web server | | | | | | :white_check_mark: | | | Web server | | | | | | :white_check_mark: |
| | | | | | | | | | | | | | | |
| CORE | PR compression | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | CORE | PR compression | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: |

View File

@ -10,12 +10,12 @@ from ..algo.pr_processing import find_line_number_of_relevant_line_in_file
from ..config_loader import get_settings from ..config_loader import get_settings
from ..log import get_logger from ..log import get_logger
from .git_provider import FilePatchInfo, GitProvider from .git_provider import FilePatchInfo, GitProvider
import ast
class BitbucketProvider(GitProvider): class BitbucketProvider(GitProvider):
def __init__( def __init__(
self, pr_url: Optional[str] = None, incremental: Optional[bool] = False self, pr_url: Optional[str] = None, incremental: Optional[bool] = False):
):
s = requests.Session() s = requests.Session()
try: try:
bearer = context.get("bitbucket_bearer_token", None) bearer = context.get("bitbucket_bearer_token", None)
@ -32,12 +32,15 @@ class BitbucketProvider(GitProvider):
self.repo = None self.repo = None
self.pr_num = None self.pr_num = None
self.pr = None self.pr = None
self.feature = None
self.issue_num = None
self.issue_name = None
self.temp_comments = [] self.temp_comments = []
self.incremental = incremental self.incremental = incremental
if pr_url: if pr_url and 'pull' in pr_url:
self.set_pr(pr_url) self.set_pr(pr_url)
self.bitbucket_comment_api_url = self.pr._BitbucketBase__data["links"]["comments"]["href"] self.bitbucket_comment_api_url = self.pr._BitbucketBase__data["links"]["comments"]["href"]
self.bitbucket_pull_request_api_url = self.pr._BitbucketBase__data["links"]['self']['href'] self.bitbucket_pull_request_api_url = self.pr._BitbucketBase__data["links"]['self']['href']
def get_repo_settings(self): def get_repo_settings(self):
try: try:
@ -228,6 +231,27 @@ class BitbucketProvider(GitProvider):
raise ValueError("Unable to convert PR number to integer") from e raise ValueError("Unable to convert PR number to integer") from e
return workspace_slug, repo_slug, pr_number return workspace_slug, repo_slug, pr_number
@staticmethod
def _parse_issue_url(issue_url: str) -> Tuple[str, str, int]:
    """Split a Bitbucket issue URL into workspace, repository and issue number.

    The expected path layout is
    ``/<workspace>/<repo>/issues/<number>[/<title-slug>]``.

    Args:
        issue_url: Full URL of a Bitbucket issue.

    Returns:
        A ``(workspace_slug, repo_slug, issue_number)`` tuple.

    Raises:
        ValueError: If the host is not a Bitbucket host, the path is not an
            issue path, or the issue number is not an integer.
    """
    parsed_url = urlparse(issue_url)

    if "bitbucket.org" not in parsed_url.netloc:
        raise ValueError("The provided URL is not a valid Bitbucket URL")

    path_parts = parsed_url.path.strip('/').split('/')
    # Only the first four segments are read below, so the trailing title slug
    # is optional rather than required.
    if len(path_parts) < 4 or path_parts[2] != "issues":
        raise ValueError("The provided URL does not appear to be a Bitbucket issue URL")

    workspace_slug = path_parts[0]
    repo_slug = path_parts[1]
    try:
        issue_number = int(path_parts[3])
    except ValueError as e:
        raise ValueError("Unable to convert issue number to integer") from e

    return workspace_slug, repo_slug, issue_number
def _get_repo(self): def _get_repo(self):
if self.repo is None: if self.repo is None:
@ -263,3 +287,81 @@ class BitbucketProvider(GitProvider):
# bitbucket does not support labels # bitbucket does not support labels
def get_labels(self): def get_labels(self):
pass pass
def get_issue(self, workspace_slug, repo_name, original_issue_number):
    """Look up a single issue through the Bitbucket client.

    The repository is resolved from ``workspace_slug`` / ``repo_name`` and the
    issue is then fetched from it by ``original_issue_number``.
    """
    repository = self.bitbucket_client.repositories.get(workspace_slug, repo_name)
    return repository.issues.get(original_issue_number)
def get_issue_url(self, issue):
    """Return the browser (HTML) link stored in the issue's raw data."""
    links = issue._BitbucketBase__data['links']
    return links['html']['href']
def get_issue_body(self, issue):
    """Return the ``raw`` entry of the issue's ``content`` mapping."""
    content = issue.content
    return content['raw']
def get_issue_number(self, issue):
    """Return the issue's ``id`` attribute, used as its number."""
    number = issue.id
    return number
def get_issue_comment_body(self, comment):
    """Return the ``raw`` text under the comment mapping's ``content`` key."""
    content = comment['content']
    return content['raw']
def get_issue_comment_user(self, comment):
    """Return the ``display_name`` of the comment mapping's ``user`` entry."""
    author = comment['user']
    return author['display_name']
def get_issue_created_at(self, issue):
    """Return the issue's ``created_on`` value converted to a string."""
    created_on = issue.created_on
    return str(created_on)
def get_username(self, issue, workspace_slug):
    """Return the name recorded for an issue.

    This implementation ignores ``issue`` and returns ``workspace_slug``
    unchanged.
    """
    username = workspace_slug
    return username
def get_repo_issues(self, repo_obj):
    """Return whatever the repository's issues collection yields from ``each()``."""
    issues_collection = repo_obj._Repository__issues
    return issues_collection.each()
def get_issues_comments(self, workspace_slug, repo_name, original_issue_number):
    """Fetch the comments of a Bitbucket issue through the REST API (2.0).

    Args:
        workspace_slug: Workspace that owns the repository.
        repo_name: Repository slug.
        original_issue_number: Number of the issue whose comments are wanted.

    Returns:
        A list with the ``values`` entries of every response page.

    Raises:
        requests.HTTPError: If Bitbucket answers with an error status.
    """
    import requests

    url = f"https://api.bitbucket.org/2.0/repositories/{workspace_slug}/{repo_name}/issues/{original_issue_number}/comments"

    # Send the same bearer token the comment-creating call uses, when one is
    # configured, so the request is not anonymous.
    headers = {}
    token = get_settings().get("BITBUCKET.BEARER_TOKEN", None)
    if token:
        headers['Authorization'] = f'Bearer {token}'

    comments = []
    # Follow the `next` link of each page until there are no more pages.
    while url:
        response = requests.request("GET", url, headers=headers)
        response.raise_for_status()
        page = response.json()
        comments.extend(page.get('values', []))
        url = page.get('next')
    return comments
def create_issue_comment(self, similar_issues_str, workspace_slug, repo_name, original_issue_number):
    """Post a comment on a Bitbucket issue through the REST API (2.0).

    Args:
        similar_issues_str: Text of the comment (sent as the ``raw`` content).
        workspace_slug: Workspace that owns the repository.
        repo_name: Repository slug.
        original_issue_number: Number of the issue to comment on.

    Returns:
        The HTTP response on success, or ``None`` when the request failed.
        Failures are logged rather than raised, matching the GitHub provider.
    """
    url = f"https://api.bitbucket.org/2.0/repositories/{workspace_slug}/{repo_name}/issues/{original_issue_number}/comments"
    payload = json.dumps({
        "content": {
            "raw": similar_issues_str
        }
    })
    headers = {
        'Authorization': f'Bearer {get_settings().get("BITBUCKET.BEARER_TOKEN", None)}',
        'Content-Type': 'application/json'
    }
    try:
        response = requests.request("POST", url, headers=headers, data=payload)
        # Surface 4xx/5xx answers instead of dropping them silently.
        response.raise_for_status()
    except Exception as e:
        get_logger().exception(f"Failed to create issue comment, error: {e}")
        return None
    return response
def get_repo_obj(self, workspace_slug, repo_name):
    """Return the repository the Bitbucket client resolves for the given slugs."""
    repositories = self.bitbucket_client.repositories
    return repositories.get(workspace_slug, repo_name)
def get_repo_name_for_indexing(self, repo_obj):
    """Build the index-friendly repository name.

    Takes ``full_name`` from the repository's raw data, lower-cases it and
    replaces every ``/`` with ``-``.
    """
    full_name = repo_obj._BitbucketBase__data['full_name']
    # The former trailing `.replace('_/', '-')` could never match: no `/` is
    # left after this replacement, so it was dropped without changing output.
    return full_name.lower().replace('/', '-')
def check_if_issue_pull_request(self, issue):
    """Report whether ``issue`` is a pull request.

    This implementation always answers ``False`` and does not inspect
    ``issue``.
    """
    is_pull_request = False
    return is_pull_request
def get_issue_numbers(self, issue):
    """Return the ids of the given issues as the string form of a list.

    Args:
        issue: Iterable of issue objects exposing an ``id`` attribute (the
            parameter keeps its singular name for caller compatibility).

    Returns:
        ``str`` of the list of ids, e.g. ``"[1, 2]"``.
    """
    # A distinct loop name avoids rebinding the parameter while iterating it.
    return str([single_issue.id for single_issue in issue])
def get_issue_numbers_from_list(self, issues):
    """Extract one issue number from an index record id.

    Two input shapes are accepted:

    * the string form of a list of numbers (e.g. ``"[3, 4]"``): every element
      is converted to ``int`` and the first one is returned, or ``None`` for
      an empty list;
    * a record id shaped like ``"issue_<n>.<suffix>"``: ``<n>`` is returned,
      parsed the same way the GitHub provider does.

    Raises:
        ValueError: If a number cannot be converted to ``int``.
    """
    try:
        parsed = ast.literal_eval(issues)
    except (ValueError, SyntaxError):
        # Not a Python literal: treat it as an "issue_<n>.<suffix>" record id.
        return int(issues.split('.')[0].split('_')[-1])

    int_list = [int(x) for x in parsed]
    return int_list[0] if int_list else None

View File

@ -241,7 +241,7 @@ class GithubProvider(GitProvider):
self.github_user_id = self.github_client.get_user().raw_data['login'] self.github_user_id = self.github_client.get_user().raw_data['login']
except Exception as e: except Exception as e:
self.github_user_id = "" self.github_user_id = ""
# logging.exception(f"Failed to get user id, error: {e}") # get_logger().exception(f"Failed to get user id, error: {e}")
return self.github_user_id return self.github_user_id
def get_notifications(self, since: datetime): def get_notifications(self, since: datetime):
@ -335,8 +335,9 @@ class GithubProvider(GitProvider):
issue_number = int(path_parts[3]) issue_number = int(path_parts[3])
except ValueError as e: except ValueError as e:
raise ValueError("Unable to convert issue number to integer") from e raise ValueError("Unable to convert issue number to integer") from e
workspace_slug = None
return repo_name, issue_number return workspace_slug, repo_name, issue_number
def _get_github_client(self): def _get_github_client(self):
deployment_type = get_settings().get("GITHUB.DEPLOYMENT_TYPE", "user") deployment_type = get_settings().get("GITHUB.DEPLOYMENT_TYPE", "user")
@ -453,3 +454,62 @@ class GithubProvider(GitProvider):
return pr_id return pr_id
except: except:
return "" return ""
def get_repo_issues(self, repo_obj):
    """Return a list of the repository's issues, requested with ``state='all'``."""
    all_issues = repo_obj.get_issues(state='all')
    return list(all_issues)
def get_issue_url(self, issue):
    """Return the issue's ``html_url``."""
    return issue.html_url

def create_issue_comment(self, similar_issues_str, workspace_slug, repo_name, original_issue_number):
    """Post ``similar_issues_str`` as a comment on the given issue.

    The issue is looked up on ``self.repo_obj``; ``workspace_slug`` and
    ``repo_name`` are accepted for interface parity and not used here.
    Failures are logged and swallowed.
    """
    try:
        issue = self.repo_obj.get_issue(original_issue_number)
        issue.create_comment(similar_issues_str)
    except Exception as e:
        get_logger().exception(f"Failed to create issue comment, error: {e}")

def get_issue_body(self, issue):
    """Return the issue's ``body``."""
    return issue.body

def get_issue_number(self, issue):
    """Return the issue's ``number``."""
    return issue.number

def get_issues_comments(self, workspace_slug, repo_name, original_issue_number):
    """Return the comments of the given issue as a list.

    The issue is looked up on ``self.repo_obj``; ``workspace_slug`` and
    ``repo_name`` are accepted for interface parity and not used here.
    An earlier duplicate definition that returned the issue itself was
    removed; this one was already the effective definition.
    """
    issue = self.repo_obj.get_issue(original_issue_number)
    return list(issue.get_comments())
def get_username(self, issue, workspace_slug):
    """Return the ``login`` of the issue's ``user``; ``workspace_slug`` is unused."""
    author = issue.user
    return author.login
def get_issue_created_at(self, issue):
    """Return the issue's ``created_at`` value converted to a string."""
    created_at = issue.created_at
    return str(created_at)
def get_issue_comment_body(self, comment):
    """Return the comment's ``body`` attribute."""
    body = comment.body
    return body
def get_issue(self, workspace_slug, repo_name, original_issue_number):
    """Fetch an issue from ``self.repo_obj`` by number.

    The number is converted to ``int`` first; ``workspace_slug`` and
    ``repo_name`` are not used by this implementation.
    """
    issue_number = int(original_issue_number)
    return self.repo_obj.get_issue(issue_number)
def get_repo_obj(self, workspace_slug, repo_name):
    """Return the repository the GitHub client resolves for ``repo_name``.

    ``workspace_slug`` is not used by this implementation.
    """
    client = self.github_client
    return client.get_repo(repo_name)
def get_repo_name_for_indexing(self, repo_obj):
    """Build the index-friendly repository name.

    Lower-cases the repository's ``full_name`` and replaces every ``/`` with
    ``-``.
    """
    # The former trailing `.replace('_/', '-')` could never match: no `/` is
    # left after this replacement, so it was dropped without changing output.
    return repo_obj.full_name.lower().replace('/', '-')
def check_if_issue_pull_request(self, issue):
    """Return ``True`` when the issue's ``pull_request`` attribute is truthy."""
    return bool(issue.pull_request)
def get_issue_numbers(self, issues_list):
    """Return the ``number`` of every issue as the string form of a list."""
    numbers = []
    for entry in issues_list:
        numbers.append(entry.number)
    return str(numbers)
def get_issue_numbers_from_list(self, r):
    """Parse the issue number out of a record id string.

    Takes the text before the first ``.`` and converts what follows its last
    ``_`` to ``int``.
    """
    issue_key = r.partition('.')[0]
    number_text = issue_key.rpartition('_')[2]
    return int(number_text)

View File

@ -19,19 +19,17 @@ MODEL = "text-embedding-ada-002"
class PRSimilarIssue: class PRSimilarIssue:
def __init__(self, issue_url: str, args: list = None): def __init__(self, issue_url: str, args: list = None):
if get_settings().config.git_provider != "github":
raise Exception("Only github is supported for similar issue tool")
self.cli_mode = get_settings().CONFIG.CLI_MODE self.cli_mode = get_settings().CONFIG.CLI_MODE
self.max_issues_to_scan = get_settings().pr_similar_issue.max_issues_to_scan self.max_issues_to_scan = get_settings().pr_similar_issue.max_issues_to_scan
self.issue_url = issue_url self.issue_url = issue_url
self.git_provider = get_git_provider()() self.git_provider = get_git_provider()()
repo_name, issue_number = self.git_provider._parse_issue_url(issue_url.split('=')[-1]) self.workspace_slug, self.repo_name, self.issue_number = self.git_provider._parse_issue_url(issue_url.split('=')[-1])
self.git_provider.repo = repo_name self.git_provider.repo = self.repo_name
self.git_provider.repo_obj = self.git_provider.github_client.get_repo(repo_name) self.git_provider.repo_obj = self.git_provider.get_repo_obj(self.workspace_slug, self.repo_name)
self.token_handler = TokenHandler() self.token_handler = TokenHandler()
repo_obj = self.git_provider.repo_obj repo_obj = self.git_provider.repo_obj
repo_name_for_index = self.repo_name_for_index = repo_obj.full_name.lower().replace('/', '-').replace('_/', '-') repo_name_for_index = self.repo_name_for_index = self.git_provider.get_repo_name_for_indexing(repo_obj)
index_name = self.index_name = "codium-ai-pr-agent-issues" index_name = self.index_name = "codium-ai-pr-agent-issues"
# assuming pinecone api key and environment are set in secrets file # assuming pinecone api key and environment are set in secrets file
@ -40,17 +38,17 @@ class PRSimilarIssue:
environment = get_settings().pinecone.environment environment = get_settings().pinecone.environment
except Exception: except Exception:
if not self.cli_mode: if not self.cli_mode:
repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) workspace_slug, repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1])
issue_main = self.git_provider.repo_obj.get_issue(original_issue_number) issue_main = self.git_provider.get_issue(workspace_slug, repo_name, original_issue_number)
issue_main.create_comment("Please set pinecone api key and environment in secrets file") issue_main.create_comment("Please set pinecone api key and environment in secrets file")
raise Exception("Please set pinecone api key and environment in secrets file") raise Exception("Please set pinecone api key and environment in secrets file")
# check if index exists, and if repo is already indexed # check if index exists, and if repo is already indexed
run_from_scratch = False run_from_scratch = False
if run_from_scratch: # for debugging if run_from_scratch: # for debugging
if not index_name in pinecone.list_indexes(): pinecone.init(api_key=api_key, environment=environment)
if index_name in pinecone.list_indexes():
get_logger().info('Removing index...') get_logger().info('Removing index...')
pinecone.init(api_key=api_key, environment=environment)
pinecone.delete_index(index_name) pinecone.delete_index(index_name)
get_logger().info('Done') get_logger().info('Done')
@ -72,19 +70,20 @@ class PRSimilarIssue:
get_logger().info('Indexing the entire repo...') get_logger().info('Indexing the entire repo...')
get_logger().info('Getting issues...') get_logger().info('Getting issues...')
issues = list(repo_obj.get_issues(state='all')) issues = self.git_provider.get_repo_issues(repo_obj)
get_logger().info('Done') get_logger().info('Done')
self._update_index_with_issues(issues, repo_name_for_index, upsert=upsert) self._update_index_with_issues(issues, repo_name_for_index, upsert=upsert)
else: # update index if needed else: # update index if needed
pinecone_index = pinecone.Index(index_name=index_name) pinecone_index = pinecone.Index(index_name=index_name)
issues_to_update = [] issues_to_update = []
issues_paginated_list = repo_obj.get_issues(state='all') issues_paginated_list = self.git_provider.get_repo_issues(repo_obj)
counter = 1 counter = 1
for issue in issues_paginated_list: for issue in issues_paginated_list:
if issue.pull_request: issue_pull_request = self.git_provider.check_if_issue_pull_request(issue)
if issue_pull_request:
continue continue
issue_str, comments, number = self._process_issue(issue) issue_str, comments, number = self._process_issue(issue)
issue_key = f"issue_{number}" issue_key = f"issue_{number}"
id = issue_key + "." + "issue" id = issue_key + "." + "issue"
res = pinecone_index.fetch([id]).to_dict() res = pinecone_index.fetch([id]).to_dict()
is_new_issue = True is_new_issue = True
@ -106,8 +105,8 @@ class PRSimilarIssue:
async def run(self): async def run(self):
get_logger().info('Getting issue...') get_logger().info('Getting issue...')
repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) workspace_slug, repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1])
issue_main = self.git_provider.repo_obj.get_issue(original_issue_number) issue_main = self.git_provider.get_issue(workspace_slug, repo_name, original_issue_number)
issue_str, comments, number = self._process_issue(issue_main) issue_str, comments, number = self._process_issue(issue_main)
openai.api_key = get_settings().openai.key openai.api_key = get_settings().openai.key
get_logger().info('Done') get_logger().info('Done')
@ -129,11 +128,11 @@ class PRSimilarIssue:
continue continue
try: try:
issue_number = int(r["id"].split('.')[0].split('_')[-1]) issue_id= r['id']
issue_number = self.git_provider.get_issue_numbers_from_list(issue_id)
except: except:
get_logger().debug(f"Failed to parse issue number from {r['id']}") get_logger().debug(f"Failed to parse issue number from {r['id']}")
continue continue
if original_issue_number == issue_number: if original_issue_number == issue_number:
continue continue
if issue_number not in relevant_issues_number_list: if issue_number not in relevant_issues_number_list:
@ -148,33 +147,32 @@ class PRSimilarIssue:
get_logger().info('Publishing response...') get_logger().info('Publishing response...')
similar_issues_str = "### Similar Issues\n___\n\n" similar_issues_str = "### Similar Issues\n___\n\n"
for i, issue_number_similar in enumerate(relevant_issues_number_list): for i, issue_number_similar in enumerate(relevant_issues_number_list):
issue = self.git_provider.repo_obj.get_issue(issue_number_similar) issue = self.git_provider.get_issue(workspace_slug, repo_name, issue_number_similar)
title = issue.title title = issue.title
url = issue.html_url url = self.git_provider.get_issue_url(issue)
if relevant_comment_number_list[i] != -1:
url = list(issue.get_comments())[relevant_comment_number_list[i]].html_url
similar_issues_str += f"{i + 1}. **[{title}]({url})** (score={score_list[i]})\n\n" similar_issues_str += f"{i + 1}. **[{title}]({url})** (score={score_list[i]})\n\n"
if get_settings().config.publish_output: if get_settings().config.publish_output:
response = issue_main.create_comment(similar_issues_str) response = self.git_provider.create_issue_comment(similar_issues_str, workspace_slug, repo_name, original_issue_number)
get_logger().info(similar_issues_str) get_logger().info(similar_issues_str)
get_logger().info('Done') get_logger().info('Done')
def _process_issue(self, issue): def _process_issue(self, issue):
header = issue.title header = issue.title
body = issue.body body = self.git_provider.get_issue_body(issue)
number = issue.number number = self.git_provider.get_issue_number(issue)
if get_settings().pr_similar_issue.skip_comments: if get_settings().pr_similar_issue.skip_comments:
comments = [] comments = []
else: else:
comments = list(issue.get_comments()) comments = self.git_provider.get_issues_comments(self.workspace_slug, self.repo_name, self.issue_number)
issue_str = f"Issue Header: \"{header}\"\n\nIssue Body:\n{body}" issue_str = f"Issue Header: \"{header}\"\n\nIssue Body:\n{body}"
return issue_str, comments, number return issue_str, comments, number
def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=False): def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=False):
get_logger().info('Processing issues...') get_logger().info('Processing issues...')
corpus = Corpus() corpus = Corpus()
issues = self.git_provider.get_issue_numbers(issues_list)
example_issue_record = Record( example_issue_record = Record(
id=f"example_issue_{repo_name_for_index}", id=str(issues),
text="example_issue", text="example_issue",
metadata=Metadata(repo=repo_name_for_index) metadata=Metadata(repo=repo_name_for_index)
) )
@ -182,7 +180,9 @@ class PRSimilarIssue:
counter = 0 counter = 0
for issue in issues_list: for issue in issues_list:
if issue.pull_request:
issue_pull_request = self.git_provider.check_if_issue_pull_request(issue)
if issue_pull_request:
continue continue
counter += 1 counter += 1
@ -194,8 +194,8 @@ class PRSimilarIssue:
issue_str, comments, number = self._process_issue(issue) issue_str, comments, number = self._process_issue(issue)
issue_key = f"issue_{number}" issue_key = f"issue_{number}"
username = issue.user.login username = self.git_provider.get_username(issue, self.workspace_slug)
created_at = str(issue.created_at) created_at = self.git_provider.get_issue_created_at(issue)
if len(issue_str) < 8000 or \ if len(issue_str) < 8000 or \
self.token_handler.count_tokens(issue_str) < MAX_TOKENS[MODEL]: # fast reject first self.token_handler.count_tokens(issue_str) < MAX_TOKENS[MODEL]: # fast reject first
issue_record = Record( issue_record = Record(
@ -209,7 +209,7 @@ class PRSimilarIssue:
corpus.append(issue_record) corpus.append(issue_record)
if comments: if comments:
for j, comment in enumerate(comments): for j, comment in enumerate(comments):
comment_body = comment.body comment_body = self.git_provider.get_issue_comment_body(comment)
num_words_comment = len(comment_body.split()) num_words_comment = len(comment_body.split())
if num_words_comment < 10 or not isinstance(comment_body, str): if num_words_comment < 10 or not isinstance(comment_body, str):
continue continue