Support cloning repo

Support forcing accurate token calculation (claude)
Help docs: Add desired branch in case of user supplied git repo, with default set to "main"
Better documentation for getting canonical url parts
This commit is contained in:
Eyal Sharon
2025-03-23 09:55:58 +02:00
parent 5e7e353670
commit dd80276f3f
13 changed files with 263 additions and 47 deletions

View File

@ -7,6 +7,8 @@ from urllib.parse import quote_plus, urlparse
from atlassian.bitbucket import Bitbucket
from requests.exceptions import HTTPError
import shlex
import subprocess
from ..algo.git_patch_processing import decode_if_bytes
from ..algo.language_handler import is_valid_file
@ -47,6 +49,35 @@ class BitbucketServerProvider(GitProvider):
if pr_url:
self.set_pr(pr_url)
def get_git_repo_url(self, pr_url: str=None) -> str: #bitbucket server does not support issue url, so ignore param
try:
parsed_url = urlparse(self.pr_url)
return f"{parsed_url.scheme}://{parsed_url.netloc}/scm/{self.workspace_slug.lower()}/{self.repo_slug.lower()}.git"
except Exception as e:
get_logger().exception(f"url is not a valid merge requests url: {self.pr_url}")
return ""
# Given a git repo url, return prefix and suffix of the provider in order to view a given file belonging to that repo.
# Example: https://bitbucket.dev.my_inc.com/scm/my_work/my_repo.git and branch: my_branch -> prefix: "https://bitbucket.dev.my_inc.com/projects/MY_WORK/repos/my_repo/browse/src", suffix: "?at=refs%2Fheads%2Fmy_branch"
# In case git url is not provided, provider will use PR context (which includes branch) to determine the prefix and suffix.
def get_canonical_url_parts(self, repo_git_url:str=None, desired_branch:str=None) -> Tuple[str, str]:
workspace_name = None
project_name = None
if not repo_git_url:
desired_branch = self.get_pr_branch()
workspace_name = self.workspace_slug
project_name = self.repo_slug
else:
repo_path = repo_git_url.split('.git')[0].split('scm/')[-1]
if repo_path.count('/') == 1: # Has to have the form <workspace>/<repo>
workspace_name, project_name = repo_path.split('/')
if not workspace_name or not project_name:
get_logger().error(f"workspace_name or project_name not found in context, either git url: {repo_git_url} or uninitialized workspace/project.")
return ("", "")
prefix = f"{self.bitbucket_server_url}/projects/{workspace_name}/repos/{project_name}/browse"
suffix = f"?at=refs%2Fheads%2F{desired_branch}"
return (prefix, suffix)
def get_repo_settings(self):
try:
content = self.bitbucket_client.get_content_of_file(self.workspace_slug, self.repo_slug, ".pr_agent.toml", self.get_pr_branch())
@ -138,31 +169,6 @@ class BitbucketServerProvider(GitProvider):
return False
return True
def get_git_repo_url(self, pr_url: str=None) -> str: #bitbucket server does not support issue url, so ignore param
try:
parsed_url = urlparse(self.pr_url)
return f"{parsed_url.scheme}://{parsed_url.netloc}/scm/{self.workspace_slug.lower()}/{self.repo_slug.lower()}.git"
except Exception as e:
get_logger().exception(f"url is not a valid merge requests url: {self.pr_url}")
return ""
def get_canonical_url_parts(self, repo_git_url:str=None, desired_branch:str=None) -> Tuple[str, str]:
workspace_name = None
project_name = None
if not repo_git_url:
workspace_name = self.workspace_slug
project_name = self.repo_slug
else:
repo_path = repo_git_url.split('.git')[0].split('scm/')[-1]
if repo_path.count('/') == 1: # Has to have the form <workspace>/<repo>
workspace_name, project_name = repo_path.split('/')
if not workspace_name or not project_name:
get_logger().error(f"workspace_name or project_name not found in context, either git url: {repo_git_url} or uninitialized workspace/project.")
return ("", "")
prefix = f"{self.bitbucket_server_url}/projects/{workspace_name}/repos/{project_name}/browse"
suffix = f"?at=refs%2Fheads%2F{desired_branch}"
return (prefix, suffix)
def set_pr(self, pr_url: str):
self.workspace_slug, self.repo_slug, self.pr_num = self._parse_pr_url(pr_url)
self.pr = self._get_pr()
@ -506,3 +512,28 @@ class BitbucketServerProvider(GitProvider):
def _get_merge_base(self):
return f"rest/api/latest/projects/{self.workspace_slug}/repos/{self.repo_slug}/pull-requests/{self.pr_num}/merge-base"
# Clone related
def _prepare_clone_url_with_token(self, repo_url_to_clone: str) -> str | None:
if 'bitbucket.' not in repo_url_to_clone:
get_logger().error("Repo URL is not a valid bitbucket URL.")
return None
bearer_token = self.bearer_token
if not bearer_token:
get_logger().error("No bearer token provided. Returning None")
return None
# Return unmodified URL as the token is passed via HTTP headers in _clone_inner, as seen below.
return repo_url_to_clone
#Overriding the shell command, since for some reason usage of x-token-auth doesn't work, as mentioned here:
# https://stackoverflow.com/questions/56760396/cloning-bitbucket-server-repo-with-access-tokens
def _clone_inner(self, repo_url: str, dest_folder: str, operation_timeout_in_seconds: int=None):
bearer_token = self.bearer_token
if not bearer_token:
#Shouldn't happen since this is checked in _prepare_clone, therefore - throwing an exception.
raise RuntimeError(f"Bearer token is required!")
cli_args = shlex.split(f"git clone -c http.extraHeader='Authorization: Bearer {bearer_token}' "
f"--filter=blob:none --depth 1 {repo_url} {dest_folder}")
subprocess.run(cli_args, check=True, # check=True will raise an exception if the command fails
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=operation_timeout_in_seconds)