Merge branch 'Codium-ai:main' into main

This commit is contained in:
Stephen Quinn
2024-08-13 16:04:55 +01:00
committed by GitHub
9 changed files with 80 additions and 19 deletions

View File

@ -106,6 +106,10 @@ class LiteLLMAIHandler(BaseAiHandler):
deployment_id = self.deployment_id
if self.azure:
model = 'azure/' + model
if 'claude' in model and not system:
system = "\n"
get_logger().warning(
"Empty system prompt for claude model. Adding a newline character to prevent OpenAI API error.")
messages = [{"role": "system", "content": system}, {"role": "user", "content": user}]
if img_path:
try:

View File

@ -14,7 +14,9 @@ def filter_bad_extensions(files):
return [f for f in files if f.filename is not None and is_valid_file(f.filename, bad_extensions)]
def is_valid_file(filename, bad_extensions=None):
def is_valid_file(filename:str, bad_extensions=None) -> bool:
if not filename:
return False
if not bad_extensions:
bad_extensions = get_settings().bad_extensions.default
if get_settings().config.use_extra_bad_extensions:

View File

@ -147,7 +147,7 @@ def convert_to_markdown_v2(output_data: dict,
else:
markdown_text += f"### {emoji} {key_nice}: {value}\n\n"
elif 'relevant tests' in key_nice.lower():
value = value.strip().lower()
value = str(value).strip().lower()
if gfm_supported:
markdown_text += f"<tr><td>"
if is_value_no(value):
@ -674,14 +674,16 @@ def get_user_labels(current_labels: List[str] = None):
Only keep labels that have been added by the user
"""
try:
enable_custom_labels = get_settings().config.get('enable_custom_labels', False)
custom_labels = get_settings().get('custom_labels', [])
if current_labels is None:
current_labels = []
user_labels = []
for label in current_labels:
if label.lower() in ['bug fix', 'tests', 'enhancement', 'documentation', 'other']:
continue
if get_settings().config.enable_custom_labels:
if label in get_settings().custom_labels:
if enable_custom_labels:
if label in custom_labels:
continue
user_labels.append(label)
if user_labels:

View File

@ -15,6 +15,12 @@ from ..log import get_logger
from .git_provider import GitProvider
def _gef_filename(diff):
    """Return the file path to use for a single diffstat entry.

    The new-side path is preferred; when it is missing or empty the old-side
    path is used instead (presumably the deleted-file case -- confirm against
    the Bitbucket diffstat payload).

    Args:
        diff: An object exposing ``new`` and ``old`` attributes, each of which
            is expected to expose a ``path`` attribute.

    Returns:
        ``diff.new.path`` when it is truthy, otherwise ``diff.old.path``.
        ``None`` is returned when neither side can provide a path.
    """
    # NOTE(review): one side of the entry may be None as a whole rather than
    # an object with an empty path, so both levels are read defensively
    # instead of letting an AttributeError escape to the caller.
    new_path = getattr(getattr(diff, "new", None), "path", None)
    if new_path:
        return new_path
    return getattr(getattr(diff, "old", None), "path", None)
class BitbucketProvider(GitProvider):
def __init__(
self, pr_url: Optional[str] = None, incremental: Optional[bool] = False
@ -40,6 +46,7 @@ class BitbucketProvider(GitProvider):
self.temp_comments = []
self.incremental = incremental
self.diff_files = None
self.git_files = None
if pr_url:
self.set_pr(pr_url)
self.bitbucket_comment_api_url = self.pr._BitbucketBase__data["links"]["comments"]["href"]
@ -123,7 +130,18 @@ class BitbucketProvider(GitProvider):
self.pr = self._get_pr()
def get_files(self):
return [diff.new.path for diff in self.pr.diffstat()]
try:
git_files = context.get("git_files", None)
if git_files:
return git_files
self.git_files = [_gef_filename(diff) for diff in self.pr.diffstat()]
context["git_files"] = self.git_files
return self.git_files
except Exception:
if not self.git_files:
self.git_files = [_gef_filename(diff) for diff in self.pr.diffstat()]
return self.git_files
def get_diff_files(self) -> list[FilePatchInfo]:
if self.diff_files:
@ -172,14 +190,18 @@ class BitbucketProvider(GitProvider):
diff_split_lines[5].startswith("@@"))):
diff_split[i] = "\n".join(diff_split_lines[4:])
else:
get_logger().error(f"Error - failed to remove the bitbucket header from diff {i}")
break
if diffs[i].data.get('lines_added', 0) == 0 and diffs[i].data.get('lines_removed', 0) == 0:
diff_split[i] = ""
else:
get_logger().error(f"Error - failed to remove the bitbucket header from diff {i}")
break
invalid_files_names = []
diff_files = []
for index, diff in enumerate(diffs):
if not is_valid_file(diff.new.path):
invalid_files_names.append(diff.new.path)
file_path = _gef_filename(diff)
if not is_valid_file(file_path):
invalid_files_names.append(file_path)
continue
try:
@ -200,7 +222,7 @@ class BitbucketProvider(GitProvider):
original_file_content_str,
new_file_content_str,
diff_split[index],
diff.new.path,
file_path,
)
if diff.data['status'] == 'added':

View File

@ -22,7 +22,7 @@ class GoogleCloudStorageSecretProvider(SecretProvider):
blob = self.bucket.blob(secret_name)
return blob.download_as_string()
except Exception as e:
get_logger().error(f"Failed to get secret {secret_name} from Google Cloud Storage: {e}")
get_logger().warning(f"Failed to get secret {secret_name} from Google Cloud Storage: {e}")
return ""
def store_secret(self, secret_name: str, secret_value: str):

View File

@ -108,13 +108,18 @@ async def handle_github_webhooks(background_tasks: BackgroundTasks, request: Req
return "OK"
except KeyError:
get_logger().error("Failed to get actor type, check previous logs, this shouldn't happen.")
# Get the username of the sender
try:
owner = data["data"]["repository"]["owner"]["username"]
except Exception as e:
get_logger().error(f"Failed to get owner, will continue: {e}")
owner = "unknown"
username = data["data"]["actor"]["username"]
except KeyError:
try:
username = data["data"]["actor"]["display_name"]
except KeyError:
username = data["data"]["actor"]["nickname"]
log_context["sender"] = username
sender_id = data["data"]["actor"]["account_id"]
log_context["sender"] = owner
log_context["sender_id"] = sender_id
jwt_parts = input_jwt.split(".")
claim_part = jwt_parts[1]

View File

@ -87,6 +87,10 @@ async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request):
if request.headers.get("X-Gitlab-Token") and secret_provider:
request_token = request.headers.get("X-Gitlab-Token")
secret = secret_provider.get_secret(request_token)
if not secret:
get_logger().warning(f"Empty secret retrieved, request_token: {request_token}")
return JSONResponse(status_code=status.HTTP_401_UNAUTHORIZED,
content=jsonable_encoder({"message": "unauthorized"}))
try:
secret_dict = json.loads(secret)
gitlab_token = secret_dict["gitlab_token"]

View File

@ -450,8 +450,24 @@ class PRCodeSuggestions:
original_initial_line = None
for file in self.diff_files:
if file.filename.strip() == relevant_file:
if file.head_file: # in bitbucket, head_file is empty. toDo: fix this
original_initial_line = file.head_file.splitlines()[relevant_lines_start - 1]
if file.head_file:
file_lines = file.head_file.splitlines()
if relevant_lines_start > len(file_lines):
get_logger().warning(
"Could not dedent code snippet, because relevant_lines_start is out of range",
artifact={'filename': file.filename,
'file_content': file.head_file,
'relevant_lines_start': relevant_lines_start,
'new_code_snippet': new_code_snippet})
return new_code_snippet
else:
original_initial_line = file_lines[relevant_lines_start - 1]
else:
get_logger().warning("Could not dedent code snippet, because head_file is missing",
artifact={'filename': file.filename,
'relevant_lines_start': relevant_lines_start,
'new_code_snippet': new_code_snippet})
return new_code_snippet
break
if original_initial_line:
suggested_initial_line = new_code_snippet.splitlines()[0]
@ -461,7 +477,7 @@ class PRCodeSuggestions:
if delta_spaces > 0:
new_code_snippet = textwrap.indent(new_code_snippet, delta_spaces * " ").rstrip('\n')
except Exception as e:
get_logger().error(f"Could not dedent code snippet for file {relevant_file}, error: {e}")
get_logger().error(f"Error when dedenting code snippet for file {relevant_file}, error: {e}")
return new_code_snippet

View File

@ -510,6 +510,12 @@ extra_file_yaml =
file_label_dict = {}
for file in self.data['pr_files']:
try:
required_fields = ['changes_summary', 'changes_title', 'filename', 'label']
if not all(field in file for field in required_fields):
# can happen for example if a YAML generation was interrupted in the middle (no more tokens)
get_logger().warning(f"Missing required fields in file label dict {self.pr_id}, skipping file",
artifact={"file": file})
continue
filename = file['filename'].replace("'", "`").replace('"', '`')
changes_summary = file['changes_summary']
changes_title = file['changes_title'].strip()