Add file ignore functionality and update documentation for ignore patterns

This commit is contained in:
mrT23
2024-06-13 12:09:52 +03:00
parent 2076454798
commit 20f6af803c
3 changed files with 49 additions and 41 deletions

View File

@@ -28,11 +28,11 @@ def filter_ignored(files):
pass pass
# keep filenames that _don't_ match the ignore regex # keep filenames that _don't_ match the ignore regex
if files: if files and isinstance(files, list):
if hasattr(files[0], 'filename'): # github if hasattr(files[0], 'filename'): # github
for r in compiled_patterns: for r in compiled_patterns:
files = [f for f in files if (f.filename and not r.match(f.filename))] files = [f for f in files if (f.filename and not r.match(f.filename))]
elif 'new_path' in files[0]: # gitlab elif isinstance(files[0], dict) and 'new_path' in files[0]: # gitlab
for r in compiled_patterns: for r in compiled_patterns:
files = [f for f in files if (f['new_path'] and not r.match(f['new_path']))] files = [f for f in files if (f['new_path'] and not r.match(f['new_path']))]

View File

@@ -146,7 +146,7 @@ class GithubProvider(GitProvider):
if self.diff_files: if self.diff_files:
return self.diff_files return self.diff_files
# filter using [ignore] patterns # filter files using [ignore] patterns
files_original = self.get_files() files_original = self.get_files()
files = filter_ignored(files_original) files = filter_ignored(files_original)
if files_original != files: if files_original != files:
@@ -160,9 +160,10 @@ class GithubProvider(GitProvider):
pass pass
diff_files = [] diff_files = []
invalid_files_names = []
for file in files: for file in files:
if not is_valid_file(file.filename): if not is_valid_file(file.filename):
get_logger().info(f"Skipping a non-code file: {file.filename}") invalid_files_names.append(file.filename)
continue continue
new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha) # communication with GitHub new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha) # communication with GitHub
@@ -198,6 +199,8 @@ class GithubProvider(GitProvider):
num_plus_lines=num_plus_lines, num_plus_lines=num_plus_lines,
num_minus_lines=num_minus_lines,) num_minus_lines=num_minus_lines,)
diff_files.append(file_patch_canonical_structure) diff_files.append(file_patch_canonical_structure)
if invalid_files_names:
get_logger().info(f"Filtered out files with invalid extensions: {invalid_files_names}")
self.diff_files = diff_files self.diff_files = diff_files
try: try:

View File

@@ -85,61 +85,66 @@ class GitLabProvider(GitProvider):
if self.diff_files: if self.diff_files:
return self.diff_files return self.diff_files
# filter using [ignore] patterns # filter files using [ignore] patterns
diffs_original = self.mr.changes()['changes'] diffs_original = self.mr.changes()['changes']
diffs = filter_ignored(diffs_original) diffs = filter_ignored(diffs_original)
if diffs != diffs_original: if diffs != diffs_original:
try: try:
names_original = [diff['new_path'] for diff in diffs_original] names_original = [diff['new_path'] for diff in diffs_original]
names_filtered = [diff['new_path'] for diff in diffs] names_filtered = [diff['new_path'] for diff in diffs]
get_logger().info(f"Filtered out [ignore] files for merge request {self.id_mr}",extra={ get_logger().info(f"Filtered out [ignore] files for merge request {self.id_mr}", extra={
'original_files': names_original, 'original_files': names_original,
'filtered_files': names_filtered 'filtered_files': names_filtered
}) })
except Exception as e: except Exception as e:
get_logger().exception(f"Could not log filtered files for merge request {self.id_mr}: {e}") pass
diff_files = [] diff_files = []
invalid_files_names = []
for diff in diffs: for diff in diffs:
if is_valid_file(diff['new_path']): if not is_valid_file(diff['new_path']):
original_file_content_str = self.get_pr_file_content(diff['old_path'], self.mr.diff_refs['base_sha']) invalid_files_names.append(diff['new_path'])
new_file_content_str = self.get_pr_file_content(diff['new_path'], self.mr.diff_refs['head_sha']) continue
try: original_file_content_str = self.get_pr_file_content(diff['old_path'], self.mr.diff_refs['base_sha'])
if isinstance(original_file_content_str, bytes): new_file_content_str = self.get_pr_file_content(diff['new_path'], self.mr.diff_refs['head_sha'])
original_file_content_str = bytes.decode(original_file_content_str, 'utf-8') try:
if isinstance(new_file_content_str, bytes): if isinstance(original_file_content_str, bytes):
new_file_content_str = bytes.decode(new_file_content_str, 'utf-8') original_file_content_str = bytes.decode(original_file_content_str, 'utf-8')
except UnicodeDecodeError: if isinstance(new_file_content_str, bytes):
get_logger().warning( new_file_content_str = bytes.decode(new_file_content_str, 'utf-8')
f"Cannot decode file {diff['old_path']} or {diff['new_path']} in merge request {self.id_mr}") except UnicodeDecodeError:
get_logger().warning(
f"Cannot decode file {diff['old_path']} or {diff['new_path']} in merge request {self.id_mr}")
edit_type = EDIT_TYPE.MODIFIED edit_type = EDIT_TYPE.MODIFIED
if diff['new_file']: if diff['new_file']:
edit_type = EDIT_TYPE.ADDED edit_type = EDIT_TYPE.ADDED
elif diff['deleted_file']: elif diff['deleted_file']:
edit_type = EDIT_TYPE.DELETED edit_type = EDIT_TYPE.DELETED
elif diff['renamed_file']: elif diff['renamed_file']:
edit_type = EDIT_TYPE.RENAMED edit_type = EDIT_TYPE.RENAMED
filename = diff['new_path'] filename = diff['new_path']
patch = diff['diff'] patch = diff['diff']
if not patch: if not patch:
patch = load_large_diff(filename, new_file_content_str, original_file_content_str) patch = load_large_diff(filename, new_file_content_str, original_file_content_str)
# count number of lines added and removed # count number of lines added and removed
patch_lines = patch.splitlines(keepends=True) patch_lines = patch.splitlines(keepends=True)
num_plus_lines = len([line for line in patch_lines if line.startswith('+')]) num_plus_lines = len([line for line in patch_lines if line.startswith('+')])
num_minus_lines = len([line for line in patch_lines if line.startswith('-')]) num_minus_lines = len([line for line in patch_lines if line.startswith('-')])
diff_files.append( diff_files.append(
FilePatchInfo(original_file_content_str, new_file_content_str, FilePatchInfo(original_file_content_str, new_file_content_str,
patch=patch, patch=patch,
filename=filename, filename=filename,
edit_type=edit_type, edit_type=edit_type,
old_filename=None if diff['old_path'] == diff['new_path'] else diff['old_path'], old_filename=None if diff['old_path'] == diff['new_path'] else diff['old_path'],
num_plus_lines=num_plus_lines, num_plus_lines=num_plus_lines,
num_minus_lines=num_minus_lines, )) num_minus_lines=num_minus_lines, ))
if invalid_files_names:
get_logger().info(f"Filtered out files with invalid extensions: {invalid_files_names}")
self.diff_files = diff_files self.diff_files = diff_files
return diff_files return diff_files