From 20f6af803c0a5e39924546fbae7a3fbcb50236f7 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Thu, 13 Jun 2024 12:09:52 +0300 Subject: [PATCH] Add file ignore functionality and update documentation for ignore patterns --- pr_agent/algo/file_filter.py | 4 +- pr_agent/git_providers/github_provider.py | 7 +- pr_agent/git_providers/gitlab_provider.py | 79 ++++++++++++----------- 3 files changed, 49 insertions(+), 41 deletions(-) diff --git a/pr_agent/algo/file_filter.py b/pr_agent/algo/file_filter.py index 3d6dc973..05c769d7 100644 --- a/pr_agent/algo/file_filter.py +++ b/pr_agent/algo/file_filter.py @@ -28,11 +28,11 @@ def filter_ignored(files): pass # keep filenames that _don't_ match the ignore regex - if files: + if files and isinstance(files, list): if hasattr(files[0], 'filename'): # github for r in compiled_patterns: files = [f for f in files if (f.filename and not r.match(f.filename))] - elif 'new_path' in files[0]: # gitlab + elif isinstance(files[0], dict) and 'new_path' in files[0]: # gitlab for r in compiled_patterns: files = [f for f in files if (f['new_path'] and not r.match(f['new_path']))] diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 996f539a..5e7601b3 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -146,7 +146,7 @@ class GithubProvider(GitProvider): if self.diff_files: return self.diff_files - # filter using [ignore] patterns + # filter files using [ignore] patterns files_original = self.get_files() files = filter_ignored(files_original) if files_original != files: @@ -160,9 +160,10 @@ class GithubProvider(GitProvider): pass diff_files = [] + invalid_files_names = [] for file in files: if not is_valid_file(file.filename): - get_logger().info(f"Skipping a non-code file: {file.filename}") + invalid_files_names.append(file.filename) continue new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha) # communication with GitHub @@ -198,6 +199,8 @@ class GithubProvider(GitProvider): num_plus_lines=num_plus_lines, num_minus_lines=num_minus_lines,) diff_files.append(file_patch_canonical_structure) + if invalid_files_names: + get_logger().info(f"Filtered out files with invalid extensions: {invalid_files_names}") self.diff_files = diff_files try: diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index c8a0ee6b..a69e01e4 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -85,61 +85,66 @@ class GitLabProvider(GitProvider): if self.diff_files: return self.diff_files - # filter using [ignore] patterns + # filter files using [ignore] patterns diffs_original = self.mr.changes()['changes'] diffs = filter_ignored(diffs_original) if diffs != diffs_original: try: names_original = [diff['new_path'] for diff in diffs_original] names_filtered = [diff['new_path'] for diff in diffs] - get_logger().info(f"Filtered out [ignore] files for merge request {self.id_mr}",extra={ + get_logger().info(f"Filtered out [ignore] files for merge request {self.id_mr}", extra={ 'original_files': names_original, 'filtered_files': names_filtered }) except Exception as e: - get_logger().exception(f"Could not log filtered files for merge request {self.id_mr}: {e}") + pass diff_files = [] + invalid_files_names = [] for diff in diffs: - if is_valid_file(diff['new_path']): - original_file_content_str = self.get_pr_file_content(diff['old_path'], self.mr.diff_refs['base_sha']) - new_file_content_str = self.get_pr_file_content(diff['new_path'], self.mr.diff_refs['head_sha']) + if not is_valid_file(diff['new_path']): + invalid_files_names.append(diff['new_path']) + continue - try: - if isinstance(original_file_content_str, bytes): - original_file_content_str = bytes.decode(original_file_content_str, 'utf-8') - if isinstance(new_file_content_str, bytes): - new_file_content_str = bytes.decode(new_file_content_str, 'utf-8') - except UnicodeDecodeError: - get_logger().warning( - f"Cannot decode file {diff['old_path']} or {diff['new_path']} in merge request {self.id_mr}") + original_file_content_str = self.get_pr_file_content(diff['old_path'], self.mr.diff_refs['base_sha']) + new_file_content_str = self.get_pr_file_content(diff['new_path'], self.mr.diff_refs['head_sha']) + try: + if isinstance(original_file_content_str, bytes): + original_file_content_str = bytes.decode(original_file_content_str, 'utf-8') + if isinstance(new_file_content_str, bytes): + new_file_content_str = bytes.decode(new_file_content_str, 'utf-8') + except UnicodeDecodeError: + get_logger().warning( + f"Cannot decode file {diff['old_path']} or {diff['new_path']} in merge request {self.id_mr}") - edit_type = EDIT_TYPE.MODIFIED - if diff['new_file']: - edit_type = EDIT_TYPE.ADDED - elif diff['deleted_file']: - edit_type = EDIT_TYPE.DELETED - elif diff['renamed_file']: - edit_type = EDIT_TYPE.RENAMED + edit_type = EDIT_TYPE.MODIFIED + if diff['new_file']: + edit_type = EDIT_TYPE.ADDED + elif diff['deleted_file']: + edit_type = EDIT_TYPE.DELETED + elif diff['renamed_file']: + edit_type = EDIT_TYPE.RENAMED - filename = diff['new_path'] - patch = diff['diff'] - if not patch: - patch = load_large_diff(filename, new_file_content_str, original_file_content_str) + filename = diff['new_path'] + patch = diff['diff'] + if not patch: + patch = load_large_diff(filename, new_file_content_str, original_file_content_str) - # count number of lines added and removed - patch_lines = patch.splitlines(keepends=True) - num_plus_lines = len([line for line in patch_lines if line.startswith('+')]) - num_minus_lines = len([line for line in patch_lines if line.startswith('-')]) - diff_files.append( - FilePatchInfo(original_file_content_str, new_file_content_str, - patch=patch, - filename=filename, - edit_type=edit_type, - old_filename=None if diff['old_path'] == diff['new_path'] else diff['old_path'], - num_plus_lines=num_plus_lines, - num_minus_lines=num_minus_lines, )) + # count number of lines added and removed + patch_lines = patch.splitlines(keepends=True) + num_plus_lines = len([line for line in patch_lines if line.startswith('+')]) + num_minus_lines = len([line for line in patch_lines if line.startswith('-')]) + diff_files.append( + FilePatchInfo(original_file_content_str, new_file_content_str, + patch=patch, + filename=filename, + edit_type=edit_type, + old_filename=None if diff['old_path'] == diff['new_path'] else diff['old_path'], + num_plus_lines=num_plus_lines, + num_minus_lines=num_minus_lines, )) + if invalid_files_names: + get_logger().info(f"Filtered out files with invalid extensions: {invalid_files_names}") self.diff_files = diff_files return diff_files