From 207645479861ae344e080c152f84d2fb8bbcf8fa Mon Sep 17 00:00:00 2001 From: mrT23 Date: Thu, 13 Jun 2024 12:01:50 +0300 Subject: [PATCH] Add file ignore functionality and update documentation for ignore patterns --- pr_agent/algo/file_filter.py | 9 +++++++-- pr_agent/git_providers/github_provider.py | 16 +++++++++++++--- pr_agent/git_providers/gitlab_provider.py | 18 ++++++++++++++++-- 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/pr_agent/algo/file_filter.py b/pr_agent/algo/file_filter.py index 9f396549..3d6dc973 100644 --- a/pr_agent/algo/file_filter.py +++ b/pr_agent/algo/file_filter.py @@ -28,8 +28,13 @@ def filter_ignored(files): pass # keep filenames that _don't_ match the ignore regex - for r in compiled_patterns: - files = [f for f in files if (f.filename and not r.match(f.filename))] + if files: + if hasattr(files[0], 'filename'): # github + for r in compiled_patterns: + files = [f for f in files if (f.filename and not r.match(f.filename))] + elif 'new_path' in files[0]: # gitlab + for r in compiled_patterns: + files = [f for f in files if (f['new_path'] and not r.match(f['new_path']))] except Exception as e: print(f"Could not filter file list: {e}") diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 6fd693b0..996f539a 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -146,10 +146,20 @@ class GithubProvider(GitProvider): if self.diff_files: return self.diff_files - files = self.get_files() - files = filter_ignored(files) - diff_files = [] + # filter using [ignore] patterns + files_original = self.get_files() + files = filter_ignored(files_original) + if files_original != files: + try: + names_original = [file.filename for file in files_original] + names_new = [file.filename for file in files] + get_logger().info(f"Filtered out [ignore] files for pull request:", extra= + {"files": names_original, + "filtered_files": names_new}) + except Exception: + pass + diff_files = [] for file in files: if not is_valid_file(file.filename): get_logger().info(f"Skipping a non-code file: {file.filename}") diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index ad18ba1d..c8a0ee6b 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -6,6 +6,7 @@ from urllib.parse import urlparse import gitlab from gitlab import GitlabGetError +from ..algo.file_filter import filter_ignored from ..algo.language_handler import is_valid_file from ..algo.utils import load_large_diff, clip_tokens, find_line_number_of_relevant_line_in_file from ..config_loader import get_settings @@ -84,7 +85,20 @@ class GitLabProvider(GitProvider): if self.diff_files: return self.diff_files - diffs = self.mr.changes()['changes'] + # filter using [ignore] patterns + diffs_original = self.mr.changes()['changes'] + diffs = filter_ignored(diffs_original) + if diffs != diffs_original: + try: + names_original = [diff['new_path'] for diff in diffs_original] + names_filtered = [diff['new_path'] for diff in diffs] + get_logger().info(f"Filtered out [ignore] files for merge request {self.id_mr}",extra={ + 'original_files': names_original, + 'filtered_files': names_filtered + }) + except Exception as e: + get_logger().exception(f"Could not log filtered files for merge request {self.id_mr}: {e}") + diff_files = [] for diff in diffs: if is_valid_file(diff['new_path']): @@ -130,7 +144,7 @@ class GitLabProvider(GitProvider): self.diff_files = diff_files return diff_files - def get_files(self): + def get_files(self) -> list: if not self.git_files: self.git_files = [change['new_path'] for change in self.mr.changes()['changes']] return self.git_files