Add support for ignoring files

Add ignore.toml, configuration for ignoring files
Add file_filter.py, for matching files against glob/regex patterns
Update relevant code to use file filter
+Tests
This commit is contained in:
jamesrom
2023-10-06 01:43:35 +11:00
parent fd8c90041c
commit e387086890
5 changed files with 104 additions and 11 deletions

View File

@ -0,0 +1,23 @@
import fnmatch
import re
from pr_agent.config_loader import get_settings
def _as_pattern_list(value):
    """Return *value* as a new list; a lone string becomes a one-item list."""
    if value is None:
        return []
    if isinstance(value, str):
        return [value]
    return list(value)


def filter_ignored(files):
    """
    Filter out files that match the ignore patterns.

    Patterns are read from the ``ignore`` settings section: ``ignore.regex``
    holds regular expressions and ``ignore.glob`` holds glob patterns, which
    are translated to regular expressions with ``fnmatch.translate``. A file
    is dropped when any pattern matches its ``filename`` via ``re.match``
    (i.e. the match is anchored at the start of the name).

    Args:
        files: iterable of objects exposing a ``filename`` attribute.

    Returns:
        A new list containing the files that matched no ignore pattern, in
        their original order.
    """
    ignore_settings = get_settings().ignore

    # Build a fresh list instead of extending the settings' own list in place;
    # ``+=`` on the configured list would append the translated globs to the
    # shared configuration every time this function is called.
    patterns = _as_pattern_list(ignore_settings.regex)
    # load regex patterns, and translate glob patterns to regex
    patterns.extend(
        fnmatch.translate(glob) for glob in _as_pattern_list(ignore_settings.glob)
    )
    compiled_patterns = [re.compile(pattern) for pattern in patterns]

    # keep files whose name matches none of the ignore patterns
    return [
        file for file in files
        if not any(r.match(file.filename) for r in compiled_patterns)
    ]

View File

@ -11,6 +11,7 @@ from github import RateLimitExceededException
from pr_agent.algo import MAX_TOKENS
from pr_agent.algo.git_patch_processing import convert_to_hunks_with_lines_numbers, extend_patch, handle_patch_deletions
from pr_agent.algo.language_handler import sort_files_by_main_languages
from pr_agent.algo.file_filter import filter_ignored
from pr_agent.algo.token_handler import TokenHandler, get_token_encoder
from pr_agent.config_loader import get_settings
from pr_agent.git_providers.git_provider import FilePatchInfo, GitProvider
@ -53,6 +54,8 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s
logging.error(f"Rate limit exceeded for git provider API. original message {e}")
raise
diff_files = filter_ignored(diff_files)
# get pr languages
pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files)
@ -367,6 +370,8 @@ def get_pr_multi_diffs(git_provider: GitProvider,
logging.error(f"Rate limit exceeded for git provider API. original message {e}")
raise
diff_files = filter_ignored(diff_files)
# Sort files by main language
pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files)

View File

@ -12,18 +12,19 @@ global_settings = Dynaconf(
envvar_prefix=False,
merge_enabled=True,
settings_files=[join(current_dir, f) for f in [
"settings_prod/.secrets.toml"
"settings/.secrets.toml",
"settings/configuration.toml",
"settings/ignore.toml",
"settings/language_extensions.toml",
"settings/pr_reviewer_prompts.toml",
"settings/pr_questions_prompts.toml",
"settings/pr_description_prompts.toml",
"settings/pr_code_suggestions_prompts.toml",
"settings/pr_sort_code_suggestions_prompts.toml",
"settings/pr_information_from_user_prompts.toml",
"settings/pr_update_changelog_prompts.toml",
"settings/pr_add_docs.toml",
"settings_prod/.secrets.toml"
"settings/pr_code_suggestions_prompts.toml",
"settings/pr_description_prompts.toml",
"settings/pr_information_from_user_prompts.toml",
"settings/pr_questions_prompts.toml",
"settings/pr_reviewer_prompts.toml",
"settings/pr_sort_code_suggestions_prompts.toml",
"settings/pr_update_changelog_prompts.toml",
]]
)

View File

@ -0,0 +1,5 @@
[ignore]
# Ignore files and directories matching these patterns.
# glob:  shell-style patterns; each one is translated to a regular expression
#        with Python's fnmatch.translate before matching.
# regex: regular expressions; applied with re.match against each file's
#        filename, so a pattern must match from the start of the name.
glob = []
regex = []

View File

@ -0,0 +1,59 @@
import pytest
from pr_agent.algo.file_filter import filter_ignored
from pr_agent.config_loader import global_settings
class TestIgnoreFilter:
    """Tests for ``filter_ignored`` with no, glob, and regex ignore patterns."""

    @staticmethod
    def _make_files():
        """Build five minimal stand-in file objects exposing only ``filename``."""
        names = ['file1.py', 'file2.java', 'file3.cpp', 'file4.py', 'file5.py']
        return [type('', (object,), {'filename': name})() for name in names]

    def test_no_ignores(self):
        """
        Test no files are ignored when no patterns are specified.
        """
        files = self._make_files()
        assert filter_ignored(files) == files

    def test_glob_ignores(self, monkeypatch):
        """
        Test files are ignored when glob patterns are specified.
        """
        monkeypatch.setattr(global_settings.ignore, 'glob', ['*.py'])

        files = self._make_files()
        # only the non-.py files should survive
        expected = [
            files[1],
            files[2],
        ]

        filtered_files = filter_ignored(files)
        assert filtered_files == expected, f"Expected {[file.filename for file in expected]}, but got {[file.filename for file in filtered_files]}."

    def test_regex_ignores(self, monkeypatch):
        """
        Test files are ignored when regex patterns are specified.
        """
        # Raw string: ``\.`` is not a valid escape in a normal string literal
        # and triggers an invalid-escape-sequence warning; the runtime value
        # of the pattern is unchanged.
        monkeypatch.setattr(global_settings.ignore, 'regex', [r'^file[2-4]\..*$'])

        files = self._make_files()
        # file2, file3 and file4 match the pattern and are dropped
        expected = [
            files[0],
            files[4],
        ]

        filtered_files = filter_ignored(files)
        assert filtered_files == expected, f"Expected {[file.filename for file in expected]}, but got {[file.filename for file in filtered_files]}."