Add support for ignoring files

Add ignore.toml, configuration for ignoring files
Add file_filter.py, for matching files against glob/regex patterns
Update relevant code to use file filter
+Tests
This commit is contained in:
jamesrom
2023-10-06 01:43:35 +11:00
parent fd8c90041c
commit e387086890
5 changed files with 104 additions and 11 deletions

View File

@ -0,0 +1,23 @@
import fnmatch
import re
from pr_agent.config_loader import get_settings
def filter_ignored(files):
"""
Filter out files that match the ignore patterns.
"""
# load regex patterns, and translate glob patterns to regex
patterns = get_settings().ignore.regex
patterns += [fnmatch.translate(glob) for glob in get_settings().ignore.glob]
compiled_patterns = [re.compile(r) for r in patterns]
filenames = [file.filename for file in files]
# keep filenames that don't match the ignore regex
for r in compiled_patterns:
filenames = [f for f in filenames if not r.match(f)]
# map filenames back to files
return [file for file in files if file.filename in filenames]

View File

@ -11,6 +11,7 @@ from github import RateLimitExceededException
from pr_agent.algo import MAX_TOKENS
from pr_agent.algo.git_patch_processing import convert_to_hunks_with_lines_numbers, extend_patch, handle_patch_deletions
from pr_agent.algo.language_handler import sort_files_by_main_languages
from pr_agent.algo.file_filter import filter_ignored
from pr_agent.algo.token_handler import TokenHandler, get_token_encoder
from pr_agent.config_loader import get_settings
from pr_agent.git_providers.git_provider import FilePatchInfo, GitProvider
@ -53,6 +54,8 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s
logging.error(f"Rate limit exceeded for git provider API. original message {e}")
raise
diff_files = filter_ignored(diff_files)
# get pr languages
pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files)
@ -348,16 +351,16 @@ def get_pr_multi_diffs(git_provider: GitProvider,
"""
Retrieves the diff files from a Git provider, sorts them by main language, and generates patches for each file.
The patches are split into multiple groups based on the maximum number of tokens allowed for the given model.
Args:
git_provider (GitProvider): An object that provides access to Git provider APIs.
token_handler (TokenHandler): An object that handles tokens in the context of a pull request.
model (str): The name of the model.
max_calls (int, optional): The maximum number of calls to retrieve diff files. Defaults to 5.
Returns:
List[str]: A list of final diff strings, split into multiple groups based on the maximum number of tokens allowed for the given model.
Raises:
RateLimitExceededException: If the rate limit for the Git provider API is exceeded.
"""
@ -367,6 +370,8 @@ def get_pr_multi_diffs(git_provider: GitProvider,
logging.error(f"Rate limit exceeded for git provider API. original message {e}")
raise
diff_files = filter_ignored(diff_files)
# Sort files by main language
pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files)

View File

@ -12,18 +12,19 @@ global_settings = Dynaconf(
envvar_prefix=False,
merge_enabled=True,
settings_files=[join(current_dir, f) for f in [
"settings_prod/.secrets.toml"
"settings/.secrets.toml",
"settings/configuration.toml",
"settings/ignore.toml",
"settings/language_extensions.toml",
"settings/pr_reviewer_prompts.toml",
"settings/pr_questions_prompts.toml",
"settings/pr_description_prompts.toml",
"settings/pr_code_suggestions_prompts.toml",
"settings/pr_sort_code_suggestions_prompts.toml",
"settings/pr_information_from_user_prompts.toml",
"settings/pr_update_changelog_prompts.toml",
"settings/pr_add_docs.toml",
"settings_prod/.secrets.toml"
"settings/pr_code_suggestions_prompts.toml",
"settings/pr_description_prompts.toml",
"settings/pr_information_from_user_prompts.toml",
"settings/pr_questions_prompts.toml",
"settings/pr_reviewer_prompts.toml",
"settings/pr_sort_code_suggestions_prompts.toml",
"settings/pr_update_changelog_prompts.toml",
]]
)

View File

@ -0,0 +1,5 @@
[ignore]
# Ignore files and directories matching these patterns.
glob = []
regex = []

View File

@ -0,0 +1,59 @@
import pytest
from pr_agent.algo.file_filter import filter_ignored
from pr_agent.config_loader import global_settings
class TestIgnoreFilter:
def test_no_ignores(self):
"""
Test no files are ignored when no patterns are specified.
"""
files = [
type('', (object,), {'filename': 'file1.py'})(),
type('', (object,), {'filename': 'file2.java'})(),
type('', (object,), {'filename': 'file3.cpp'})(),
type('', (object,), {'filename': 'file4.py'})(),
type('', (object,), {'filename': 'file5.py'})()
]
assert filter_ignored(files) == files
def test_glob_ignores(self, monkeypatch):
"""
Test files are ignored when glob patterns are specified.
"""
monkeypatch.setattr(global_settings.ignore, 'glob', ['*.py'])
files = [
type('', (object,), {'filename': 'file1.py'})(),
type('', (object,), {'filename': 'file2.java'})(),
type('', (object,), {'filename': 'file3.cpp'})(),
type('', (object,), {'filename': 'file4.py'})(),
type('', (object,), {'filename': 'file5.py'})()
]
expected = [
files[1],
files[2]
]
filtered_files = filter_ignored(files)
assert filtered_files == expected, f"Expected {[file.filename for file in expected]}, but got {[file.filename for file in filtered_files]}."
def test_regex_ignores(self, monkeypatch):
"""
Test files are ignored when regex patterns are specified.
"""
monkeypatch.setattr(global_settings.ignore, 'regex', ['^file[2-4]\..*$'])
files = [
type('', (object,), {'filename': 'file1.py'})(),
type('', (object,), {'filename': 'file2.java'})(),
type('', (object,), {'filename': 'file3.cpp'})(),
type('', (object,), {'filename': 'file4.py'})(),
type('', (object,), {'filename': 'file5.py'})()
]
expected = [
files[0],
files[4]
]
filtered_files = filter_ignored(files)
assert filtered_files == expected, f"Expected {[file.filename for file in expected]}, but got {[file.filename for file in filtered_files]}."