Merge pull request #357 from jamesrom/feat/file_ignores

Add support for ignoring files
This commit is contained in:
mrT23
2023-10-08 16:30:02 +03:00
committed by GitHub
7 changed files with 166 additions and 28 deletions

View File

@ -89,10 +89,10 @@ chmod 600 pr_agent/settings/.secrets.toml
``` ```
export PYTHONPATH=[$PYTHONPATH:]<PATH to pr_agent folder> export PYTHONPATH=[$PYTHONPATH:]<PATH to pr_agent folder>
python3 -m pr_agent.cli --pr_url <pr_url> /review python3 -m pr_agent.cli --pr_url <pr_url> review
python3 -m pr_agent.cli --pr_url <pr_url> /ask <your question> python3 -m pr_agent.cli --pr_url <pr_url> ask <your question>
python3 -m pr_agent.cli --pr_url <pr_url> /describe python3 -m pr_agent.cli --pr_url <pr_url> describe
python3 -m pr_agent.cli --pr_url <pr_url> /improve python3 -m pr_agent.cli --pr_url <pr_url> improve
``` ```
--- ---

View File

@ -29,6 +29,16 @@ In addition to general configuration options, each tool has its own configuratio
The [Tools Guide](./docs/TOOLS_GUIDE.md) provides a detailed description of the different tools and their configurations. The [Tools Guide](./docs/TOOLS_GUIDE.md) provides a detailed description of the different tools and their configurations.
#### Ignoring files from analysis
In some cases, you may want to exclude specific files or directories from the analysis performed by CodiumAI PR-Agent. This can be useful, for example, when you have files that are generated automatically or files that shouldn't be reviewed, like vendored code.
To ignore files or directories, edit the **[ignore.toml](/pr_agent/settings/ignore.toml)** configuration file. This setting is also exposed the following environment variables:
- `IGNORE.GLOB`
- `IGNORE.REGEX`
See [dynaconf envvars documentation](https://www.dynaconf.com/envvars/).
#### git provider #### git provider
The [git_provider](pr_agent/settings/configuration.toml#L4) field in the configuration file determines the GIT provider that will be used by PR-Agent. Currently, the following providers are supported: The [git_provider](pr_agent/settings/configuration.toml#L4) field in the configuration file determines the GIT provider that will be used by PR-Agent. Currently, the following providers are supported:
` `

View File

@ -0,0 +1,31 @@
import fnmatch
import re
from pr_agent.config_loader import get_settings
def filter_ignored(files):
"""
Filter out files that match the ignore patterns.
"""
try:
# load regex patterns, and translate glob patterns to regex
patterns = get_settings().ignore.regex
patterns += [fnmatch.translate(glob) for glob in get_settings().ignore.glob]
# compile all valid patterns
compiled_patterns = []
for r in patterns:
try:
compiled_patterns.append(re.compile(r))
except re.error:
pass
# keep filenames that _don't_ match the ignore regex
for r in compiled_patterns:
files = [f for f in files if not r.match(f.filename)]
except Exception as e:
print(f"Could not filter file list: {e}")
return files

View File

@ -11,6 +11,7 @@ from github import RateLimitExceededException
from pr_agent.algo import MAX_TOKENS from pr_agent.algo import MAX_TOKENS
from pr_agent.algo.git_patch_processing import convert_to_hunks_with_lines_numbers, extend_patch, handle_patch_deletions from pr_agent.algo.git_patch_processing import convert_to_hunks_with_lines_numbers, extend_patch, handle_patch_deletions
from pr_agent.algo.language_handler import sort_files_by_main_languages from pr_agent.algo.language_handler import sort_files_by_main_languages
from pr_agent.algo.file_filter import filter_ignored
from pr_agent.algo.token_handler import TokenHandler, get_token_encoder from pr_agent.algo.token_handler import TokenHandler, get_token_encoder
from pr_agent.config_loader import get_settings from pr_agent.config_loader import get_settings
from pr_agent.git_providers.git_provider import FilePatchInfo, GitProvider from pr_agent.git_providers.git_provider import FilePatchInfo, GitProvider
@ -53,6 +54,8 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s
logging.error(f"Rate limit exceeded for git provider API. original message {e}") logging.error(f"Rate limit exceeded for git provider API. original message {e}")
raise raise
diff_files = filter_ignored(diff_files)
# get pr languages # get pr languages
pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files) pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files)
@ -367,6 +370,8 @@ def get_pr_multi_diffs(git_provider: GitProvider,
logging.error(f"Rate limit exceeded for git provider API. original message {e}") logging.error(f"Rate limit exceeded for git provider API. original message {e}")
raise raise
diff_files = filter_ignored(diff_files)
# Sort files by main language # Sort files by main language
pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files) pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files)

View File

@ -14,6 +14,7 @@ global_settings = Dynaconf(
settings_files=[join(current_dir, f) for f in [ settings_files=[join(current_dir, f) for f in [
"settings/.secrets.toml", "settings/.secrets.toml",
"settings/configuration.toml", "settings/configuration.toml",
"settings/ignore.toml",
"settings/language_extensions.toml", "settings/language_extensions.toml",
"settings/pr_reviewer_prompts.toml", "settings/pr_reviewer_prompts.toml",
"settings/pr_questions_prompts.toml", "settings/pr_questions_prompts.toml",

View File

@ -0,0 +1,11 @@
[ignore]
glob = [
# Ignore files and directories matching these glob patterns.
# See https://docs.python.org/3/library/glob.html
'vendor/**',
]
regex = [
# Ignore files and directories matching these regex patterns.
# See https://learnbyexample.github.io/python-regex-cheatsheet/
]

View File

@ -0,0 +1,80 @@
import pytest
from pr_agent.algo.file_filter import filter_ignored
from pr_agent.config_loader import global_settings
class TestIgnoreFilter:
def test_no_ignores(self):
"""
Test no files are ignored when no patterns are specified.
"""
files = [
type('', (object,), {'filename': 'file1.py'})(),
type('', (object,), {'filename': 'file2.java'})(),
type('', (object,), {'filename': 'file3.cpp'})(),
type('', (object,), {'filename': 'file4.py'})(),
type('', (object,), {'filename': 'file5.py'})()
]
assert filter_ignored(files) == files, "Expected all files to be returned when no ignore patterns are given."
def test_glob_ignores(self, monkeypatch):
"""
Test files are ignored when glob patterns are specified.
"""
monkeypatch.setattr(global_settings.ignore, 'glob', ['*.py'])
files = [
type('', (object,), {'filename': 'file1.py'})(),
type('', (object,), {'filename': 'file2.java'})(),
type('', (object,), {'filename': 'file3.cpp'})(),
type('', (object,), {'filename': 'file4.py'})(),
type('', (object,), {'filename': 'file5.py'})()
]
expected = [
files[1],
files[2]
]
filtered_files = filter_ignored(files)
assert filtered_files == expected, f"Expected {[file.filename for file in expected]}, but got {[file.filename for file in filtered_files]}."
def test_regex_ignores(self, monkeypatch):
"""
Test files are ignored when regex patterns are specified.
"""
monkeypatch.setattr(global_settings.ignore, 'regex', ['^file[2-4]\..*$'])
files = [
type('', (object,), {'filename': 'file1.py'})(),
type('', (object,), {'filename': 'file2.java'})(),
type('', (object,), {'filename': 'file3.cpp'})(),
type('', (object,), {'filename': 'file4.py'})(),
type('', (object,), {'filename': 'file5.py'})()
]
expected = [
files[0],
files[4]
]
filtered_files = filter_ignored(files)
assert filtered_files == expected, f"Expected {[file.filename for file in expected]}, but got {[file.filename for file in filtered_files]}."
def test_invalid_regex(self, monkeypatch):
"""
Test invalid patterns are quietly ignored.
"""
monkeypatch.setattr(global_settings.ignore, 'regex', ['(((||', '^file[2-4]\..*$'])
files = [
type('', (object,), {'filename': 'file1.py'})(),
type('', (object,), {'filename': 'file2.java'})(),
type('', (object,), {'filename': 'file3.cpp'})(),
type('', (object,), {'filename': 'file4.py'})(),
type('', (object,), {'filename': 'file5.py'})()
]
expected = [
files[0],
files[4]
]
filtered_files = filter_ignored(files)
assert filtered_files == expected, f"Expected {[file.filename for file in expected]}, but got {[file.filename for file in filtered_files]}."