diff --git a/docs/docs/usage-guide/additional_configurations.md b/docs/docs/usage-guide/additional_configurations.md
index 4deca211..aba817e4 100644
--- a/docs/docs/usage-guide/additional_configurations.md
+++ b/docs/docs/usage-guide/additional_configurations.md
@@ -10,12 +10,17 @@ To ignore files or directories, edit the **[ignore.toml](https://github.com/Codi
For example, to ignore Python files in a PR with online usage, comment on a PR:
`/review --ignore.glob=['*.py']`
-To ignore Python files in all PRs, set in a configuration file:
+To ignore Python files in all PRs using `glob` pattern, set in a configuration file:
```
[ignore]
glob = ['*.py']
```
+And to ignore Python files in all PRs using `regex` pattern, set in a configuration file:
+```
+[ignore]
+regex = ['.*\.py$']
+```
## Extra instructions
All PR-Agent tools have a parameter called `extra_instructions`, that enables to add free-text extra instructions. Example usage:
diff --git a/pr_agent/algo/file_filter.py b/pr_agent/algo/file_filter.py
index 9f396549..fa97b283 100644
--- a/pr_agent/algo/file_filter.py
+++ b/pr_agent/algo/file_filter.py
@@ -4,7 +4,7 @@ import re
from pr_agent.config_loader import get_settings
-def filter_ignored(files):
+def filter_ignored(files, platform = 'github'):
"""
Filter out files that match the ignore patterns.
"""
@@ -28,8 +28,16 @@ def filter_ignored(files):
pass
# keep filenames that _don't_ match the ignore regex
- for r in compiled_patterns:
- files = [f for f in files if (f.filename and not r.match(f.filename))]
+ if files and isinstance(files, list):
+ for r in compiled_patterns:
+ if platform == 'github':
+ files = [f for f in files if (f.filename and not r.match(f.filename))]
+ elif platform == 'bitbucket':
+ files = [f for f in files if (f.new.path and not r.match(f.new.path))]
+ elif platform == 'gitlab':
+ files = [f for f in files if (f['new_path'] and not r.match(f['new_path']))]
+ elif platform == 'azure':
+ files = [f for f in files if not r.match(f)]
except Exception as e:
print(f"Could not filter file list: {e}")
diff --git a/pr_agent/algo/language_handler.py b/pr_agent/algo/language_handler.py
index b4c02bee..83f7b85e 100644
--- a/pr_agent/algo/language_handler.py
+++ b/pr_agent/algo/language_handler.py
@@ -15,7 +15,7 @@ def filter_bad_extensions(files):
return [f for f in files if f.filename is not None and is_valid_file(f.filename)]
-def is_valid_file(filename):
+def is_valid_file(filename: str):
return filename.split('.')[-1] not in bad_extensions
diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py
index e0ea2134..1db3dd53 100644
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@@ -91,7 +91,7 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
markdown_text += f"## Incremental PR Reviewer Guide ๐\n\n"
markdown_text += f"โฎ๏ธ Review for commits since previous PR-Agent review {incremental_review}.\n\n"
if gfm_supported:
- markdown_text += "
\n\n"
+ markdown_text += "\n"
# markdown_text += """ Feedback | | """
if not output_data or not output_data.get('review', {}):
@@ -108,7 +108,7 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
key_nice = 'Estimated effort to review [1-5]'
if 'security concerns' in key_nice.lower():
value = emphasize_header(value.strip())
- markdown_text += f" {emoji} {key_nice} | \n\n{value}\n\n |
\n"
+ markdown_text += f" {emoji} {key_nice} | \n{value}\n\n |
\n"
elif 'can be split' in key_nice.lower():
markdown_text += process_can_be_split(emoji, value)
elif 'key issues to review' in key_nice.lower():
@@ -124,15 +124,17 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
if not issue:
continue
issue = emphasize_header(issue)
+ issue = replace_code_tags(issue)
if i == 0:
- markdown_text += f"\n\n{issue} | \n"
+ markdown_text += f"\n{issue} | \n"
else:
- markdown_text += f"\n\n\n{issue} |
\n"
+ markdown_text += f"\n\n{issue} |
\n"
else:
value = emphasize_header(value.strip('-').strip())
- markdown_text += f" {emoji} {key_nice} | \n\n{value}\n\n |
\n"
+ value = replace_code_tags(value)
+ markdown_text += f" {emoji} {key_nice} | \n{value}\n\n |
\n"
else:
- markdown_text += f" {emoji} {key_nice} | \n\n{value}\n\n |
\n"
+ markdown_text += f" {emoji} {key_nice} | \n{value}\n\n |
\n"
else:
if len(value.split()) > 1:
markdown_text += f"{emoji} **{key_nice}:**\n\n {value}\n\n"
diff --git a/pr_agent/git_providers/azuredevops_provider.py b/pr_agent/git_providers/azuredevops_provider.py
index ee3a116f..9578e0fb 100644
--- a/pr_agent/git_providers/azuredevops_provider.py
+++ b/pr_agent/git_providers/azuredevops_provider.py
@@ -2,6 +2,7 @@ import os
from typing import Optional, Tuple
from urllib.parse import urlparse
+from ..algo.file_filter import filter_ignored
from ..log import get_logger
from ..algo.language_handler import is_valid_file
from ..algo.utils import clip_tokens, find_line_number_of_relevant_line_in_file, load_large_diff
@@ -284,8 +285,20 @@ class AzureDevopsProvider(GitProvider):
#
# diffs = list(set(diffs))
+ diffs_original = diffs
+ diffs = filter_ignored(diffs_original, 'azure')
+ if diffs_original != diffs:
+ try:
+ get_logger().info(f"Filtered out [ignore] files for pull request:", extra=
+ {"files": diffs_original, # diffs is just a list of names
+ "filtered_files": diffs})
+ except Exception:
+ pass
+
+ invalid_files_names = []
for file in diffs:
if not is_valid_file(file):
+ invalid_files_names.append(file)
continue
version = GitVersionDescriptor(
@@ -350,6 +363,8 @@ class AzureDevopsProvider(GitProvider):
edit_type=edit_type,
)
)
+ get_logger().info(f"Invalid files: {invalid_files_names}")
+
self.diff_files = diff_files
return diff_files
except Exception as e:
diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py
index 27a88a76..4a0ce49a 100644
--- a/pr_agent/git_providers/bitbucket_provider.py
+++ b/pr_agent/git_providers/bitbucket_provider.py
@@ -7,6 +7,8 @@ from atlassian.bitbucket import Cloud
from starlette_context import context
from pr_agent.algo.types import FilePatchInfo, EDIT_TYPE
+from ..algo.file_filter import filter_ignored
+from ..algo.language_handler import is_valid_file
from ..algo.utils import find_line_number_of_relevant_line_in_file
from ..config_loader import get_settings
from ..log import get_logger
@@ -122,13 +124,30 @@ class BitbucketProvider(GitProvider):
if self.diff_files:
return self.diff_files
- diffs = self.pr.diffstat()
+ diffs_original = list(self.pr.diffstat())
+ diffs = filter_ignored(diffs_original, 'bitbucket')
+ if diffs != diffs_original:
+ try:
+ names_original = [d.new.path for d in diffs_original]
+ names_filtered = [d.new.path for d in diffs]
+ get_logger().info(f"Filtered out [ignore] files for PR", extra={
+ 'original_files': names_original,
+ 'filtered_files': names_filtered
+ })
+ except Exception as e:
+ pass
+
diff_split = [
"diff --git%s" % x for x in self.pr.diff().split("diff --git") if x.strip()
]
+ invalid_files_names = []
diff_files = []
for index, diff in enumerate(diffs):
+ if not is_valid_file(diff.new.path):
+ invalid_files_names.append(diff.new.path)
+ continue
+
original_file_content_str = self._get_pr_file_content(
diff.old.get_data("links")
)
@@ -150,6 +169,9 @@ class BitbucketProvider(GitProvider):
file_patch_canonic_structure.edit_type = EDIT_TYPE.RENAMED
diff_files.append(file_patch_canonic_structure)
+ if invalid_files_names:
+ get_logger().info(f"Invalid file names: {invalid_files_names}")
+
self.diff_files = diff_files
return diff_files
diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py
index 36f15201..5e7601b3 100644
--- a/pr_agent/git_providers/github_provider.py
+++ b/pr_agent/git_providers/github_provider.py
@@ -8,6 +8,7 @@ from github import AppAuthentication, Auth, Github, GithubException
from retry import retry
from starlette_context import context
+from ..algo.file_filter import filter_ignored
from ..algo.language_handler import is_valid_file
from ..algo.utils import load_large_diff, clip_tokens, find_line_number_of_relevant_line_in_file
from ..config_loader import get_settings
@@ -106,19 +107,22 @@ class GithubProvider(GitProvider):
git_files = context.get("git_files", None)
if git_files:
return git_files
- self.git_files = self.pr.get_files()
+            self.git_files = list(self.pr.get_files())  # 'list' to handle pagination
context["git_files"] = self.git_files
return self.git_files
except Exception:
if not self.git_files:
- self.git_files = self.pr.get_files()
+ self.git_files = list(self.pr.get_files())
return self.git_files
def get_num_of_files(self):
- if self.git_files:
+ if hasattr(self.git_files, "totalCount"):
return self.git_files.totalCount
else:
- return -1
+ try:
+ return len(self.git_files)
+ except Exception as e:
+ return -1
@retry(exceptions=RateLimitExceeded,
tries=get_settings().github.ratelimit_retries, delay=2, backoff=2, jitter=(1, 3))
@@ -142,12 +146,24 @@ class GithubProvider(GitProvider):
if self.diff_files:
return self.diff_files
- files = self.get_files()
- diff_files = []
+ # filter files using [ignore] patterns
+ files_original = self.get_files()
+ files = filter_ignored(files_original)
+ if files_original != files:
+ try:
+ names_original = [file.filename for file in files_original]
+ names_new = [file.filename for file in files]
+ get_logger().info(f"Filtered out [ignore] files for pull request:", extra=
+ {"files": names_original,
+ "filtered_files": names_new})
+ except Exception:
+ pass
+ diff_files = []
+ invalid_files_names = []
for file in files:
if not is_valid_file(file.filename):
- get_logger().info(f"Skipping a non-code file: {file.filename}")
+ invalid_files_names.append(file.filename)
continue
new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha) # communication with GitHub
@@ -183,6 +199,8 @@ class GithubProvider(GitProvider):
num_plus_lines=num_plus_lines,
num_minus_lines=num_minus_lines,)
diff_files.append(file_patch_canonical_structure)
+ if invalid_files_names:
+ get_logger().info(f"Filtered out files with invalid extensions: {invalid_files_names}")
self.diff_files = diff_files
try:
diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py
index ad18ba1d..766a6743 100644
--- a/pr_agent/git_providers/gitlab_provider.py
+++ b/pr_agent/git_providers/gitlab_provider.py
@@ -6,6 +6,7 @@ from urllib.parse import urlparse
import gitlab
from gitlab import GitlabGetError
+from ..algo.file_filter import filter_ignored
from ..algo.language_handler import is_valid_file
from ..algo.utils import load_large_diff, clip_tokens, find_line_number_of_relevant_line_in_file
from ..config_loader import get_settings
@@ -84,53 +85,71 @@ class GitLabProvider(GitProvider):
if self.diff_files:
return self.diff_files
- diffs = self.mr.changes()['changes']
+ # filter files using [ignore] patterns
+ diffs_original = self.mr.changes()['changes']
+ diffs = filter_ignored(diffs_original, 'gitlab')
+ if diffs != diffs_original:
+ try:
+ names_original = [diff['new_path'] for diff in diffs_original]
+ names_filtered = [diff['new_path'] for diff in diffs]
+ get_logger().info(f"Filtered out [ignore] files for merge request {self.id_mr}", extra={
+ 'original_files': names_original,
+ 'filtered_files': names_filtered
+ })
+ except Exception as e:
+ pass
+
diff_files = []
+ invalid_files_names = []
for diff in diffs:
- if is_valid_file(diff['new_path']):
- original_file_content_str = self.get_pr_file_content(diff['old_path'], self.mr.diff_refs['base_sha'])
- new_file_content_str = self.get_pr_file_content(diff['new_path'], self.mr.diff_refs['head_sha'])
+ if not is_valid_file(diff['new_path']):
+ invalid_files_names.append(diff['new_path'])
+ continue
- try:
- if isinstance(original_file_content_str, bytes):
- original_file_content_str = bytes.decode(original_file_content_str, 'utf-8')
- if isinstance(new_file_content_str, bytes):
- new_file_content_str = bytes.decode(new_file_content_str, 'utf-8')
- except UnicodeDecodeError:
- get_logger().warning(
- f"Cannot decode file {diff['old_path']} or {diff['new_path']} in merge request {self.id_mr}")
+ original_file_content_str = self.get_pr_file_content(diff['old_path'], self.mr.diff_refs['base_sha'])
+ new_file_content_str = self.get_pr_file_content(diff['new_path'], self.mr.diff_refs['head_sha'])
+ try:
+ if isinstance(original_file_content_str, bytes):
+ original_file_content_str = bytes.decode(original_file_content_str, 'utf-8')
+ if isinstance(new_file_content_str, bytes):
+ new_file_content_str = bytes.decode(new_file_content_str, 'utf-8')
+ except UnicodeDecodeError:
+ get_logger().warning(
+ f"Cannot decode file {diff['old_path']} or {diff['new_path']} in merge request {self.id_mr}")
- edit_type = EDIT_TYPE.MODIFIED
- if diff['new_file']:
- edit_type = EDIT_TYPE.ADDED
- elif diff['deleted_file']:
- edit_type = EDIT_TYPE.DELETED
- elif diff['renamed_file']:
- edit_type = EDIT_TYPE.RENAMED
+ edit_type = EDIT_TYPE.MODIFIED
+ if diff['new_file']:
+ edit_type = EDIT_TYPE.ADDED
+ elif diff['deleted_file']:
+ edit_type = EDIT_TYPE.DELETED
+ elif diff['renamed_file']:
+ edit_type = EDIT_TYPE.RENAMED
- filename = diff['new_path']
- patch = diff['diff']
- if not patch:
- patch = load_large_diff(filename, new_file_content_str, original_file_content_str)
+ filename = diff['new_path']
+ patch = diff['diff']
+ if not patch:
+ patch = load_large_diff(filename, new_file_content_str, original_file_content_str)
- # count number of lines added and removed
- patch_lines = patch.splitlines(keepends=True)
- num_plus_lines = len([line for line in patch_lines if line.startswith('+')])
- num_minus_lines = len([line for line in patch_lines if line.startswith('-')])
- diff_files.append(
- FilePatchInfo(original_file_content_str, new_file_content_str,
- patch=patch,
- filename=filename,
- edit_type=edit_type,
- old_filename=None if diff['old_path'] == diff['new_path'] else diff['old_path'],
- num_plus_lines=num_plus_lines,
- num_minus_lines=num_minus_lines, ))
+ # count number of lines added and removed
+ patch_lines = patch.splitlines(keepends=True)
+ num_plus_lines = len([line for line in patch_lines if line.startswith('+')])
+ num_minus_lines = len([line for line in patch_lines if line.startswith('-')])
+ diff_files.append(
+ FilePatchInfo(original_file_content_str, new_file_content_str,
+ patch=patch,
+ filename=filename,
+ edit_type=edit_type,
+ old_filename=None if diff['old_path'] == diff['new_path'] else diff['old_path'],
+ num_plus_lines=num_plus_lines,
+ num_minus_lines=num_minus_lines, ))
+ if invalid_files_names:
+ get_logger().info(f"Filtered out files with invalid extensions: {invalid_files_names}")
self.diff_files = diff_files
return diff_files
- def get_files(self):
+ def get_files(self) -> list:
if not self.git_files:
self.git_files = [change['new_path'] for change in self.mr.changes()['changes']]
return self.git_files
diff --git a/pr_agent/settings/ignore.toml b/pr_agent/settings/ignore.toml
index 429d3887..bc847cfc 100644
--- a/pr_agent/settings/ignore.toml
+++ b/pr_agent/settings/ignore.toml
@@ -8,4 +8,5 @@ glob = [
regex = [
# Ignore files and directories matching these regex patterns.
# See https://learnbyexample.github.io/python-regex-cheatsheet/
+ # for example: regex = ['.*\.toml$']
]
diff --git a/tests/unittest/test_convert_to_markdown.py b/tests/unittest/test_convert_to_markdown.py
index e4918beb..d701efb2 100644
--- a/tests/unittest/test_convert_to_markdown.py
+++ b/tests/unittest/test_convert_to_markdown.py
@@ -52,7 +52,7 @@ class TestConvertToMarkdown:
'suggestion': "Consider raising an exception or logging a warning when 'pr_url' attribute is not found. This can help in debugging issues related to the absence of 'pr_url' in instances where it's expected. [important]\n",
'relevant_line': '[return ""](https://github.com/Codium-ai/pr-agent-pro/pull/102/files#diff-52d45f12b836f77ed1aef86e972e65404634ea4e2a6083fb71a9b0f9bb9e062fR199)'}]}
- expected_output = '## PR Reviewer Guide ๐\n\n\n\n
โฑ๏ธ Estimated effort to review [1-5] | \n\n1, because the changes are minimal and straightforward, focusing on a single functionality addition.\n\n\n |
\n ๐งช Relevant tests | \n\nNo\n\n\n |
\n โก Possible issues | \n\nNo\n\n\n |
\n ๐ Security concerns | \n\nNo\n\n |
\n
\n\n\n Code feedback:
\n\n
relevant file | pr_agent/git_providers/git_provider.py\n |
suggestion | \n\n\n\nConsider raising an exception or logging a warning when \'pr_url\' attribute is not found. This can help in debugging issues related to the absence of \'pr_url\' in instances where it\'s expected. [important]\n\n\n |
relevant line | return "" |
\n\n '
+ expected_output = '## PR Reviewer Guide ๐\n\n\n โฑ๏ธ Estimated effort to review [1-5] | \n1, because the changes are minimal and straightforward, focusing on a single functionality addition.\n\n\n |
\n ๐งช Relevant tests | \nNo\n\n\n |
\n โก Possible issues | \nNo\n\n\n |
\n ๐ Security concerns | \nNo\n\n |
\n
\n\n\n Code feedback:
\n\n
relevant file | pr_agent/git_providers/git_provider.py\n |
suggestion | \n\n\n\nConsider raising an exception or logging a warning when \'pr_url\' attribute is not found. This can help in debugging issues related to the absence of \'pr_url\' in instances where it\'s expected. [important]\n\n\n |
relevant line | return "" |
\n\n '
assert convert_to_markdown(input_data).strip() == expected_output.strip()