Merge pull request #966 from Codium-ai/tr/ignore

Tr/ignore
This commit is contained in:
Tal
2024-06-15 19:48:42 +03:00
committed by GitHub
10 changed files with 146 additions and 56 deletions

View File

@ -10,12 +10,17 @@ To ignore files or directories, edit the **[ignore.toml](https://github.com/Codi
For example, to ignore Python files in a PR with online usage, comment on a PR:
`/review --ignore.glob=['*.py']`
To ignore Python files in all PRs, set in a configuration file:
To ignore Python files in all PRs using `glob` pattern, set in a configuration file:
```
[ignore]
glob = ['*.py']
```
And to ignore Python files in all PRs using `regex` pattern, set in a configuration file:
```
[regex]
regex = ['.*\.py$']
```
## Extra instructions
All PR-Agent tools have a parameter called `extra_instructions`, that enables to add free-text extra instructions. Example usage:

View File

@ -4,7 +4,7 @@ import re
from pr_agent.config_loader import get_settings
def filter_ignored(files):
def filter_ignored(files, platform = 'github'):
"""
Filter out files that match the ignore patterns.
"""
@ -28,8 +28,16 @@ def filter_ignored(files):
pass
# keep filenames that _don't_ match the ignore regex
if files and isinstance(files, list):
for r in compiled_patterns:
if platform == 'github':
files = [f for f in files if (f.filename and not r.match(f.filename))]
elif platform == 'bitbucket':
files = [f for f in files if (f.new.path and not r.match(f.new.path))]
elif platform == 'gitlab':
files = [f for f in files if (f['new_path'] and not r.match(f['new_path']))]
elif platform == 'azure':
files = [f for f in files if not r.match(f)]
except Exception as e:
print(f"Could not filter file list: {e}")

View File

@ -15,7 +15,7 @@ def filter_bad_extensions(files):
return [f for f in files if f.filename is not None and is_valid_file(f.filename)]
def is_valid_file(filename):
def is_valid_file(filename: str):
return filename.split('.')[-1] not in bad_extensions

View File

@ -91,7 +91,7 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
markdown_text += f"## Incremental PR Reviewer Guide 🔍\n\n"
markdown_text += f"⏮️ Review for commits since previous PR-Agent review {incremental_review}.\n\n"
if gfm_supported:
markdown_text += "<table>\n<tr>\n"
markdown_text += "<table>\n"
# markdown_text += """<td> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Feedback&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</td> <td></td></tr>"""
if not output_data or not output_data.get('review', {}):
@ -108,7 +108,7 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
key_nice = 'Estimated&nbsp;effort&nbsp;to&nbsp;review [1-5]'
if 'security concerns' in key_nice.lower():
value = emphasize_header(value.strip())
markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n{value}\n\n</td></tr>\n"
elif 'can be split' in key_nice.lower():
markdown_text += process_can_be_split(emoji, value)
elif 'key issues to review' in key_nice.lower():
@ -124,15 +124,17 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
if not issue:
continue
issue = emphasize_header(issue)
issue = replace_code_tags(issue)
if i == 0:
markdown_text += f"<td>\n\n{issue}</td></tr>\n"
markdown_text += f"<td>\n{issue}</td></tr>\n"
else:
markdown_text += f"<tr>\n<td>\n\n{issue}</td></tr>\n"
markdown_text += f"<tr>\n<td>\n{issue}</td></tr>\n"
else:
value = emphasize_header(value.strip('-').strip())
markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
value = replace_code_tags(value)
markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n{value}\n\n</td></tr>\n"
else:
markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n{value}\n\n</td></tr>\n"
else:
if len(value.split()) > 1:
markdown_text += f"{emoji} **{key_nice}:**\n\n {value}\n\n"

View File

@ -2,6 +2,7 @@ import os
from typing import Optional, Tuple
from urllib.parse import urlparse
from ..algo.file_filter import filter_ignored
from ..log import get_logger
from ..algo.language_handler import is_valid_file
from ..algo.utils import clip_tokens, find_line_number_of_relevant_line_in_file, load_large_diff
@ -284,8 +285,20 @@ class AzureDevopsProvider(GitProvider):
#
# diffs = list(set(diffs))
diffs_original = diffs
diffs = filter_ignored(diffs_original, 'azure')
if diffs_original != diffs:
try:
get_logger().info(f"Filtered out [ignore] files for pull request:", extra=
{"files": diffs_original, # diffs is just a list of names
"filtered_files": diffs})
except Exception:
pass
invalid_files_names = []
for file in diffs:
if not is_valid_file(file):
invalid_files_names.append(file)
continue
version = GitVersionDescriptor(
@ -350,6 +363,8 @@ class AzureDevopsProvider(GitProvider):
edit_type=edit_type,
)
)
get_logger().info(f"Invalid files: {invalid_files_names}")
self.diff_files = diff_files
return diff_files
except Exception as e:

View File

@ -7,6 +7,8 @@ from atlassian.bitbucket import Cloud
from starlette_context import context
from pr_agent.algo.types import FilePatchInfo, EDIT_TYPE
from ..algo.file_filter import filter_ignored
from ..algo.language_handler import is_valid_file
from ..algo.utils import find_line_number_of_relevant_line_in_file
from ..config_loader import get_settings
from ..log import get_logger
@ -122,13 +124,30 @@ class BitbucketProvider(GitProvider):
if self.diff_files:
return self.diff_files
diffs = self.pr.diffstat()
diffs_original = list(self.pr.diffstat())
diffs = filter_ignored(diffs_original, 'bitbucket')
if diffs != diffs_original:
try:
names_original = [d.new.path for d in diffs_original]
names_filtered = [d.new.path for d in diffs]
get_logger().info(f"Filtered out [ignore] files for PR", extra={
'original_files': names_original,
'filtered_files': names_filtered
})
except Exception as e:
pass
diff_split = [
"diff --git%s" % x for x in self.pr.diff().split("diff --git") if x.strip()
]
invalid_files_names = []
diff_files = []
for index, diff in enumerate(diffs):
if not is_valid_file(diff.new.path):
invalid_files_names.append(diff.new.path)
continue
original_file_content_str = self._get_pr_file_content(
diff.old.get_data("links")
)
@ -150,6 +169,9 @@ class BitbucketProvider(GitProvider):
file_patch_canonic_structure.edit_type = EDIT_TYPE.RENAMED
diff_files.append(file_patch_canonic_structure)
if invalid_files_names:
get_logger().info(f"Invalid file names: {invalid_files_names}")
self.diff_files = diff_files
return diff_files

View File

@ -8,6 +8,7 @@ from github import AppAuthentication, Auth, Github, GithubException
from retry import retry
from starlette_context import context
from ..algo.file_filter import filter_ignored
from ..algo.language_handler import is_valid_file
from ..algo.utils import load_large_diff, clip_tokens, find_line_number_of_relevant_line_in_file
from ..config_loader import get_settings
@ -106,18 +107,21 @@ class GithubProvider(GitProvider):
git_files = context.get("git_files", None)
if git_files:
return git_files
self.git_files = self.pr.get_files()
self.git_files = list(self.pr.get_files()) # 'list' to hanlde pagination
context["git_files"] = self.git_files
return self.git_files
except Exception:
if not self.git_files:
self.git_files = self.pr.get_files()
self.git_files = list(self.pr.get_files())
return self.git_files
def get_num_of_files(self):
if self.git_files:
if hasattr(self.git_files, "totalCount"):
return self.git_files.totalCount
else:
try:
return len(self.git_files)
except Exception as e:
return -1
@retry(exceptions=RateLimitExceeded,
@ -142,12 +146,24 @@ class GithubProvider(GitProvider):
if self.diff_files:
return self.diff_files
files = self.get_files()
diff_files = []
# filter files using [ignore] patterns
files_original = self.get_files()
files = filter_ignored(files_original)
if files_original != files:
try:
names_original = [file.filename for file in files_original]
names_new = [file.filename for file in files]
get_logger().info(f"Filtered out [ignore] files for pull request:", extra=
{"files": names_original,
"filtered_files": names_new})
except Exception:
pass
diff_files = []
invalid_files_names = []
for file in files:
if not is_valid_file(file.filename):
get_logger().info(f"Skipping a non-code file: {file.filename}")
invalid_files_names.append(file.filename)
continue
new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha) # communication with GitHub
@ -183,6 +199,8 @@ class GithubProvider(GitProvider):
num_plus_lines=num_plus_lines,
num_minus_lines=num_minus_lines,)
diff_files.append(file_patch_canonical_structure)
if invalid_files_names:
get_logger().info(f"Filtered out files with invalid extensions: {invalid_files_names}")
self.diff_files = diff_files
try:

View File

@ -6,6 +6,7 @@ from urllib.parse import urlparse
import gitlab
from gitlab import GitlabGetError
from ..algo.file_filter import filter_ignored
from ..algo.language_handler import is_valid_file
from ..algo.utils import load_large_diff, clip_tokens, find_line_number_of_relevant_line_in_file
from ..config_loader import get_settings
@ -84,13 +85,29 @@ class GitLabProvider(GitProvider):
if self.diff_files:
return self.diff_files
diffs = self.mr.changes()['changes']
# filter files using [ignore] patterns
diffs_original = self.mr.changes()['changes']
diffs = filter_ignored(diffs_original, 'gitlab')
if diffs != diffs_original:
try:
names_original = [diff['new_path'] for diff in diffs_original]
names_filtered = [diff['new_path'] for diff in diffs]
get_logger().info(f"Filtered out [ignore] files for merge request {self.id_mr}", extra={
'original_files': names_original,
'filtered_files': names_filtered
})
except Exception as e:
pass
diff_files = []
invalid_files_names = []
for diff in diffs:
if is_valid_file(diff['new_path']):
if not is_valid_file(diff['new_path']):
invalid_files_names.append(diff['new_path'])
continue
original_file_content_str = self.get_pr_file_content(diff['old_path'], self.mr.diff_refs['base_sha'])
new_file_content_str = self.get_pr_file_content(diff['new_path'], self.mr.diff_refs['head_sha'])
try:
if isinstance(original_file_content_str, bytes):
original_file_content_str = bytes.decode(original_file_content_str, 'utf-8')
@ -126,11 +143,13 @@ class GitLabProvider(GitProvider):
old_filename=None if diff['old_path'] == diff['new_path'] else diff['old_path'],
num_plus_lines=num_plus_lines,
num_minus_lines=num_minus_lines, ))
if invalid_files_names:
get_logger().info(f"Filtered out files with invalid extensions: {invalid_files_names}")
self.diff_files = diff_files
return diff_files
def get_files(self):
def get_files(self) -> list:
if not self.git_files:
self.git_files = [change['new_path'] for change in self.mr.changes()['changes']]
return self.git_files

View File

@ -8,4 +8,5 @@ glob = [
regex = [
# Ignore files and directories matching these regex patterns.
# See https://learnbyexample.github.io/python-regex-cheatsheet/
# for example: regex = ['.*\.toml$']
]

View File

@ -52,7 +52,7 @@ class TestConvertToMarkdown:
'suggestion': "Consider raising an exception or logging a warning when 'pr_url' attribute is not found. This can help in debugging issues related to the absence of 'pr_url' in instances where it's expected. [important]\n",
'relevant_line': '[return ""](https://github.com/Codium-ai/pr-agent-pro/pull/102/files#diff-52d45f12b836f77ed1aef86e972e65404634ea4e2a6083fb71a9b0f9bb9e062fR199)'}]}
expected_output = '## PR Reviewer Guide 🔍\n\n<table>\n<tr>\n<tr><td> ⏱️&nbsp;<strong>Estimated&nbsp;effort&nbsp;to&nbsp;review [1-5]</strong></td><td>\n\n1, because the changes are minimal and straightforward, focusing on a single functionality addition.\n\n\n</td></tr>\n<tr><td> 🧪&nbsp;<strong>Relevant tests</strong></td><td>\n\nNo\n\n\n</td></tr>\n<tr><td> ⚡&nbsp;<strong>Possible issues</strong></td><td>\n\nNo\n\n\n</td></tr>\n<tr><td> 🔒&nbsp;<strong>Security concerns</strong></td><td>\n\nNo\n\n</td></tr>\n</table>\n\n\n<details><summary> <strong>Code feedback:</strong></summary>\n\n<hr><table><tr><td>relevant file</td><td>pr_agent/git_providers/git_provider.py\n</td></tr><tr><td>suggestion &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</td><td>\n\n<strong>\n\nConsider raising an exception or logging a warning when \'pr_url\' attribute is not found. This can help in debugging issues related to the absence of \'pr_url\' in instances where it\'s expected. [important]\n\n</strong>\n</td></tr><tr><td>relevant line</td><td><a href=\'https://github.com/Codium-ai/pr-agent-pro/pull/102/files#diff-52d45f12b836f77ed1aef86e972e65404634ea4e2a6083fb71a9b0f9bb9e062fR199\'>return ""</a></td></tr></table><hr>\n\n</details>'
expected_output = '## PR Reviewer Guide 🔍\n\n<table>\n<tr><td> ⏱️&nbsp;<strong>Estimated&nbsp;effort&nbsp;to&nbsp;review [1-5]</strong></td><td>\n1, because the changes are minimal and straightforward, focusing on a single functionality addition.\n\n\n</td></tr>\n<tr><td> 🧪&nbsp;<strong>Relevant tests</strong></td><td>\nNo\n\n\n</td></tr>\n<tr><td> ⚡&nbsp;<strong>Possible issues</strong></td><td>\nNo\n\n\n</td></tr>\n<tr><td> 🔒&nbsp;<strong>Security concerns</strong></td><td>\nNo\n\n</td></tr>\n</table>\n\n\n<details><summary> <strong>Code feedback:</strong></summary>\n\n<hr><table><tr><td>relevant file</td><td>pr_agent/git_providers/git_provider.py\n</td></tr><tr><td>suggestion &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</td><td>\n\n<strong>\n\nConsider raising an exception or logging a warning when \'pr_url\' attribute is not found. This can help in debugging issues related to the absence of \'pr_url\' in instances where it\'s expected. [important]\n\n</strong>\n</td></tr><tr><td>relevant line</td><td><a href=\'https://github.com/Codium-ai/pr-agent-pro/pull/102/files#diff-52d45f12b836f77ed1aef86e972e65404634ea4e2a6083fb71a9b0f9bb9e062fR199\'>return ""</a></td></tr></table><hr>\n\n</details>'
assert convert_to_markdown(input_data).strip() == expected_output.strip()