mirror of
https://github.com/qodo-ai/pr-agent.git
synced 2025-07-04 12:50:38 +08:00
@ -10,12 +10,17 @@ To ignore files or directories, edit the **[ignore.toml](https://github.com/Codi
|
||||
For example, to ignore Python files in a PR with online usage, comment on a PR:
|
||||
`/review --ignore.glob=['*.py']`
|
||||
|
||||
To ignore Python files in all PRs, set in a configuration file:
|
||||
To ignore Python files in all PRs using `glob` pattern, set in a configuration file:
|
||||
```
|
||||
[ignore]
|
||||
glob = ['*.py']
|
||||
```
|
||||
|
||||
And to ignore Python files in all PRs using `regex` pattern, set in a configuration file:
|
||||
```
|
||||
[regex]
|
||||
regex = ['.*\.py$']
|
||||
```
|
||||
## Extra instructions
|
||||
|
||||
All PR-Agent tools have a parameter called `extra_instructions`, that enables to add free-text extra instructions. Example usage:
|
||||
|
@ -4,7 +4,7 @@ import re
|
||||
from pr_agent.config_loader import get_settings
|
||||
|
||||
|
||||
def filter_ignored(files):
|
||||
def filter_ignored(files, platform = 'github'):
|
||||
"""
|
||||
Filter out files that match the ignore patterns.
|
||||
"""
|
||||
@ -28,8 +28,16 @@ def filter_ignored(files):
|
||||
pass
|
||||
|
||||
# keep filenames that _don't_ match the ignore regex
|
||||
if files and isinstance(files, list):
|
||||
for r in compiled_patterns:
|
||||
if platform == 'github':
|
||||
files = [f for f in files if (f.filename and not r.match(f.filename))]
|
||||
elif platform == 'bitbucket':
|
||||
files = [f for f in files if (f.new.path and not r.match(f.new.path))]
|
||||
elif platform == 'gitlab':
|
||||
files = [f for f in files if (f['new_path'] and not r.match(f['new_path']))]
|
||||
elif platform == 'azure':
|
||||
files = [f for f in files if not r.match(f)]
|
||||
|
||||
except Exception as e:
|
||||
print(f"Could not filter file list: {e}")
|
||||
|
@ -15,7 +15,7 @@ def filter_bad_extensions(files):
|
||||
return [f for f in files if f.filename is not None and is_valid_file(f.filename)]
|
||||
|
||||
|
||||
def is_valid_file(filename):
|
||||
def is_valid_file(filename: str):
|
||||
return filename.split('.')[-1] not in bad_extensions
|
||||
|
||||
|
||||
|
@ -91,7 +91,7 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
|
||||
markdown_text += f"## Incremental PR Reviewer Guide 🔍\n\n"
|
||||
markdown_text += f"⏮️ Review for commits since previous PR-Agent review {incremental_review}.\n\n"
|
||||
if gfm_supported:
|
||||
markdown_text += "<table>\n<tr>\n"
|
||||
markdown_text += "<table>\n"
|
||||
# markdown_text += """<td> Feedback </td> <td></td></tr>"""
|
||||
|
||||
if not output_data or not output_data.get('review', {}):
|
||||
@ -108,7 +108,7 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
|
||||
key_nice = 'Estimated effort to review [1-5]'
|
||||
if 'security concerns' in key_nice.lower():
|
||||
value = emphasize_header(value.strip())
|
||||
markdown_text += f"<tr><td> {emoji} <strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
|
||||
markdown_text += f"<tr><td> {emoji} <strong>{key_nice}</strong></td><td>\n{value}\n\n</td></tr>\n"
|
||||
elif 'can be split' in key_nice.lower():
|
||||
markdown_text += process_can_be_split(emoji, value)
|
||||
elif 'key issues to review' in key_nice.lower():
|
||||
@ -124,15 +124,17 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
|
||||
if not issue:
|
||||
continue
|
||||
issue = emphasize_header(issue)
|
||||
issue = replace_code_tags(issue)
|
||||
if i == 0:
|
||||
markdown_text += f"<td>\n\n{issue}</td></tr>\n"
|
||||
markdown_text += f"<td>\n{issue}</td></tr>\n"
|
||||
else:
|
||||
markdown_text += f"<tr>\n<td>\n\n{issue}</td></tr>\n"
|
||||
markdown_text += f"<tr>\n<td>\n{issue}</td></tr>\n"
|
||||
else:
|
||||
value = emphasize_header(value.strip('-').strip())
|
||||
markdown_text += f"<tr><td> {emoji} <strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
|
||||
value = replace_code_tags(value)
|
||||
markdown_text += f"<tr><td> {emoji} <strong>{key_nice}</strong></td><td>\n{value}\n\n</td></tr>\n"
|
||||
else:
|
||||
markdown_text += f"<tr><td> {emoji} <strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
|
||||
markdown_text += f"<tr><td> {emoji} <strong>{key_nice}</strong></td><td>\n{value}\n\n</td></tr>\n"
|
||||
else:
|
||||
if len(value.split()) > 1:
|
||||
markdown_text += f"{emoji} **{key_nice}:**\n\n {value}\n\n"
|
||||
|
@ -2,6 +2,7 @@ import os
|
||||
from typing import Optional, Tuple
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from ..algo.file_filter import filter_ignored
|
||||
from ..log import get_logger
|
||||
from ..algo.language_handler import is_valid_file
|
||||
from ..algo.utils import clip_tokens, find_line_number_of_relevant_line_in_file, load_large_diff
|
||||
@ -284,8 +285,20 @@ class AzureDevopsProvider(GitProvider):
|
||||
#
|
||||
# diffs = list(set(diffs))
|
||||
|
||||
diffs_original = diffs
|
||||
diffs = filter_ignored(diffs_original, 'azure')
|
||||
if diffs_original != diffs:
|
||||
try:
|
||||
get_logger().info(f"Filtered out [ignore] files for pull request:", extra=
|
||||
{"files": diffs_original, # diffs is just a list of names
|
||||
"filtered_files": diffs})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
invalid_files_names = []
|
||||
for file in diffs:
|
||||
if not is_valid_file(file):
|
||||
invalid_files_names.append(file)
|
||||
continue
|
||||
|
||||
version = GitVersionDescriptor(
|
||||
@ -350,6 +363,8 @@ class AzureDevopsProvider(GitProvider):
|
||||
edit_type=edit_type,
|
||||
)
|
||||
)
|
||||
get_logger().info(f"Invalid files: {invalid_files_names}")
|
||||
|
||||
self.diff_files = diff_files
|
||||
return diff_files
|
||||
except Exception as e:
|
||||
|
@ -7,6 +7,8 @@ from atlassian.bitbucket import Cloud
|
||||
from starlette_context import context
|
||||
|
||||
from pr_agent.algo.types import FilePatchInfo, EDIT_TYPE
|
||||
from ..algo.file_filter import filter_ignored
|
||||
from ..algo.language_handler import is_valid_file
|
||||
from ..algo.utils import find_line_number_of_relevant_line_in_file
|
||||
from ..config_loader import get_settings
|
||||
from ..log import get_logger
|
||||
@ -122,13 +124,30 @@ class BitbucketProvider(GitProvider):
|
||||
if self.diff_files:
|
||||
return self.diff_files
|
||||
|
||||
diffs = self.pr.diffstat()
|
||||
diffs_original = list(self.pr.diffstat())
|
||||
diffs = filter_ignored(diffs_original, 'bitbucket')
|
||||
if diffs != diffs_original:
|
||||
try:
|
||||
names_original = [d.new.path for d in diffs_original]
|
||||
names_filtered = [d.new.path for d in diffs]
|
||||
get_logger().info(f"Filtered out [ignore] files for PR", extra={
|
||||
'original_files': names_original,
|
||||
'filtered_files': names_filtered
|
||||
})
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
diff_split = [
|
||||
"diff --git%s" % x for x in self.pr.diff().split("diff --git") if x.strip()
|
||||
]
|
||||
|
||||
invalid_files_names = []
|
||||
diff_files = []
|
||||
for index, diff in enumerate(diffs):
|
||||
if not is_valid_file(diff.new.path):
|
||||
invalid_files_names.append(diff.new.path)
|
||||
continue
|
||||
|
||||
original_file_content_str = self._get_pr_file_content(
|
||||
diff.old.get_data("links")
|
||||
)
|
||||
@ -150,6 +169,9 @@ class BitbucketProvider(GitProvider):
|
||||
file_patch_canonic_structure.edit_type = EDIT_TYPE.RENAMED
|
||||
diff_files.append(file_patch_canonic_structure)
|
||||
|
||||
if invalid_files_names:
|
||||
get_logger().info(f"Invalid file names: {invalid_files_names}")
|
||||
|
||||
|
||||
self.diff_files = diff_files
|
||||
return diff_files
|
||||
|
@ -8,6 +8,7 @@ from github import AppAuthentication, Auth, Github, GithubException
|
||||
from retry import retry
|
||||
from starlette_context import context
|
||||
|
||||
from ..algo.file_filter import filter_ignored
|
||||
from ..algo.language_handler import is_valid_file
|
||||
from ..algo.utils import load_large_diff, clip_tokens, find_line_number_of_relevant_line_in_file
|
||||
from ..config_loader import get_settings
|
||||
@ -106,18 +107,21 @@ class GithubProvider(GitProvider):
|
||||
git_files = context.get("git_files", None)
|
||||
if git_files:
|
||||
return git_files
|
||||
self.git_files = self.pr.get_files()
|
||||
self.git_files = list(self.pr.get_files()) # 'list' to hanlde pagination
|
||||
context["git_files"] = self.git_files
|
||||
return self.git_files
|
||||
except Exception:
|
||||
if not self.git_files:
|
||||
self.git_files = self.pr.get_files()
|
||||
self.git_files = list(self.pr.get_files())
|
||||
return self.git_files
|
||||
|
||||
def get_num_of_files(self):
|
||||
if self.git_files:
|
||||
if hasattr(self.git_files, "totalCount"):
|
||||
return self.git_files.totalCount
|
||||
else:
|
||||
try:
|
||||
return len(self.git_files)
|
||||
except Exception as e:
|
||||
return -1
|
||||
|
||||
@retry(exceptions=RateLimitExceeded,
|
||||
@ -142,12 +146,24 @@ class GithubProvider(GitProvider):
|
||||
if self.diff_files:
|
||||
return self.diff_files
|
||||
|
||||
files = self.get_files()
|
||||
diff_files = []
|
||||
# filter files using [ignore] patterns
|
||||
files_original = self.get_files()
|
||||
files = filter_ignored(files_original)
|
||||
if files_original != files:
|
||||
try:
|
||||
names_original = [file.filename for file in files_original]
|
||||
names_new = [file.filename for file in files]
|
||||
get_logger().info(f"Filtered out [ignore] files for pull request:", extra=
|
||||
{"files": names_original,
|
||||
"filtered_files": names_new})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
diff_files = []
|
||||
invalid_files_names = []
|
||||
for file in files:
|
||||
if not is_valid_file(file.filename):
|
||||
get_logger().info(f"Skipping a non-code file: {file.filename}")
|
||||
invalid_files_names.append(file.filename)
|
||||
continue
|
||||
|
||||
new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha) # communication with GitHub
|
||||
@ -183,6 +199,8 @@ class GithubProvider(GitProvider):
|
||||
num_plus_lines=num_plus_lines,
|
||||
num_minus_lines=num_minus_lines,)
|
||||
diff_files.append(file_patch_canonical_structure)
|
||||
if invalid_files_names:
|
||||
get_logger().info(f"Filtered out files with invalid extensions: {invalid_files_names}")
|
||||
|
||||
self.diff_files = diff_files
|
||||
try:
|
||||
|
@ -6,6 +6,7 @@ from urllib.parse import urlparse
|
||||
import gitlab
|
||||
from gitlab import GitlabGetError
|
||||
|
||||
from ..algo.file_filter import filter_ignored
|
||||
from ..algo.language_handler import is_valid_file
|
||||
from ..algo.utils import load_large_diff, clip_tokens, find_line_number_of_relevant_line_in_file
|
||||
from ..config_loader import get_settings
|
||||
@ -84,13 +85,29 @@ class GitLabProvider(GitProvider):
|
||||
if self.diff_files:
|
||||
return self.diff_files
|
||||
|
||||
diffs = self.mr.changes()['changes']
|
||||
# filter files using [ignore] patterns
|
||||
diffs_original = self.mr.changes()['changes']
|
||||
diffs = filter_ignored(diffs_original, 'gitlab')
|
||||
if diffs != diffs_original:
|
||||
try:
|
||||
names_original = [diff['new_path'] for diff in diffs_original]
|
||||
names_filtered = [diff['new_path'] for diff in diffs]
|
||||
get_logger().info(f"Filtered out [ignore] files for merge request {self.id_mr}", extra={
|
||||
'original_files': names_original,
|
||||
'filtered_files': names_filtered
|
||||
})
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
diff_files = []
|
||||
invalid_files_names = []
|
||||
for diff in diffs:
|
||||
if is_valid_file(diff['new_path']):
|
||||
if not is_valid_file(diff['new_path']):
|
||||
invalid_files_names.append(diff['new_path'])
|
||||
continue
|
||||
|
||||
original_file_content_str = self.get_pr_file_content(diff['old_path'], self.mr.diff_refs['base_sha'])
|
||||
new_file_content_str = self.get_pr_file_content(diff['new_path'], self.mr.diff_refs['head_sha'])
|
||||
|
||||
try:
|
||||
if isinstance(original_file_content_str, bytes):
|
||||
original_file_content_str = bytes.decode(original_file_content_str, 'utf-8')
|
||||
@ -126,11 +143,13 @@ class GitLabProvider(GitProvider):
|
||||
old_filename=None if diff['old_path'] == diff['new_path'] else diff['old_path'],
|
||||
num_plus_lines=num_plus_lines,
|
||||
num_minus_lines=num_minus_lines, ))
|
||||
if invalid_files_names:
|
||||
get_logger().info(f"Filtered out files with invalid extensions: {invalid_files_names}")
|
||||
|
||||
self.diff_files = diff_files
|
||||
return diff_files
|
||||
|
||||
def get_files(self):
|
||||
def get_files(self) -> list:
|
||||
if not self.git_files:
|
||||
self.git_files = [change['new_path'] for change in self.mr.changes()['changes']]
|
||||
return self.git_files
|
||||
|
@ -8,4 +8,5 @@ glob = [
|
||||
regex = [
|
||||
# Ignore files and directories matching these regex patterns.
|
||||
# See https://learnbyexample.github.io/python-regex-cheatsheet/
|
||||
# for example: regex = ['.*\.toml$']
|
||||
]
|
||||
|
@ -52,7 +52,7 @@ class TestConvertToMarkdown:
|
||||
'suggestion': "Consider raising an exception or logging a warning when 'pr_url' attribute is not found. This can help in debugging issues related to the absence of 'pr_url' in instances where it's expected. [important]\n",
|
||||
'relevant_line': '[return ""](https://github.com/Codium-ai/pr-agent-pro/pull/102/files#diff-52d45f12b836f77ed1aef86e972e65404634ea4e2a6083fb71a9b0f9bb9e062fR199)'}]}
|
||||
|
||||
expected_output = '## PR Reviewer Guide 🔍\n\n<table>\n<tr>\n<tr><td> ⏱️ <strong>Estimated effort to review [1-5]</strong></td><td>\n\n1, because the changes are minimal and straightforward, focusing on a single functionality addition.\n\n\n</td></tr>\n<tr><td> 🧪 <strong>Relevant tests</strong></td><td>\n\nNo\n\n\n</td></tr>\n<tr><td> ⚡ <strong>Possible issues</strong></td><td>\n\nNo\n\n\n</td></tr>\n<tr><td> 🔒 <strong>Security concerns</strong></td><td>\n\nNo\n\n</td></tr>\n</table>\n\n\n<details><summary> <strong>Code feedback:</strong></summary>\n\n<hr><table><tr><td>relevant file</td><td>pr_agent/git_providers/git_provider.py\n</td></tr><tr><td>suggestion </td><td>\n\n<strong>\n\nConsider raising an exception or logging a warning when \'pr_url\' attribute is not found. This can help in debugging issues related to the absence of \'pr_url\' in instances where it\'s expected. [important]\n\n</strong>\n</td></tr><tr><td>relevant line</td><td><a href=\'https://github.com/Codium-ai/pr-agent-pro/pull/102/files#diff-52d45f12b836f77ed1aef86e972e65404634ea4e2a6083fb71a9b0f9bb9e062fR199\'>return ""</a></td></tr></table><hr>\n\n</details>'
|
||||
expected_output = '## PR Reviewer Guide 🔍\n\n<table>\n<tr><td> ⏱️ <strong>Estimated effort to review [1-5]</strong></td><td>\n1, because the changes are minimal and straightforward, focusing on a single functionality addition.\n\n\n</td></tr>\n<tr><td> 🧪 <strong>Relevant tests</strong></td><td>\nNo\n\n\n</td></tr>\n<tr><td> ⚡ <strong>Possible issues</strong></td><td>\nNo\n\n\n</td></tr>\n<tr><td> 🔒 <strong>Security concerns</strong></td><td>\nNo\n\n</td></tr>\n</table>\n\n\n<details><summary> <strong>Code feedback:</strong></summary>\n\n<hr><table><tr><td>relevant file</td><td>pr_agent/git_providers/git_provider.py\n</td></tr><tr><td>suggestion </td><td>\n\n<strong>\n\nConsider raising an exception or logging a warning when \'pr_url\' attribute is not found. This can help in debugging issues related to the absence of \'pr_url\' in instances where it\'s expected. [important]\n\n</strong>\n</td></tr><tr><td>relevant line</td><td><a href=\'https://github.com/Codium-ai/pr-agent-pro/pull/102/files#diff-52d45f12b836f77ed1aef86e972e65404634ea4e2a6083fb71a9b0f9bb9e062fR199\'>return ""</a></td></tr></table><hr>\n\n</details>'
|
||||
|
||||
assert convert_to_markdown(input_data).strip() == expected_output.strip()
|
||||
|
||||
|
Reference in New Issue
Block a user