diff --git a/pr_agent/algo/language_handler.py b/pr_agent/algo/language_handler.py
index db99d20a..ef679d84 100644
--- a/pr_agent/algo/language_handler.py
+++ b/pr_agent/algo/language_handler.py
@@ -64,7 +64,11 @@ bad_extensions = [
 
 
 def filter_bad_extensions(files):
-    return [f for f in files if f.filename.split('.')[-1] not in bad_extensions]
+    return [f for f in files if is_valid_file(f.filename)]
+
+
+def is_valid_file(filename):
+    return filename.split('.')[-1] not in bad_extensions
 
 
 def sort_files_by_main_languages(languages: Dict, files: list):
diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py
index 23d2b605..fea1ae69 100644
--- a/pr_agent/git_providers/github_provider.py
+++ b/pr_agent/git_providers/github_provider.py
@@ -8,6 +8,7 @@ from github import AppAuthentication, Github, Auth
 
 from pr_agent.config_loader import settings
 from .git_provider import FilePatchInfo, GitProvider
+from ..algo.language_handler import is_valid_file
 
 
 class GithubProvider(GitProvider):
@@ -37,9 +38,10 @@ class GithubProvider(GitProvider):
         files = self.pr.get_files()
         diff_files = []
         for file in files:
-            original_file_content_str = self._get_pr_file_content(file, self.pr.base.sha)
-            new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha)
-            diff_files.append(FilePatchInfo(original_file_content_str, new_file_content_str, file.patch, file.filename))
+            if is_valid_file(file.filename):
+                original_file_content_str = self._get_pr_file_content(file, self.pr.base.sha)
+                new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha)
+                diff_files.append(FilePatchInfo(original_file_content_str, new_file_content_str, file.patch, file.filename))
         self.diff_files = diff_files
         return diff_files
 
diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py
index 25911825..4cc8e9e0 100644
--- a/pr_agent/git_providers/gitlab_provider.py
+++ b/pr_agent/git_providers/gitlab_provider.py
@@ -9,6 +9,7 @@ from gitlab import GitlabGetError
 
 from pr_agent.config_loader import settings
 from .git_provider import EDIT_TYPE, FilePatchInfo, GitProvider
+from ..algo.language_handler import is_valid_file
 
 
 class GitLabProvider(GitProvider):
@@ -59,27 +60,28 @@ class GitLabProvider(GitProvider):
         diffs = self.mr.changes()['changes']
         diff_files = []
         for diff in diffs:
-            original_file_content_str = self._get_pr_file_content(diff['old_path'], self.mr.target_branch)
-            new_file_content_str = self._get_pr_file_content(diff['new_path'], self.mr.source_branch)
-            edit_type = EDIT_TYPE.MODIFIED
-            if diff['new_file']:
-                edit_type = EDIT_TYPE.ADDED
-            elif diff['deleted_file']:
-                edit_type = EDIT_TYPE.DELETED
-            elif diff['renamed_file']:
-                edit_type = EDIT_TYPE.RENAMED
-            try:
-                if isinstance(original_file_content_str, bytes):
-                    original_file_content_str = bytes.decode(original_file_content_str, 'utf-8')
-                if isinstance(new_file_content_str, bytes):
-                    new_file_content_str = bytes.decode(new_file_content_str, 'utf-8')
-            except UnicodeDecodeError:
-                logging.warning(
-                    f"Cannot decode file {diff['old_path']} or {diff['new_path']} in merge request {self.id_mr}")
-            diff_files.append(
-                FilePatchInfo(original_file_content_str, new_file_content_str, diff['diff'], diff['new_path'],
-                              edit_type=edit_type,
-                              old_filename=None if diff['old_path'] == diff['new_path'] else diff['old_path']))
+            if is_valid_file(diff['new_path']):
+                original_file_content_str = self._get_pr_file_content(diff['old_path'], self.mr.target_branch)
+                new_file_content_str = self._get_pr_file_content(diff['new_path'], self.mr.source_branch)
+                edit_type = EDIT_TYPE.MODIFIED
+                if diff['new_file']:
+                    edit_type = EDIT_TYPE.ADDED
+                elif diff['deleted_file']:
+                    edit_type = EDIT_TYPE.DELETED
+                elif diff['renamed_file']:
+                    edit_type = EDIT_TYPE.RENAMED
+                try:
+                    if isinstance(original_file_content_str, bytes):
+                        original_file_content_str = bytes.decode(original_file_content_str, 'utf-8')
+                    if isinstance(new_file_content_str, bytes):
+                        new_file_content_str = bytes.decode(new_file_content_str, 'utf-8')
+                except UnicodeDecodeError:
+                    logging.warning(
+                        f"Cannot decode file {diff['old_path']} or {diff['new_path']} in merge request {self.id_mr}")
+                diff_files.append(
+                    FilePatchInfo(original_file_content_str, new_file_content_str, diff['diff'], diff['new_path'],
+                                  edit_type=edit_type,
+                                  old_filename=None if diff['old_path'] == diff['new_path'] else diff['old_path']))
         self.diff_files = diff_files
         return diff_files
 