From e4565f7106f64c10f43740916900e318d719264d Mon Sep 17 00:00:00 2001 From: mrT23 Date: Tue, 14 May 2024 21:43:14 +0300 Subject: [PATCH 1/3] Refactor Azure DevOps provider to use PR iterations for change detection, improving accuracy of diff file identification --- .../git_providers/azuredevops_provider.py | 76 ++++++++++++------- 1 file changed, 50 insertions(+), 26 deletions(-) diff --git a/pr_agent/git_providers/azuredevops_provider.py b/pr_agent/git_providers/azuredevops_provider.py index a710c254..048a2a95 100644 --- a/pr_agent/git_providers/azuredevops_provider.py +++ b/pr_agent/git_providers/azuredevops_provider.py @@ -26,6 +26,7 @@ try: CommentThread, GitVersionDescriptor, GitPullRequest, + GitPullRequestIterationChanges, ) except ImportError: AZURE_DEVOPS_AVAILABLE = False @@ -230,29 +231,56 @@ class AzureDevopsProvider(GitProvider): base_sha = self.pr.last_merge_target_commit head_sha = self.pr.last_merge_source_commit - commits = self.azure_devops_client.get_pull_request_commits( - project=self.workspace_slug, + # Get PR iterations + iterations = self.azure_devops_client.get_pull_request_iterations( repository_id=self.repo_slug, pull_request_id=self.pr_num, + project=self.workspace_slug ) + changes = [] + if iterations: + iteration_id = iterations[-1].id # Get the last iteration (most recent changes) + # Get changes for the iteration + changes: GitPullRequestIterationChanges = self.azure_devops_client.get_pull_request_iteration_changes( + repository_id=self.repo_slug, + pull_request_id=self.pr_num, + iteration_id=iteration_id, + project=self.workspace_slug + ) diff_files = [] diffs = [] diff_types = {} + if changes: + for change in changes.change_entries: + c = change.additional_properties['item'] + diffs.append(c['path']) + diff_types[c['path']] = change.additional_properties['changeType'] - for c in commits: - changes_obj = self.azure_devops_client.get_changes( - project=self.workspace_slug, - repository_id=self.repo_slug, - commit_id=c.commit_id, - ) - for i in changes_obj.changes: - if i["item"]["gitObjectType"] == "tree": - continue - diffs.append(i["item"]["path"]) - diff_types[i["item"]["path"]] = i["changeType"] + # wrong implementation - gets all the files that were changed in any commit in the PR + # commits = self.azure_devops_client.get_pull_request_commits( + # project=self.workspace_slug, + # repository_id=self.repo_slug, + # pull_request_id=self.pr_num, + # ) + # + # diff_files = [] + # diffs = [] + # diff_types = {} - diffs = list(set(diffs)) + # for c in commits: + # changes_obj = self.azure_devops_client.get_changes( + # project=self.workspace_slug, + # repository_id=self.repo_slug, + # commit_id=c.commit_id, + # ) + # for i in changes_obj.changes: + # if i["item"]["gitObjectType"] == "tree": + # continue + # diffs.append(i["item"]["path"]) + # diff_types[i["item"]["path"]] = i["changeType"] + # + # diffs = list(set(diffs)) for file in diffs: if not is_valid_file(file): @@ -273,12 +301,13 @@ class AzureDevopsProvider(GitProvider): new_file_content_str = new_file_content_str.content except Exception as error: - get_logger().error( - "Failed to retrieve new file content of %s at version %s. Error: %s", - file, - version, - str(error), - ) + get_logger().error(f"Failed to retrieve new file content of {file} at version {version}. Error: {str(error)}") + # get_logger().error( + # "Failed to retrieve new file content of %s at version %s. Error: %s", + # file, + # version, + # str(error), + # ) new_file_content_str = "" edit_type = EDIT_TYPE.MODIFIED @@ -303,12 +332,7 @@ class AzureDevopsProvider(GitProvider): ) original_file_content_str = original_file_content_str.content except Exception as error: - get_logger().error( - "Failed to retrieve original file content of %s at version %s. Error: %s", - file, - version, - str(error), - ) + get_logger().error(f"Failed to retrieve original file content of {file} at version {version}. Error: {str(error)}") original_file_content_str = "" patch = load_large_diff( From e56320540b8f696cbd74b45daf5e85d24718dbc5 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Wed, 15 May 2024 09:05:01 +0300 Subject: [PATCH 2/3] Refactor Azure DevOps provider to use PR iterations for change detection, improving accuracy of diff file identification --- pr_agent/algo/utils.py | 4 ++-- pr_agent/git_providers/azuredevops_provider.py | 8 ++++---- pr_agent/settings/configuration.toml | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index b877106d..e2f7f8ba 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -356,7 +356,7 @@ def convert_str_to_datetime(date_str): return datetime.strptime(date_str, datetime_format) -def load_large_diff(filename, new_file_content_str: str, original_file_content_str: str) -> str: +def load_large_diff(filename, new_file_content_str: str, original_file_content_str: str, show_warning: bool = True) -> str: """ Generate a patch for a modified file by comparing the original content of the file with the new content provided as input. @@ -375,7 +375,7 @@ def load_large_diff(filename, new_file_content_str: str, original_file_content_s try: diff = difflib.unified_diff(original_file_content_str.splitlines(keepends=True), new_file_content_str.splitlines(keepends=True)) - if get_settings().config.verbosity_level >= 2: + if get_settings().config.verbosity_level >= 2 and show_warning: get_logger().warning(f"File was modified, but no patch was found. Manually creating patch: {filename}.") patch = ''.join(diff) except Exception: diff --git a/pr_agent/git_providers/azuredevops_provider.py b/pr_agent/git_providers/azuredevops_provider.py index 048a2a95..1129bd8a 100644 --- a/pr_agent/git_providers/azuredevops_provider.py +++ b/pr_agent/git_providers/azuredevops_provider.py @@ -237,12 +237,12 @@ class AzureDevopsProvider(GitProvider): pull_request_id=self.pr_num, project=self.workspace_slug ) - changes = [] + changes = None if iterations: iteration_id = iterations[-1].id # Get the last iteration (most recent changes) # Get changes for the iteration - changes: GitPullRequestIterationChanges = self.azure_devops_client.get_pull_request_iteration_changes( + changes = self.azure_devops_client.get_pull_request_iteration_changes( repository_id=self.repo_slug, pull_request_id=self.pr_num, iteration_id=iteration_id, @@ -336,8 +336,8 @@ class AzureDevopsProvider(GitProvider): original_file_content_str = "" patch = load_large_diff( - file, new_file_content_str, original_file_content_str - ) + file, new_file_content_str, original_file_content_str, show_warning=False + ).rstrip() diff_files.append( FilePatchInfo( diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index b84bce61..691faec8 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -2,10 +2,10 @@ model="gpt-4-turbo-2024-04-09" model_turbo="gpt-4-turbo-2024-04-09" fallback_models=["gpt-4-0125-preview"] -git_provider="github" -publish_output=true +git_provider="azure" +publish_output=false publish_output_progress=true -verbosity_level=0 # 0,1,2 +verbosity_level=2 # 0,1,2 use_extra_bad_extensions=false use_wiki_settings_file=true use_repo_settings_file=true @@ -80,7 +80,7 @@ enable_help_text=false [pr_code_suggestions] # /improve # max_context_tokens=8000 num_code_suggestions=4 -commitable_code_suggestions = false +commitable_code_suggestions = true extra_instructions = "" rank_suggestions = false enable_help_text=false From 4231a84e7a6d3b10b5dc15ea89c79eeeb81420c5 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Wed, 15 May 2024 09:15:12 +0300 Subject: [PATCH 3/3] Refactor Azure DevOps provider to use PR iterations for change detection, improving accuracy of diff file identification --- pr_agent/git_providers/azuredevops_provider.py | 8 +++++--- pr_agent/settings/configuration.toml | 8 ++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pr_agent/git_providers/azuredevops_provider.py b/pr_agent/git_providers/azuredevops_provider.py index 1129bd8a..ee3a116f 100644 --- a/pr_agent/git_providers/azuredevops_provider.py +++ b/pr_agent/git_providers/azuredevops_provider.py @@ -253,9 +253,11 @@ class AzureDevopsProvider(GitProvider): diff_types = {} if changes: for change in changes.change_entries: - c = change.additional_properties['item'] - diffs.append(c['path']) - diff_types[c['path']] = change.additional_properties['changeType'] + item = change.additional_properties.get('item', {}) + path = item.get('path', None) + if path: + diffs.append(path) + diff_types[path] = change.additional_properties.get('changeType', 'Unknown') # wrong implementation - gets all the files that were changed in any commit in the PR # commits = self.azure_devops_client.get_pull_request_commits( diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 691faec8..b84bce61 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -2,10 +2,10 @@ model="gpt-4-turbo-2024-04-09" model_turbo="gpt-4-turbo-2024-04-09" fallback_models=["gpt-4-0125-preview"] -git_provider="azure" -publish_output=false +git_provider="github" +publish_output=true publish_output_progress=true -verbosity_level=2 # 0,1,2 +verbosity_level=0 # 0,1,2 use_extra_bad_extensions=false use_wiki_settings_file=true use_repo_settings_file=true @@ -80,7 +80,7 @@ enable_help_text=false [pr_code_suggestions] # /improve # max_context_tokens=8000 num_code_suggestions=4 -commitable_code_suggestions = true +commitable_code_suggestions = false extra_instructions = "" rank_suggestions = false enable_help_text=false