test

Merge commit 'e878bbbe369c90433c1b261b5479d23c47734539' into hl/test_docstring
Merge pull request #449 from zmeir/patch-1
2025-07-21 04:50:39 +08:00 · 2023-11-20 00:42:29 +02:00 · 2023-11-15 10:37:31 +02:00 · 2023-11-14 22:09:59 -08:00 · 2023-11-14 20:31:01 +02:00 · 2023-11-13 14:41:14 +02:00
23 changed files with 243 additions and 64 deletions
--- a/INSTALL.md
+++ b/INSTALL.md
@ -1,5 +1,5 @@

-## Installation
+### Installation

 To get started with PR-Agent quickly, you first need to acquire two tokens:

--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@ -1,3 +1,6 @@
+## Unreleased
+- review tool now posts persistent comments by default
+
 ## [Version 0.9] - 2023-10-29
 - codiumai/pr-agent:0.9
 - codiumai/pr-agent:0.9-github_app
--- a/Usage.md
+++ b/Usage.md
@ -173,7 +173,7 @@ push_commands = [
    "/auto_review -i --pr_reviewer.remove_previous_review_comment=true",
 ]
 ```
-The means that when new code is pused to the PR, the PR-Agent will run the `describe` and incremental `auto_review` tools.  
+This means that when new code is pushed to the PR, the PR-Agent will run the `describe` and incremental `auto_review` tools.  
 For the describe tool, the `add_original_user_description` and `keep_original_user_title` parameters will be set to true.  
 For the `auto_review` tool, it will run in incremental mode, and the `remove_previous_review_comment` parameter will be set to true.

--- a/docs/DESCRIBE.md
+++ b/docs/DESCRIBE.md
@ -27,6 +27,8 @@ Under the section 'pr_description', the [configuration file](./../pr_agent/setti

 - `extra_instructions`: Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ...".
 - To enable `custom labels`, apply the configuration changes described [here](./GENERATE_CUSTOM_LABELS.md#configuration-changes)
+- `enable_pr_type`: if set to false, it will not show the `PR type` as a text value in the description content. Default is true.
+  
 ### Markers template

 markers enable to easily integrate user's content and auto-generated content, with a template-like mechanism.
--- a/docs/REVIEW.md
+++ b/docs/REVIEW.md
@ -24,6 +24,8 @@ Under the section 'pr_reviewer', the [configuration file](./../pr_agent/settings
 - `num_code_suggestions`: number of code suggestions provided by the 'review' tool. Default is 4.
 - `inline_code_comments`: if set to true, the tool will publish the code suggestions as comments on the code diff. Default is false.
 - `automatic_review`: if set to false, no automatic reviews will be done. Default is true.
+- `remove_previous_review_comment`: if set to true, the tool will remove the previous review comment before adding a new one. Default is false.
+- `persistent_comment`: if set to true, the review comment will be persistent, meaning the tool edits its existing review comment in place instead of posting a new one. Default is true.
 - `extra_instructions`: Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ...".
 - To enable `custom labels`, apply the configuration changes described [here](./GENERATE_CUSTOM_LABELS.md#configuration-changes) 
 ####  Incremental Mode
--- a/pr_agent/agent/pr_agent.py
+++ b/pr_agent/agent/pr_agent.py
@ -46,10 +46,13 @@ class PRAgent:
        apply_repo_settings(pr_url)

        # Then, apply user specific settings if exists
-        request = request.replace("'", "\\'")
-        lexer = shlex.shlex(request, posix=True)
-        lexer.whitespace_split = True
-        action, *args = list(lexer)
+        if isinstance(request, str):
+            request = request.replace("'", "\\'")
+            lexer = shlex.shlex(request, posix=True)
+            lexer.whitespace_split = True
+            action, *args = list(lexer)
+        else:
+            action, *args = request
        args = update_settings_from_args(args)

        action = action.lstrip("/").lower()
--- a/pr_agent/algo/init.py
+++ b/pr_agent/algo/init.py
@ -8,6 +8,7 @@ MAX_TOKENS = {
    'gpt-4': 8000,
    'gpt-4-0613': 8000,
    'gpt-4-32k': 32000,
+    'gpt-4-1106-preview': 128000, # 128K, but may be limited by config.max_model_tokens
    'claude-instant-1': 100000,
    'claude-2': 100000,
    'command-nightly': 4096,
--- a/pr_agent/algo/file_filter.py
+++ b/pr_agent/algo/file_filter.py
@ -23,7 +23,7 @@ def filter_ignored(files):

        # keep filenames that _don't_ match the ignore regex
        for r in compiled_patterns:
-            files = [f for f in files if not r.match(f.filename)]
+            files = [f for f in files if (f.filename and not r.match(f.filename))]

    except Exception as e:
        print(f"Could not filter file list: {e}")
--- a/pr_agent/algo/git_patch_processing.py
+++ b/pr_agent/algo/git_patch_processing.py
@ -3,6 +3,7 @@ from __future__ import annotations
 import re

 from pr_agent.config_loader import get_settings
+from pr_agent.git_providers.git_provider import EDIT_TYPE
 from pr_agent.log import get_logger


@ -115,7 +116,7 @@ def omit_deletion_hunks(patch_lines) -> str:


 def handle_patch_deletions(patch: str, original_file_content_str: str,
-                           new_file_content_str: str, file_name: str) -> str:
+                           new_file_content_str: str, file_name: str, edit_type: EDIT_TYPE = EDIT_TYPE.UNKNOWN) -> str:
    """
    Handle entire file or deletion patches.

@ -132,7 +133,7 @@ def handle_patch_deletions(patch: str, original_file_content_str: str,
        str: The modified patch with deletion hunks omitted.

    """
-    if not new_file_content_str:
+    if not new_file_content_str and edit_type != EDIT_TYPE.ADDED:
        # logic for handling deleted files - don't show patch, just show that the file was deleted
        if get_settings().config.verbosity_level > 0:
            get_logger().info(f"Processing file: {file_name}, minimizing deletion file")
--- a/pr_agent/algo/pr_processing.py
+++ b/pr_agent/algo/pr_processing.py
@ -7,18 +7,20 @@ from typing import Any, Callable, List, Tuple

 from github import RateLimitExceededException

-from pr_agent.algo import MAX_TOKENS
 from pr_agent.algo.git_patch_processing import convert_to_hunks_with_lines_numbers, extend_patch, handle_patch_deletions
 from pr_agent.algo.language_handler import sort_files_by_main_languages
 from pr_agent.algo.file_filter import filter_ignored
 from pr_agent.algo.token_handler import TokenHandler, get_token_encoder
+from pr_agent.algo.utils import get_max_tokens
 from pr_agent.config_loader import get_settings
-from pr_agent.git_providers.git_provider import FilePatchInfo, GitProvider
+from pr_agent.git_providers.git_provider import FilePatchInfo, GitProvider, EDIT_TYPE
 from pr_agent.log import get_logger

 DELETED_FILES_ = "Deleted files:\n"

-MORE_MODIFIED_FILES_ = "More modified files:\n"
+MORE_MODIFIED_FILES_ = "Additional modified files (insufficient token budget to process):\n"
+
+ADDED_FILES_ = "Additional added files (insufficient token budget to process):\n"

 OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD = 1000
 OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600
@ -64,14 +66,17 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s
        pr_languages, token_handler, add_line_numbers_to_hunks, patch_extra_lines=PATCH_EXTRA_LINES)

    # if we are under the limit, return the full diff
-    if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < MAX_TOKENS[model]:
+    if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < get_max_tokens(model):
        return "\n".join(patches_extended)

    # if we are over the limit, start pruning
-    patches_compressed, modified_file_names, deleted_file_names = \
+    patches_compressed, modified_file_names, deleted_file_names, added_file_names = \
        pr_generate_compressed_diff(pr_languages, token_handler, model, add_line_numbers_to_hunks)

    final_diff = "\n".join(patches_compressed)
+    if added_file_names:
+        added_list_str = ADDED_FILES_ + "\n".join(added_file_names)
+        final_diff = final_diff + "\n\n" + added_list_str
    if modified_file_names:
        modified_list_str = MORE_MODIFIED_FILES_ + "\n".join(modified_file_names)
        final_diff = final_diff + "\n\n" + modified_list_str
@ -122,7 +127,7 @@ def pr_generate_extended_diff(pr_languages: list,


 def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, model: str,
-                                convert_hunks_to_line_numbers: bool) -> Tuple[list, list, list]:
+                                convert_hunks_to_line_numbers: bool) -> Tuple[list, list, list, list]:
    """
    Generate a compressed diff string for a pull request, using diff minimization techniques to reduce the number of
    tokens used.
@ -148,6 +153,7 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
    """

    patches = []
+    added_files_list = []
    modified_files_list = []
    deleted_files_list = []
    # sort each one of the languages in top_langs by the number of tokens in the diff
@ -165,7 +171,7 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo

        # removing delete-only hunks
        patch = handle_patch_deletions(patch, original_file_content_str,
-                                       new_file_content_str, file.filename)
+                                       new_file_content_str, file.filename, file.edit_type)
        if patch is None:
            if not deleted_files_list:
                total_tokens += token_handler.count_tokens(DELETED_FILES_)
@ -179,21 +185,26 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
        new_patch_tokens = token_handler.count_tokens(patch)

        # Hard Stop, no more tokens
-        if total_tokens > MAX_TOKENS[model] - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD:
+        if total_tokens > get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD:
            get_logger().warning(f"File was fully skipped, no more tokens: {file.filename}.")
            continue

        # If the patch is too large, just show the file name
-        if total_tokens + new_patch_tokens > MAX_TOKENS[model] - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
+        if total_tokens + new_patch_tokens > get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
            # Current logic is to skip the patch if it's too large
            # TODO: Option for alternative logic to remove hunks from the patch to reduce the number of tokens
            #  until we meet the requirements
            if get_settings().config.verbosity_level >= 2:
                get_logger().warning(f"Patch too large, minimizing it, {file.filename}")
-            if not modified_files_list:
-                total_tokens += token_handler.count_tokens(MORE_MODIFIED_FILES_)
-            modified_files_list.append(file.filename)
-            total_tokens += token_handler.count_tokens(file.filename) + 1
+            if file.edit_type == EDIT_TYPE.ADDED:
+                if not added_files_list:
+                    total_tokens += token_handler.count_tokens(ADDED_FILES_)
+                added_files_list.append(file.filename)
+            else:
+                if not modified_files_list:
+                    total_tokens += token_handler.count_tokens(MORE_MODIFIED_FILES_)
+                modified_files_list.append(file.filename)
+                total_tokens += token_handler.count_tokens(file.filename) + 1
            continue

        if patch:
@ -206,7 +217,7 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
            if get_settings().config.verbosity_level >= 2:
                get_logger().info(f"Tokens: {total_tokens}, last filename: {file.filename}")

-    return patches, modified_files_list, deleted_files_list
+    return patches, modified_files_list, deleted_files_list, added_files_list


 async def retry_with_fallback_models(f: Callable):
@ -397,13 +408,13 @@ def get_pr_multi_diffs(git_provider: GitProvider,
            continue

        # Remove delete-only hunks
-        patch = handle_patch_deletions(patch, original_file_content_str, new_file_content_str, file.filename)
+        patch = handle_patch_deletions(patch, original_file_content_str, new_file_content_str, file.filename, file.edit_type)
        if patch is None:
            continue

        patch = convert_to_hunks_with_lines_numbers(patch, file)
        new_patch_tokens = token_handler.count_tokens(patch)
-        if patch and (total_tokens + new_patch_tokens > MAX_TOKENS[model] - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD):
+        if patch and (total_tokens + new_patch_tokens > get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD):
            final_diff = "\n".join(patches)
            final_diff_list.append(final_diff)
            patches = []
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@ -9,6 +9,8 @@ from typing import Any, List

 import yaml
 from starlette_context import context
+
+from pr_agent.algo import MAX_TOKENS
 from pr_agent.config_loader import get_settings, global_settings
 from pr_agent.log import get_logger

@ -285,6 +287,7 @@ def _fix_key_value(key: str, value: str):


 def load_yaml(review_text: str) -> dict:
+    test = 1
    review_text = review_text.removeprefix('```yaml').rstrip('`')
    try:
        data = yaml.safe_load(review_text)
@ -295,6 +298,21 @@ def load_yaml(review_text: str) -> dict:

 def try_fix_yaml(review_text: str) -> dict:
    review_text_lines = review_text.split('\n')
+
+    # first fallback - try to convert 'relevant line: ...' to relevant line: |-\n        ...'
+    review_text_lines_copy = review_text_lines.copy()
+    for i in range(0, len(review_text_lines_copy)):
+        if 'relevant line:' in review_text_lines_copy[i] and not '|-' in review_text_lines_copy[i]:
+            review_text_lines_copy[i] = review_text_lines_copy[i].replace('relevant line: ',
+                                                    'relevant line: |-\n        ')
+    try:
+        data = yaml.load('\n'.join(review_text_lines_copy), Loader=yaml.SafeLoader)
+        get_logger().info(f"Successfully parsed AI prediction after adding |-\n        to relevant line")
+        return data
+    except:
+        get_logger().debug(f"Failed to parse AI prediction after adding |-\n        to relevant line")
+
+    # second fallback - try to remove last lines
    data = {}
    for i in range(1, len(review_text_lines)):
        review_text_lines_tmp = '\n'.join(review_text_lines[:-i])
@ -326,18 +344,33 @@ def set_custom_labels(variables):
    variables["custom_labels_examples"] = f"      - {list(labels.keys())[0]}"


-def get_user_labels(current_labels):
-    ## Only keep labels that has been added by the user
-    if current_labels is None:
-        current_labels = []
-    user_labels = []
-    for label in current_labels:
-        if label in ['Bug fix', 'Tests', 'Refactoring', 'Enhancement', 'Documentation', 'Other']:
-            continue
-        if get_settings().config.enable_custom_labels:
-            if label in get_settings().custom_labels:
+def get_user_labels(current_labels: List[str] = None):
+    """
+    Only keep labels that has been added by the user
+    """
+    try:
+        if current_labels is None:
+            current_labels = []
+        user_labels = []
+        for label in current_labels:
+            if label.lower() in ['bug fix', 'tests', 'refactoring', 'enhancement', 'documentation', 'other']:
                continue
-        user_labels.append(label)
-    if user_labels:
-        get_logger().info(f"Keeping user labels: {user_labels}")
+            if get_settings().config.enable_custom_labels:
+                if label in get_settings().custom_labels:
+                    continue
+            user_labels.append(label)
+        if user_labels:
+            get_logger().info(f"Keeping user labels: {user_labels}")
+    except Exception as e:
+        get_logger().exception(f"Failed to get user labels: {e}")
+        return current_labels
    return user_labels
+
+
+def get_max_tokens(model):
+    settings = get_settings()
+    max_tokens_model = MAX_TOKENS[model]
+    if settings.config.max_model_tokens:
+        max_tokens_model = min(settings.config.max_model_tokens, max_tokens_model)
+        # get_logger().debug(f"limiting max tokens to {max_tokens_model}")
+    return max_tokens_model
--- a/pr_agent/cli.py
+++ b/pr_agent/cli.py
@ -8,6 +8,8 @@ from pr_agent.log import setup_logger

 setup_logger()

+
+
 def run(inargs=None):
    parser = argparse.ArgumentParser(description='AI based pull request analyzer', usage=
 """\
@ -51,9 +53,9 @@ For example: 'python cli.py --pr_url=... review --pr_reviewer.extra_instructions
    command = args.command.lower()
    get_settings().set("CONFIG.CLI_MODE", True)
    if args.issue_url:
-        result = asyncio.run(PRAgent().handle_request(args.issue_url, command + " " + " ".join(args.rest)))
+        result = asyncio.run(PRAgent().handle_request(args.issue_url, [command] + args.rest))
    else:
-        result = asyncio.run(PRAgent().handle_request(args.pr_url, command + " " + " ".join(args.rest)))
+        result = asyncio.run(PRAgent().handle_request(args.pr_url, [command] + args.rest))
    if not result:
        parser.print_help()

--- a/pr_agent/git_providers/bitbucket_provider.py
+++ b/pr_agent/git_providers/bitbucket_provider.py
@ -9,7 +9,7 @@ from starlette_context import context
 from ..algo.pr_processing import find_line_number_of_relevant_line_in_file
 from ..config_loader import get_settings
 from ..log import get_logger
-from .git_provider import FilePatchInfo, GitProvider
+from .git_provider import FilePatchInfo, GitProvider, EDIT_TYPE


 class BitbucketProvider(GitProvider):
@ -132,17 +132,44 @@ class BitbucketProvider(GitProvider):
                diff.old.get_data("links")
            )
            new_file_content_str = self._get_pr_file_content(diff.new.get_data("links"))
-            diff_files.append(
-                FilePatchInfo(
-                    original_file_content_str,
-                    new_file_content_str,
-                    diff_split[index],
-                    diff.new.path,
-                )
+            file_patch_canonic_structure = FilePatchInfo(
+                original_file_content_str,
+                new_file_content_str,
+                diff_split[index],
+                diff.new.path,
            )
+
+            if diff.data['status'] == 'added':
+                file_patch_canonic_structure.edit_type = EDIT_TYPE.ADDED
+            elif diff.data['status'] == 'removed':
+                file_patch_canonic_structure.edit_type = EDIT_TYPE.DELETED
+            elif diff.data['status'] == 'modified':
+                file_patch_canonic_structure.edit_type = EDIT_TYPE.MODIFIED
+            elif diff.data['status'] == 'renamed':
+                file_patch_canonic_structure.edit_type = EDIT_TYPE.RENAMED
+            diff_files.append(file_patch_canonic_structure)
+
+
        self.diff_files = diff_files
        return diff_files

+    def publish_persistent_comment(self, pr_comment: str, initial_text: str, updated_text: str):
+        try:
+            for comment in self.pr.comments():
+                body = comment.raw
+                if initial_text in body:
+                    if updated_text:
+                        pr_comment_updated = pr_comment.replace(initial_text, updated_text)
+                    else:
+                        pr_comment_updated = pr_comment
+                    d = {"content": {"raw": pr_comment_updated}}
+                    response = comment._update_data(comment.put(None, data=d))
+                    return
+        except Exception as e:
+            get_logger().exception(f"Failed to update persistent review, error: {e}")
+            pass
+        self.publish_comment(pr_comment)
+
    def publish_comment(self, pr_comment: str, is_temporary: bool = False):
        comment = self.pr.comment(pr_comment)
        if is_temporary:
@ -288,6 +315,11 @@ class BitbucketProvider(GitProvider):
            })

        response = requests.request("PUT", self.bitbucket_pull_request_api_url, headers=self.headers, data=payload)
+        try:
+            if response.status_code != 200:
+                get_logger().info(f"Failed to update description, error code: {response.status_code}")
+        except:
+            pass
        return response

    # bitbucket does not support labels
--- a/pr_agent/git_providers/git_provider.py
+++ b/pr_agent/git_providers/git_provider.py
@ -13,6 +13,7 @@ class EDIT_TYPE(Enum):
    DELETED = 2
    MODIFIED = 3
    RENAMED = 4
+    UNKNOWN = 5


@dataclass
@ -22,7 +23,7 @@ class FilePatchInfo:
    patch: str
    filename: str
    tokens: int = -1
-    edit_type: EDIT_TYPE = EDIT_TYPE.MODIFIED
+    edit_type: EDIT_TYPE = EDIT_TYPE.UNKNOWN
    old_filename: str = None


@ -43,6 +44,9 @@ class GitProvider(ABC):
    def publish_comment(self, pr_comment: str, is_temporary: bool = False):
        pass

+    def publish_persistent_comment(self, pr_comment: str, initial_text: str, updated_text: str):
+        self.publish_comment(pr_comment)
+
    @abstractmethod
    def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
        pass
@ -94,16 +98,16 @@ class GitProvider(ABC):
    def get_pr_description(self, *, full: bool = True) -> str:
        from pr_agent.config_loader import get_settings
        from pr_agent.algo.pr_processing import clip_tokens
-        max_tokens = get_settings().get("CONFIG.MAX_DESCRIPTION_TOKENS", None)
+        max_tokens_description = get_settings().get("CONFIG.MAX_DESCRIPTION_TOKENS", None)
        description = self.get_pr_description_full() if full else self.get_user_description()
-        if max_tokens:
-            return clip_tokens(description, max_tokens)
+        if max_tokens_description:
+            return clip_tokens(description, max_tokens_description)
        return description

    def get_user_description(self) -> str:
        description = (self.get_pr_description_full() or "").strip()
        # if the existing description wasn't generated by the pr-agent, just return it as-is
-        if not description.startswith("## PR Type"):
+        if not any(description.startswith(header) for header in ("## PR Type", "## PR Description")):
            return description
        # if the existing description was generated by the pr-agent, but it doesn't contain the user description,
        # return nothing (empty string) because it means there is no user description
@ -153,6 +157,8 @@ def get_main_pr_language(languages, files) -> str:
        # validate that the specific commit uses the main language
        extension_list = []
        for file in files:
+            if not file:
+                continue
            if isinstance(file, str):
                file = FilePatchInfo(base_file=None, head_file=None, patch=None, filename=file)
            extension_list.append(file.filename.rsplit('.')[-1])
--- a/pr_agent/git_providers/github_provider.py
+++ b/pr_agent/git_providers/github_provider.py
@ -13,7 +13,7 @@ from ..algo.utils import load_large_diff
 from ..config_loader import get_settings
 from ..log import get_logger
 from ..servers.utils import RateLimitExceeded
-from .git_provider import FilePatchInfo, GitProvider, IncrementalPR
+from .git_provider import FilePatchInfo, GitProvider, IncrementalPR, EDIT_TYPE


 class GithubProvider(GitProvider):
@ -129,7 +129,20 @@ class GithubProvider(GitProvider):
                    if not patch:
                        patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str)

-                diff_files.append(FilePatchInfo(original_file_content_str, new_file_content_str, patch, file.filename))
+                if file.status == 'added':
+                    edit_type = EDIT_TYPE.ADDED
+                elif file.status == 'removed':
+                    edit_type = EDIT_TYPE.DELETED
+                elif file.status == 'renamed':
+                    edit_type = EDIT_TYPE.RENAMED
+                elif file.status == 'modified':
+                    edit_type = EDIT_TYPE.MODIFIED
+                else:
+                    get_logger().error(f"Unknown edit type: {file.status}")
+                    edit_type = EDIT_TYPE.UNKNOWN
+                file_patch_canonical_structure = FilePatchInfo(original_file_content_str, new_file_content_str, patch,
+                                                               file.filename, edit_type=edit_type)
+                diff_files.append(file_patch_canonical_structure)

            self.diff_files = diff_files
            return diff_files
@ -141,10 +154,24 @@ class GithubProvider(GitProvider):
    def publish_description(self, pr_title: str, pr_body: str):
        self.pr.edit(title=pr_title, body=pr_body)

+    def publish_persistent_comment(self, pr_comment: str, initial_text: str, updated_text: str):
+        prev_comments = list(self.pr.get_issue_comments())
+        for comment in prev_comments:
+            body = comment.body
+            if body.startswith(initial_text):
+                if updated_text:
+                    pr_comment_updated = pr_comment.replace(initial_text, updated_text)
+                else:
+                    pr_comment_updated = pr_comment
+                response = comment.edit(pr_comment_updated)
+                return
+        self.publish_comment(pr_comment)
+
    def publish_comment(self, pr_comment: str, is_temporary: bool = False):
        if is_temporary and not get_settings().config.publish_output_progress:
            get_logger().debug(f"Skipping publish_comment for temporary comment: {pr_comment}")
            return
+
        response = self.pr.create_issue_comment(pr_comment)
        if hasattr(response, "user") and hasattr(response.user, "login"):
            self.github_user_id = response.user.login
--- a/pr_agent/git_providers/gitlab_provider.py
+++ b/pr_agent/git_providers/gitlab_provider.py
@ -136,6 +136,21 @@ class GitLabProvider(GitProvider):
        except Exception as e:
            get_logger().exception(f"Could not update merge request {self.id_mr} description: {e}")

+    def publish_persistent_comment(self, pr_comment: str, initial_text: str, updated_text: str):
+        try:
+            for comment in self.mr.notes.list(get_all=True)[::-1]:
+                if comment.body.startswith(initial_text):
+                    if updated_text:
+                        pr_comment_updated = pr_comment.replace(initial_text, updated_text)
+                    else:
+                        pr_comment_updated = pr_comment
+                    response = self.mr.notes.update(comment.id, {'body': pr_comment_updated})
+                    return
+        except Exception as e:
+            get_logger().exception(f"Failed to update persistent review, error: {e}")
+            pass
+        self.publish_comment(pr_comment)
+
    def publish_comment(self, mr_comment: str, is_temporary: bool = False):
        comment = self.mr.notes.create({'body': mr_comment})
        if is_temporary:
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@ -1,15 +1,16 @@
 [config]
-model="gpt-4"
+model="gpt-4" # "gpt-4-1106-preview"
 fallback_models=["gpt-3.5-turbo-16k"]
 git_provider="github"
 publish_output=true
 publish_output_progress=true
-verbosity_level=0 # 0,1,2
+verbosity_level=2 # 0,1,2
 use_extra_bad_extensions=false
 use_repo_settings_file=true
 ai_timeout=180
 max_description_tokens = 500
 max_commits_tokens = 500
+max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities.
 patch_extra_lines = 3
 secret_provider="google_cloud_storage"
 cli_mode=false
@ -25,6 +26,7 @@ inline_code_comments = false
 ask_and_reflect=false
 automatic_review=true
 remove_previous_review_comment=false
+persistent_comment=true
 extra_instructions = ""
 # specific configurations for incremental review (/review -i)
 require_all_thresholds_for_incremental_review=false
--- a/pr_agent/settings/pr_custom_labels.toml
+++ b/pr_agent/settings/pr_custom_labels.toml
@ -16,7 +16,7 @@ You must use the following YAML schema to format your answer:
 PR Type:
  type: array
 {%- if enable_custom_labels %}
-  description: One or more labels that describe the PR type. Don't output the description in the parentheses.
+  description: Labels that are applicable to the Pull Request. Don't output the description in the parentheses. If none of the labels is relevant to the PR, output an empty array.
 {%- endif %}
  items:
    type: string
--- a/pr_agent/settings/pr_description_prompts.toml
+++ b/pr_agent/settings/pr_description_prompts.toml
@ -30,7 +30,7 @@ PR Type:
 {%- if enable_custom_labels %}
 PR Labels:
  type: array
-  description: One or more labels that describe the PR labels. Don't output the description in the parentheses.
+  description: Labels that are applicable to the Pull Request. Don't output the description in the parentheses. If none of the labels is relevant to the PR, output an empty array.
  items:
    type: string
    enum:
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@ -152,7 +152,8 @@ PR Analysis:
  Focused PR: no, because ...
 {%- endif %}
 {%- if require_estimate_effort_to_review %}
-  Estimated effort to review [1-5]: 3, because ...
+  Estimated effort to review [1-5]: |-
+    3, because ...
 {%- endif %}
 PR Feedback:
  General PR suggestions: |-
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@ -117,7 +117,15 @@ class PRReviewer:
            if get_settings().config.publish_output:
                get_logger().info('Pushing PR review...')
                previous_review_comment = self._get_previous_review_comment()
-                self.git_provider.publish_comment(pr_comment)
+
+                # publish the review
+                if get_settings().pr_reviewer.persistent_comment and not self.incremental.is_incremental:
+                    self.git_provider.publish_persistent_comment(pr_comment,
+                                                                 initial_text="## PR Analysis",
+                                                                 updated_text="## PR Analysis (updated)")
+                else:
+                    self.git_provider.publish_comment(pr_comment)
+
                self.git_provider.remove_initial_comment()
                if previous_review_comment:
                    self._remove_previous_review_comment(previous_review_comment)
@ -156,7 +164,6 @@ class PRReviewer:
        variables["diff"] = self.patches_diff  # update diff

        environment = Environment(undefined=StrictUndefined)
-        # set_custom_labels(variables)
        system_prompt = environment.from_string(get_settings().pr_review_prompt.system).render(variables)
        user_prompt = environment.from_string(get_settings().pr_review_prompt.user).render(variables)

--- a/pr_agent/tools/pr_similar_issue.py
+++ b/pr_agent/tools/pr_similar_issue.py
@ -8,8 +8,8 @@ import pinecone
 from pinecone_datasets import Dataset, DatasetMetadata
 from pydantic import BaseModel, Field

-from pr_agent.algo import MAX_TOKENS
 from pr_agent.algo.token_handler import TokenHandler
+from pr_agent.algo.utils import get_max_tokens
 from pr_agent.config_loader import get_settings
 from pr_agent.git_providers import get_git_provider
 from pr_agent.log import get_logger
@ -197,7 +197,7 @@ class PRSimilarIssue:
            username = issue.user.login
            created_at = str(issue.created_at)
            if len(issue_str) < 8000 or \
-                    self.token_handler.count_tokens(issue_str) < MAX_TOKENS[MODEL]:  # fast reject first
+                    self.token_handler.count_tokens(issue_str) < get_max_tokens(MODEL):  # fast reject first
                issue_record = Record(
                    id=issue_key + "." + "issue",
                    text=issue_str,
--- a/tests/unittest/try_fix_yaml.py
+++ b/tests/unittest/try_fix_yaml.py
@ -0,0 +1,31 @@
+
+# Generated by CodiumAI
+from pr_agent.algo.utils import try_fix_yaml
+
+
+import pytest
+
+class TestTryFixYaml:
+
+    # A well-formed YAML string is expected to yield the corresponding dict.
+    def test_valid_yaml(self):
+        review_text = "key: value\n"
+        expected_output = {"key": "value"}
+        assert try_fix_yaml(review_text) == expected_output
+
+    # A 'relevant line:' value that itself contains ': ' must come back as one plain string (presumably via a '|-' block-scalar fix-up; confirm in try_fix_yaml).
+    def test_add_relevant_line(self):
+        review_text = "relevant line: value: 3\n"
+        expected_output = {"relevant line": "value: 3"}
+        assert try_fix_yaml(review_text) == expected_output
+
+    # A trailing line that breaks parsing is expected to be discarded, so only the valid leading mapping is returned.
+    def test_remove_last_line(self):
+        review_text = "key: value\nextra invalid line\n"
+        expected_output = {"key": "value"}
+        assert try_fix_yaml(review_text) == expected_output
+
+    # An empty input string is expected to return None rather than an empty dict.
+    def test_empty_yaml_fixed(self):
+        review_text = ""
+        assert try_fix_yaml(review_text) is None
Author	SHA1	Message	Date
Hussam.lawen	5fb8bc1927	test	2023-11-20 00:42:29 +02:00
Hussam.lawen	e9315c7d28	Merge commit 'e878bbbe369c90433c1b261b5479d23c47734539' into hl/test_docstring	2023-11-15 10:37:31 +02:00
mrT23	e878bbbe36	Merge pull request #449 from zmeir/patch-1 Fix `get_user_description` in case `pr_description.enable_pr_type=false`	2023-11-14 22:09:59 -08:00
Hussam.lawen	67b61d722d	test	2023-11-14 20:31:01 +02:00
Zohar Meir	7d89b82967	Fix `get_user_description` in case `pr_description.enable_pr_type=false` Fixes an issue when getting the user description after a PR-Agent description was already generated, in case the configuration setting `pr_description.enable_pr_type` was `false`.	2023-11-13 14:41:14 +02:00
Hussam Lawen	c5f9bbbf92	Merge pull request #448 from Codium-ai/hl/optional_custom_labels remove the "one or more" for custom labels	2023-11-13 13:51:18 +02:00
Hussam.lawen	a5e5a82952	s	2023-11-13 13:49:16 +02:00
Hussam.lawen	ccbb62b50a	remove the "one or more" for custom labels	2023-11-13 13:47:06 +02:00
Ori Kotek	1df36c6a44	Merge pull request #446 from Codium-ai/tr/fix_cli_args Handling CLI Arguments with Quotes in pr_agent	2023-11-12 17:29:38 +02:00
Ori Kotek	9e5e9afe92	Refactor CLI argument handling and request processing	2023-11-12 16:11:34 +02:00
mrT23	5e43c202dd	s1	2023-11-12 15:45:22 +02:00
mrT23	37e6608e68	Merge pull request #444 from Codium-ai/tr/fallback_yaml Implementing Fallback Mechanisms for YAML Parsing	2023-11-12 00:43:15 -08:00
mrT23	f64d5f1e2a	tests	2023-11-12 08:36:57 +02:00
mrT23	8fdf174dec	fallback	2023-11-10 18:44:19 +02:00
mrT23	29d4f98b19	Merge pull request #441 from Codium-ai/tr/presistent_review Add Persistent Review Feature to PR Agent	2023-11-09 05:26:51 -08:00
mrT23	737792d83c	publish_persistent_comment	2023-11-09 15:24:55 +02:00
mrT23	7e5889061c	publish_persistent_comment	2023-11-09 15:20:31 +02:00
mrT23	755e04cf65	bitbucket finally works	2023-11-08 20:41:55 +02:00
mrT23	44d6c95714	response	2023-11-08 20:38:18 +02:00
mrT23	14610d5375	persistent s	2023-11-08 20:16:08 +02:00
mrT23	f9c832d6cb	Merge pull request #439 from Codium-ai/tr/fixes_added_files Enhancement of Patch Handling and PR Processing	2023-11-08 04:48:07 -08:00
mrT23	c2bec614e5	s	2023-11-08 14:46:11 +02:00
mrT23	49725e92f2	s	2023-11-08 14:41:15 +02:00
mrT23	a1e32d8331	s	2023-11-08 14:36:59 +02:00
mrT23	0293412a42	s	2023-11-08 14:31:08 +02:00
mrT23	10ec0a1812	s	2023-11-08 14:21:03 +02:00
mrT23	69b68b78f5	s	2023-11-08 14:17:59 +02:00
mrT23	c5bc4b44ff	fix added files	2023-11-08 12:51:30 +02:00
mrT23	39e5102a2e	fix added files	2023-11-08 12:47:18 +02:00
mrT23	6c82bc9a3e	Merge pull request #437 from Codium-ai/tr/new_gpt4 Introduce support for 'gpt-4-1106-preview' model and dynamic token limit calculation	2023-11-07 04:49:50 -08:00
mrT23	54f41dd603	code	2023-11-07 14:41:15 +02:00
mrT23	094f641fb5	code	2023-11-07 14:38:37 +02:00
mrT23	a35a75eb34	get_max_tokens + added 'gpt-4-1106-preview'	2023-11-07 14:28:41 +02:00
mrT23	5a7c118b56	Merge pull request #434 from Codium-ai/document_describe Update DESCRIBE.md	2023-11-06 11:00:06 -08:00
Hussam Lawen	cf9e0fbbc5	Update DESCRIBE.md	2023-11-06 17:55:58 +02:00
Hussam Lawen	ef9af261ed	Merge pull request #433 from Codium-ai/hl/user_labels Keep user labels	2023-11-06 15:17:19 +02:00
Hussam.lawen	ff79776410	Keep user labels	2023-11-06 15:14:08 +02:00
Hussam.lawen	ec3f2fb485	Revert "generate labels keep user labels only" This reverts commit `94a2a5e527`.	2023-11-06 15:08:29 +02:00