From e3274af83126d776c85ac10a8ee8a281153613a8 Mon Sep 17 00:00:00 2001
From: Ori Kotek <ori.k@codium.ai>
Date: Wed, 9 Aug 2023 10:17:58 +0300
Subject: [PATCH 01/21] A (still) hacky way to clip description and commit
 messages

---
 pr_agent/algo/pr_processing.py | 22 ++++++++++++++++++++++
 pr_agent/tools/pr_reviewer.py  |  4 +++-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py
index 8b319446..be3a461b 100644
--- a/pr_agent/algo/pr_processing.py
+++ b/pr_agent/algo/pr_processing.py
@@ -284,3 +284,25 @@ def find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo],
                         absolute_position = start2 + delta - 1
                         break
     return position, absolute_position
+
+
+def clip_tokens(text: str, max_tokens: int) -> str:
+    """
+    Clip the number of tokens in a string to a maximum number of tokens.
+
+    Args:
+        text (str): The string to clip.
+        max_tokens (int): The maximum number of tokens allowed in the string.
+
+    Returns:
+        str: The clipped string.
+    """
+    # We'll estimate the number of tokens by hueristically assuming 2.5 tokens per word
+    words = re.finditer(r'\S+', text)
+    max_words = max_tokens // 2.5
+    end_pos = None
+    for i, token in enumerate(words):
+        if i == max_words:
+            end_pos = token.start()
+            break
+    return text if end_pos is None else text[:end_pos]
\ No newline at end of file
diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py
index 982f5000..982f18cc 100644
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@@ -8,7 +8,7 @@ from jinja2 import Environment, StrictUndefined
 
 from pr_agent.algo.ai_handler import AiHandler
 from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models, \
-    find_line_number_of_relevant_line_in_file
+    find_line_number_of_relevant_line_in_file, clip_tokens
 from pr_agent.algo.token_handler import TokenHandler
 from pr_agent.algo.utils import convert_to_markdown, try_fix_json
 from pr_agent.config_loader import get_settings
@@ -62,6 +62,8 @@ class PRReviewer:
             "extra_instructions": get_settings().pr_reviewer.extra_instructions,
             "commit_messages_str": self.git_provider.get_commit_messages(),
         }
+        self.vars["description"] = clip_tokens(self.vars["description"], 500)
+        self.vars["commit_messages_str"] = clip_tokens(self.vars["commit_messages_str"], 500)
 
         self.token_handler = TokenHandler(
             self.git_provider.pr,

From fe75e3f2eceb2dc762fb06569a341088df912849 Mon Sep 17 00:00:00 2001
From: mrT23 <tal.r@codium.ai>
Date: Wed, 9 Aug 2023 08:50:15 +0300
Subject: [PATCH 02/21] yaml

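Switch the AI response format of the review and describe tools from JSON
to YAML: update both prompt templates to a YAML schema, add the
load_yaml/try_fix_yaml helpers (with a unit test), and adapt the response
parsing in pr_reviewer.py and pr_description.py accordingly.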
---
 pr_agent/algo/utils.py                        |  25 ++-
 pr_agent/git_providers/github_provider.py     |   7 +-
 pr_agent/settings/pr_description_prompts.toml |  73 ++++---
 pr_agent/settings/pr_reviewer_prompts.toml    | 181 +++++++++---------
 pr_agent/tools/pr_description.py              |  22 ++-
 pr_agent/tools/pr_reviewer.py                 |  36 ++--
 tests/unittest/test_load_yaml.py              |  32 ++++
 7 files changed, 241 insertions(+), 135 deletions(-)
 create mode 100644 tests/unittest/test_load_yaml.py

diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py
index 264575bb..595ac6a9 100644
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@@ -8,8 +8,8 @@ import textwrap
 from datetime import datetime
 from typing import Any, List
 
+import yaml
 from starlette_context import context
-
 from pr_agent.config_loader import get_settings, global_settings
 
 
@@ -258,3 +258,26 @@ def update_settings_from_args(args: List[str]) -> List[str]:
             else:
                 other_args.append(arg)
     return other_args
+
+
+def load_yaml(review_text: str) -> dict:
+    review_text = review_text.lstrip('```yaml').rstrip('`')
+    try:
+        data = yaml.load(review_text, Loader=yaml.SafeLoader)
+    except Exception as e:
+        logging.error(f"Failed to parse AI prediction: {e}")
+        data = try_fix_yaml(review_text)
+    return data
+
+def try_fix_yaml(review_text: str) -> dict:
+    review_text_lines = review_text.split('\n')
+    data = {}
+    for i in range(1, len(review_text_lines)):
+        review_text_lines_tmp = '\n'.join(review_text_lines[:-i])
+        try:
+            data = yaml.load(review_text_lines_tmp, Loader=yaml.SafeLoader)
+            logging.info(f"Successfully parsed AI prediction after removing {i} lines")
+            break
+        except:
+            pass
+    return data
diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py
index f83216ef..38a0ba44 100644
--- a/pr_agent/git_providers/github_provider.py
+++ b/pr_agent/git_providers/github_provider.py
@@ -392,10 +392,13 @@ class GithubProvider(GitProvider):
 
     def generate_link_to_relevant_line_number(self, suggestion) -> str:
         try:
-            relevant_file = suggestion['relevant file']
+            relevant_file = suggestion['relevant file'].strip('`').strip("'")
             relevant_line_str = suggestion['relevant line']
+            if not relevant_line_str:
+                return ""
+
             position, absolute_position = find_line_number_of_relevant_line_in_file \
-                (self.diff_files, relevant_file.strip('`'), relevant_line_str)
+                (self.diff_files, relevant_file, relevant_line_str)
 
             if absolute_position != -1:
                 # # link to right file only
diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml
index 95a12681..1fab0bdc 100644
--- a/pr_agent/settings/pr_description_prompts.toml
+++ b/pr_agent/settings/pr_description_prompts.toml
@@ -2,6 +2,7 @@
 system="""You are CodiumAI-PR-Reviewer, a language model designed to review git pull requests.
 Your task is to provide full description of the PR content.
 - Make sure not to focus the new PR code (the '+' lines).
+- Notice that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or not up-to-date. Hence, compare them to the PR diff code, and use them only as a reference.
 
 {%- if extra_instructions %}
 
@@ -9,31 +10,57 @@ Extra instructions from the user:
 {{ extra_instructions }}
 {% endif %}
 
-You must use the following JSON schema to format your answer:
-```json
-{
-  "PR Title": {
-      "type": "string",
-      "description": "an informative title for the PR, describing its main theme"
-  },
-  "PR Type": {
-      "type": "string",
-      "description": possible values are: ["Bug fix", "Tests", "Bug fix with tests", "Refactoring", "Enhancement", "Documentation", "Other"]
-    },
-  "PR Description": {
-      "type": "string",
-      "description": "an informative and concise description of the PR"
-  },
-  "PR Main Files Walkthrough": {
-      "type": "string",
-      "description": "a walkthrough of the PR changes. Review main files, in bullet points, and shortly describe the changes in each file (up to 10 most important files). Format: -`filename`: description of changes\n..."
-  }
-}
+You must use the following YAML schema to format your answer:
+```yaml
+PR Title:
+  type: string
+  description: an informative title for the PR, describing its main theme
+PR Type:
+  type: array
+  items:
+    type: string
+    enum:
+      - Bug fix
+      - Tests
+      - Bug fix with tests
+      - Refactoring
+      - Enhancement
+      - Documentation
+      - Other
+PR Description:
+  type: string
+  description: an informative and concise description of the PR
+PR Main Files Walkthrough:
+  type: array
+  maxItems: 10
+  description: >-
+    a walkthrough of the PR changes. Review main files, and shortly describe the changes in each file (up to 10 most important files).
+  items:
+    filename:
+      type: string
+      description: the relevant file full path
+    changes in file:
+      type: string
+      description: minimal and concise description of the changes in the relevant file
 
-Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields.
+
+Example output:
+```yaml
+PR Title: ...
+PR Type:
+  - Bug fix
+PR Description: ...
+PR Main Files Walkthrough:
+  - ...
+  - ...
+```
+
+Make sure to output a valid YAML. Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields.
 """
 
 user="""PR Info:
+Previous title: '{{title}}'
+Previous description: '{{description}}'
 Branch: '{{branch}}'
 {%- if language %}
 
@@ -52,6 +79,6 @@ The PR Git Diff:
 ```
 Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines.
 
-Response (should be a valid JSON, and nothing else):
-```json
+Response (should be a valid YAML, and nothing else):
+```yaml
 """
diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml
index 613f1360..a4ae0451 100644
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@@ -14,118 +14,119 @@ Extra instructions from the user:
 {{ extra_instructions }}
 {% endif %}
 
-You must use the following JSON schema to format your answer:
-```json
-{
-  "PR Analysis": {
-    "Main theme": {
-      "type": "string",
-      "description": "a short explanation of the PR"
-    },
-    "Type of PR": {
-      "type": "string",
-      "enum": ["Bug fix", "Tests", "Refactoring", "Enhancement", "Documentation", "Other"]
-    },
+You must use the following YAML schema to format your answer:
+```yaml
+PR Analysis:
+  Main theme:
+    type: string
+    description: a short explanation of the PR
+  Type of PR:
+    type: string
+    enum:
+      - Bug fix
+      - Tests
+      - Refactoring
+      - Enhancement
+      - Documentation
+      - Other
 {%- if require_score %}
-    "Score": {
-      "type": "int",
-      "description": "Rate this PR on a scale of 0-100 (inclusive), where 0 means the worst possible PR code, and 100 means PR code of the highest quality, without any bugs or performance issues, that is ready to be merged immediately and run in production at scale."
-    },
+  Score:
+    type: int
+    description: >-
+      Rate this PR on a scale of 0-100 (inclusive), where 0 means the worst
+      possible PR code, and 100 means PR code of the highest quality, without
+      any bugs or performance issues, that is ready to be merged immediately and
+      run in production at scale.
 {%- endif %}
 {%- if require_tests %}
-    "Relevant tests added": {
-      "type": "string",
-      "description": "yes\\no question: does this PR have relevant tests ?"
-    },
+  Relevant tests added:
+    type: string
+    description: yes\\no question: does this PR have relevant tests ?
 {%- endif %}
 {%- if question_str %}
-    "Insights from user's answer": {
-      "type": "string",
-      "description": "shortly summarize the insights you gained from the user's answers to the questions"
-    },
+  Insights from user's answer:
+    type: string
+    description: >-
+      shortly summarize the insights you gained from the user's answers to the questions
 {%- endif %}
 {%- if require_focused %}
-    "Focused PR": {
-      "type": "string",
-      "description": "Is this a focused PR, in the sense that all the PR code diff changes are united under a single focused theme ? If the theme is too broad, or the PR code diff changes are too scattered, then the PR is not focused. Explain your answer shortly."
-    }
-  },
+  Focused PR:
+    type: string
+    description: >-
+      Is this a focused PR, in the sense that all the PR code diff changes are
+      united under a single focused theme ? If the theme is too broad, or the PR
+      code diff changes are too scattered, then the PR is not focused. Explain
+      your answer shortly.
 {%- endif %}
-  "PR Feedback": {
-    "General suggestions": {
-      "type": "string",
-      "description": "General suggestions and feedback for the contributors and maintainers of this PR. May include important suggestions for the overall structure, primary purpose, best practices, critical bugs, and other aspects of the PR. Don't address PR title and description, or lack of tests. Explain your suggestions."
-    },
+PR Feedback:
+  General suggestions:
+    type: string
+    description: >-
+      General suggestions and feedback for the contributors and maintainers of
+      this PR. May include important suggestions for the overall structure,
+      primary purpose, best practices, critical bugs, and other aspects of the
+      PR. Don't address PR title and description, or lack of tests. Explain your
+      suggestions.
 {%- if num_code_suggestions > 0 %}
-    "Code feedback": {
-      "type": "array",
-      "maxItems": {{ num_code_suggestions }},
-      "uniqueItems": true,
-      "items": {
-        "relevant file": {
-          "type": "string",
-          "description": "the relevant file full path"
-        },
-        "suggestion": {
-          "type": "string",
-          "description": "a concrete suggestion for meaningfully improving the new PR code. Also describe how, specifically, the suggestion can be applied to new PR code. Add tags with importance measure that matches each suggestion ('important' or 'medium'). Do not make suggestions for updating or adding docstrings, renaming PR title and description, or linter like.
-        },
-        "relevant line": {
-          "type": "string",
-          "description": "a single code line taken from the relevant file, to which the suggestion applies. The line should be a '+' line. Make sure to output the line exactly as it appears in the relevant file"
-        }
-      }
-    },
+  Code feedback:
+    type: array
+    maxItems: {{ num_code_suggestions }}
+    uniqueItems: true
+    items:
+      relevant file:
+        type: string
+        description: the relevant file full path
+      suggestion:
+        type: string
+        description: >-
+          a concrete suggestion for meaningfully improving the new PR code. Also
+          describe how, specifically, the suggestion can be applied to new PR
+          code. Add tags with importance measure that matches each suggestion
+          ('important' or 'medium'). Do not make suggestions for updating or
+          adding docstrings, renaming PR title and description, or linter like.
+      relevant line:
+        type: string
+        description: >-
+          a single code line taken from the relevant file, to which the
+          suggestion applies. The line should be a '+' line. Make sure to output
+          the line exactly as it appears in the relevant file
 {%- endif %}
 {%- if require_security %}
-    "Security concerns": {
-      "type": "string",
-      "description": "yes\\no question: does this PR code introduce possible security concerns or issues, like SQL injection, XSS, CSRF, and others ? If answered 'yes', explain your answer shortly"
-       ? explain your answer shortly"
-    }
+  Security concerns:
+    type: string
+    description: >-
+      yes\\no question: does this PR code introduce possible security concerns or
+      issues, like SQL injection, XSS, CSRF, and others ? If answered 'yes',explain your answer shortly
 {%- endif %}
-  }
-}
 ```
 
 Example output:
-'
-{
-    "PR Analysis":
-    {
-        "Main theme": "xxx",
-        "Type of PR": "Bug fix",
+```yaml
+PR Analysis:
+  Main theme: xxx
+  Type of PR: Bug fix
 {%- if require_score %}
-        "Score": 89,
-{%- endif %}
-{%- if require_tests %}
-        "Relevant tests added": "No",
+  Score: 89
 {%- endif %}
+  Relevant tests added: No
 {%- if require_focused %}
-        "Focused PR": "yes\\no, because ..."
+  Focused PR: no, because ...
 {%- endif %}
-    },
-    "PR Feedback":
-    {
-        "General PR suggestions": "..., `xxx`...",
+PR Feedback:
+  General PR suggestions: ...
 {%- if num_code_suggestions > 0 %}
-        "Code feedback": [
-            {
-                "relevant file": "directory/xxx.py",
-                "suggestion": "xxx [important]",
-                "relevant line": "xxx",
-            },
-            ...
-        ]
+  Code feedback:
+    - relevant file: directory/xxx.py
+      suggestion: xxx [important]
+      relevant line: 'xxx'
+    - ...
 {%- endif %}
 {%- if require_security %}
-        "Security concerns": "No, because ..."
+  Security concerns: No
 {%- endif %}
-    }
-}
-'
+```
 
-Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields.
+Make sure to output a valid YAML. Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields.
 """
 
 user="""PR Info:
@@ -158,6 +159,6 @@ The PR Git Diff:
 ```
 Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines.
 
-Response (should be a valid JSON, and nothing else):
-```json
+Response (should be a valid YAML, and nothing else):
+```yaml
 """
diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py
index 7f39a8b8..d55dd55a 100644
--- a/pr_agent/tools/pr_description.py
+++ b/pr_agent/tools/pr_description.py
@@ -8,6 +8,7 @@ from jinja2 import Environment, StrictUndefined
 from pr_agent.algo.ai_handler import AiHandler
 from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
 from pr_agent.algo.token_handler import TokenHandler
+from pr_agent.algo.utils import load_yaml
 from pr_agent.config_loader import get_settings
 from pr_agent.git_providers import get_git_provider
 from pr_agent.git_providers.git_provider import get_main_pr_language
@@ -139,34 +140,45 @@ class PRDescription:
         - title: a string containing the PR title.
         - pr_body: a string containing the PR body in a markdown format.
         - pr_types: a list of strings containing the PR types.
-        - markdown_text: a string containing the AI prediction data in a markdown format.
+        - markdown_text: a string containing the AI prediction data in a markdown format. used for publishing a comment
         """
         # Load the AI prediction data into a dictionary
-        data = json.loads(self.prediction)
+        data = load_yaml(self.prediction.strip())
 
         # Initialization
-        markdown_text = pr_body = ""
         pr_types = []
 
         # Iterate over the dictionary items and append the key and value to 'markdown_text' in a markdown format
+        markdown_text = ""
         for key, value in data.items():
             markdown_text += f"## {key}\n\n"
             markdown_text += f"{value}\n\n"
 
         # If the 'PR Type' key is present in the dictionary, split its value by comma and assign it to 'pr_types'
         if 'PR Type' in data:
-            pr_types = data['PR Type'].split(',')
+            if type(data['PR Type']) == list:
+                pr_types = data['PR Type']
+            elif type(data['PR Type']) == str:
+                pr_types = data['PR Type'].split(',')
 
         # Assign the value of the 'PR Title' key to 'title' variable and remove it from the dictionary
         title = data.pop('PR Title')
 
         # Iterate over the remaining dictionary items and append the key and value to 'pr_body' in a markdown format,
         # except for the items containing the word 'walkthrough'
+        pr_body = ""
         for key, value in data.items():
             pr_body += f"## {key}:\n"
             if 'walkthrough' in key.lower():
-                pr_body += f"{value}\n"
+                # for filename, description in value.items():
+                for file in value:
+                    filename = file['filename'].replace("'", "`")
+                    description = file['changes in file']
+                    pr_body += f'`{filename}`: {description}\n'
             else:
+                # if the value is a list, join its items by comma
+                if type(value) == list:
+                    value = ', '.join(v for v in value)
                 pr_body += f"{value}\n\n___\n"
 
         if get_settings().config.verbosity_level >= 2:
diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py
index 982f5000..afbc3bae 100644
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@@ -4,13 +4,15 @@ import logging
 from collections import OrderedDict
 from typing import List, Tuple
 
+import yaml
 from jinja2 import Environment, StrictUndefined
+from yaml import SafeLoader
 
 from pr_agent.algo.ai_handler import AiHandler
 from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models, \
     find_line_number_of_relevant_line_in_file
 from pr_agent.algo.token_handler import TokenHandler
-from pr_agent.algo.utils import convert_to_markdown, try_fix_json
+from pr_agent.algo.utils import convert_to_markdown, try_fix_json, try_fix_yaml, load_yaml
 from pr_agent.config_loader import get_settings
 from pr_agent.git_providers import get_git_provider
 from pr_agent.git_providers.git_provider import IncrementalPR, get_main_pr_language
@@ -160,19 +162,17 @@ class PRReviewer:
         Prepare the PR review by processing the AI prediction and generating a markdown-formatted text that summarizes
         the feedback.
         """
-        review = self.prediction.strip()
-
-        try:
-            data = json.loads(review)
-        except json.decoder.JSONDecodeError:
-            data = try_fix_json(review)
+        data = load_yaml(self.prediction.strip())
 
         # Move 'Security concerns' key to 'PR Analysis' section for better display
         pr_feedback = data.get('PR Feedback', {})
         security_concerns = pr_feedback.get('Security concerns')
-        if security_concerns:
+        if security_concerns is not None:
             del pr_feedback['Security concerns']
-            data.setdefault('PR Analysis', {})['Security concerns'] = security_concerns
+            if type(security_concerns) == bool and security_concerns == False:
+                data.setdefault('PR Analysis', {})['Security concerns'] = 'No security concerns found'
+            else:
+                data.setdefault('PR Analysis', {})['Security concerns'] = security_concerns
 
         #
         if 'Code feedback' in pr_feedback:
@@ -183,6 +183,12 @@ class PRReviewer:
                 del pr_feedback['Code feedback']
             else:
                 for suggestion in code_feedback:
+                    if ('relevant file' in suggestion) and (not suggestion['relevant file'].startswith('``')):
+                        suggestion['relevant file'] = f"``{suggestion['relevant file']}``"
+
+                    if 'relevant line' not in suggestion:
+                        suggestion['relevant line'] = ''
+
                     relevant_line_str = suggestion['relevant line'].split('\n')[0]
 
                     # removing '+'
@@ -219,7 +225,7 @@ class PRReviewer:
             logging.info(f"Markdown response:\n{markdown_text}")
 
         if markdown_text == None or len(markdown_text) == 0:
-            markdown_text = review
+            markdown_text = ""
 
         return markdown_text
 
@@ -230,11 +236,13 @@ class PRReviewer:
         if get_settings().pr_reviewer.num_code_suggestions == 0:
             return
 
-        review = self.prediction.strip()
+        review_text = self.prediction.strip()
+        review_text = review_text.lstrip('```yaml').rstrip('`')
         try:
-            data = json.loads(review)
-        except json.decoder.JSONDecodeError:
-            data = try_fix_json(review)
+            data = yaml.load(review_text, Loader=SafeLoader)
+        except Exception as e:
+            logging.error(f"Failed to parse AI prediction: {e}")
+            data = try_fix_yaml(review_text)
 
         comments: List[str] = []
         for suggestion in data.get('PR Feedback', {}).get('Code feedback', []):
diff --git a/tests/unittest/test_load_yaml.py b/tests/unittest/test_load_yaml.py
new file mode 100644
index 00000000..a345aee2
--- /dev/null
+++ b/tests/unittest/test_load_yaml.py
@@ -0,0 +1,32 @@
+
+# Generated by CodiumAI
+
+import pytest
+from pr_agent.algo.utils import load_yaml
+
+
+class TestLoadYaml:
+    #  Tests that load_yaml loads a valid YAML string
+    def test_load_valid_yaml(self):
+        yaml_str = 'name: John Smith\nage: 35'
+        expected_output = {'name': 'John Smith', 'age': 35}
+        assert load_yaml(yaml_str) == expected_output
+
+    def test_load_complicated_yaml(self):
+        yaml_str = \
+'''\
+PR Analysis:
+  Main theme: Enhancing the `/describe` command prompt by adding title and description
+  Type of PR: Enhancement
+  Relevant tests added: No
+  Focused PR: Yes, the PR is focused on enhancing the `/describe` command prompt.
+
+PR Feedback:
+  General suggestions: The PR seems to be well-structured and focused on a specific enhancement. However, it would be beneficial to add tests to ensure the new feature works as expected.
+  Code feedback:
+    - relevant file: pr_agent/settings/pr_description_prompts.toml
+      suggestion: Consider using a more descriptive variable name than 'user' for the command prompt. A more descriptive name would make the code more readable and maintainable. [medium]
+      relevant line: 'user="""PR Info:'
+  Security concerns: No'''
+        expected_output = {'PR Analysis': {'Main theme': 'Enhancing the `/describe` command prompt by adding title and description', 'Type of PR': 'Enhancement', 'Relevant tests added': False, 'Focused PR': 'Yes, the PR is focused on enhancing the `/describe` command prompt.'}, 'PR Feedback': {'General suggestions': 'The PR seems to be well-structured and focused on a specific enhancement. However, it would be beneficial to add tests to ensure the new feature works as expected.', 'Code feedback': [{'relevant file': 'pr_agent/settings/pr_description_prompts.toml', 'suggestion': "Consider using a more descriptive variable name than 'user' for the command prompt. A more descriptive name would make the code more readable and maintainable. [medium]", 'relevant line': 'user="""PR Info:'}], 'Security concerns': False}}
+        assert load_yaml(yaml_str) == expected_output

From e0f295659dbc33c681dfd95859f149b3c5aae854 Mon Sep 17 00:00:00 2001
From: Ori Kotek <ori.k@codium.ai>
Date: Wed, 9 Aug 2023 12:17:54 +0300
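Subject: [PATCH 03/21] A less hacky way

Clip the PR description and commit messages using the model's token
encoder instead of the tokens-per-word heuristic, and move the clipping
out of PRReviewer into the git providers, controlled by the
CONFIG.MAX_DESCRIPTION_TOKENS and CONFIG.MAX_COMMITS_TOKENS settings.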

---
 pr_agent/algo/pr_processing.py               | 19 ++++++++++---------
 pr_agent/algo/token_handler.py               |  6 +++++-
 pr_agent/git_providers/bitbucket_provider.py |  4 ++++
 pr_agent/git_providers/git_provider.py       |  4 ++++
 pr_agent/git_providers/github_provider.py    | 12 +++++++++---
 pr_agent/git_providers/gitlab_provider.py    | 11 +++++++++--
 pr_agent/settings/configuration.toml         |  2 ++
 pr_agent/tools/pr_reviewer.py                |  2 --
 8 files changed, 43 insertions(+), 17 deletions(-)

diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py
index be3a461b..b195f9f4 100644
--- a/pr_agent/algo/pr_processing.py
+++ b/pr_agent/algo/pr_processing.py
@@ -11,7 +11,7 @@ from github import RateLimitExceededException
 from pr_agent.algo import MAX_TOKENS
 from pr_agent.algo.git_patch_processing import convert_to_hunks_with_lines_numbers, extend_patch, handle_patch_deletions
 from pr_agent.algo.language_handler import sort_files_by_main_languages
-from pr_agent.algo.token_handler import TokenHandler
+from pr_agent.algo.token_handler import TokenHandler, get_token_encoder
 from pr_agent.config_loader import get_settings
 from pr_agent.git_providers.git_provider import FilePatchInfo, GitProvider
 
@@ -298,11 +298,12 @@ def clip_tokens(text: str, max_tokens: int) -> str:
         str: The clipped string.
     """
     # We'll estimate the number of tokens by hueristically assuming 2.5 tokens per word
-    words = re.finditer(r'\S+', text)
-    max_words = max_tokens // 2.5
-    end_pos = None
-    for i, token in enumerate(words):
-        if i == max_words:
-            end_pos = token.start()
-            break
-    return text if end_pos is None else text[:end_pos]
\ No newline at end of file
+    encoder = get_token_encoder()
+    num_input_tokens = len(encoder.encode(text))
+    if num_input_tokens <= max_tokens:
+        return text
+    num_chars = len(text)
+    chars_per_token = num_chars / num_input_tokens
+    num_output_chars = int(chars_per_token * max_tokens)
+    clipped_text = text[:num_output_chars]
+    return clipped_text
diff --git a/pr_agent/algo/token_handler.py b/pr_agent/algo/token_handler.py
index 3686f521..f018a92b 100644
--- a/pr_agent/algo/token_handler.py
+++ b/pr_agent/algo/token_handler.py
@@ -4,6 +4,10 @@ from tiktoken import encoding_for_model, get_encoding
 from pr_agent.config_loader import get_settings
 
 
+def get_token_encoder():
+    return encoding_for_model(get_settings().config.model) if "gpt" in get_settings().config.model else get_encoding(
+        "cl100k_base")
+
 class TokenHandler:
     """
     A class for handling tokens in the context of a pull request.
@@ -27,7 +31,7 @@ class TokenHandler:
         - system: The system string.
         - user: The user string.
         """
-        self.encoder = encoding_for_model(get_settings().config.model) if "gpt" in get_settings().config.model else get_encoding("cl100k_base")
+        self.encoder = get_token_encoder()
         self.prompt_tokens = self._get_system_user_tokens(pr, self.encoder, vars, system, user)
 
     def _get_system_user_tokens(self, pr, encoder, vars: dict, system, user):
diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py
index 122b0db3..07b92295 100644
--- a/pr_agent/git_providers/bitbucket_provider.py
+++ b/pr_agent/git_providers/bitbucket_provider.py
@@ -5,6 +5,7 @@ from urllib.parse import urlparse
 import requests
 from atlassian.bitbucket import Cloud
 
+from ..algo.pr_processing import clip_tokens
 from ..config_loader import get_settings
 from .git_provider import FilePatchInfo
 
@@ -81,6 +82,9 @@ class BitbucketProvider:
         return self.pr.source_branch
 
     def get_pr_description(self):
+        max_tokens = get_settings().get("CONFIG.MAX_DESCRIPTION_TOKENS", None)
+        if max_tokens:
+            return clip_tokens(self.pr.description, max_tokens)
         return self.pr.description
 
     def get_user_id(self):
diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py
index 8e161252..2a891938 100644
--- a/pr_agent/git_providers/git_provider.py
+++ b/pr_agent/git_providers/git_provider.py
@@ -97,6 +97,10 @@ class GitProvider(ABC):
     def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool:
         pass
 
+    @abstractmethod
+    def get_commit_messages(self):
+        pass
+
 def get_main_pr_language(languages, files) -> str:
     """
     Get the main language of the commit. Return an empty string if cannot determine.
diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py
index bc5cc6a7..dbad5388 100644
--- a/pr_agent/git_providers/github_provider.py
+++ b/pr_agent/git_providers/github_provider.py
@@ -12,7 +12,7 @@ from starlette_context import context
 from .git_provider import FilePatchInfo, GitProvider, IncrementalPR
 from ..algo.language_handler import is_valid_file
 from ..algo.utils import load_large_diff
-from ..algo.pr_processing import find_line_number_of_relevant_line_in_file
+from ..algo.pr_processing import find_line_number_of_relevant_line_in_file, clip_tokens
 from ..config_loader import get_settings
 from ..servers.utils import RateLimitExceeded
 
@@ -234,6 +234,9 @@ class GithubProvider(GitProvider):
         return self.pr.head.ref
 
     def get_pr_description(self):
+        max_tokens = get_settings().get("CONFIG.MAX_DESCRIPTION_TOKENS", None)
+        if max_tokens:
+            return clip_tokens(self.pr.body, max_tokens)
         return self.pr.body
 
     def get_user_id(self):
@@ -375,19 +378,22 @@ class GithubProvider(GitProvider):
             logging.exception(f"Failed to get labels, error: {e}")
             return []
 
-    def get_commit_messages(self) -> str:
+    def get_commit_messages(self):
         """
         Retrieves the commit messages of a pull request.
 
         Returns:
             str: A string containing the commit messages of the pull request.
         """
+        max_tokens = get_settings().get("CONFIG.MAX_COMMITS_TOKENS", None)
         try:
             commit_list = self.pr.get_commits()
             commit_messages = [commit.commit.message for commit in commit_list]
             commit_messages_str = "\n".join([f"{i + 1}. {message}" for i, message in enumerate(commit_messages)])
-        except:
+        except Exception:
             commit_messages_str = ""
+        if max_tokens:
+            commit_messages_str = clip_tokens(commit_messages_str, max_tokens)
         return commit_messages_str
 
     def generate_link_to_relevant_line_number(self, suggestion) -> str:
diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py
index a4d2d127..73a3a2f9 100644
--- a/pr_agent/git_providers/gitlab_provider.py
+++ b/pr_agent/git_providers/gitlab_provider.py
@@ -7,6 +7,7 @@ import gitlab
 from gitlab import GitlabGetError
 
 from ..algo.language_handler import is_valid_file
+from ..algo.pr_processing import clip_tokens
 from ..algo.utils import load_large_diff
 from ..config_loader import get_settings
 from .git_provider import EDIT_TYPE, FilePatchInfo, GitProvider
@@ -275,6 +276,9 @@ class GitLabProvider(GitProvider):
         return self.mr.source_branch
 
     def get_pr_description(self):
+        max_tokens = get_settings().get("CONFIG.MAX_DESCRIPTION_TOKENS", None)
+        if max_tokens:
+            return clip_tokens(self.mr.description, max_tokens)
         return self.mr.description
 
     def get_issue_comments(self):
@@ -338,16 +342,19 @@ class GitLabProvider(GitProvider):
     def get_labels(self):
         return self.mr.labels
 
-    def get_commit_messages(self) -> str:
+    def get_commit_messages(self):
         """
         Retrieves the commit messages of a pull request.
 
         Returns:
             str: A string containing the commit messages of the pull request.
         """
+        max_tokens = get_settings().get("CONFIG.MAX_COMMITS_TOKENS", None)
         try:
             commit_messages_list = [commit['message'] for commit in self.mr.commits()._list]
             commit_messages_str = "\n".join([f"{i + 1}. {message}" for i, message in enumerate(commit_messages_list)])
-        except:
+        except Exception:
             commit_messages_str = ""
+        if max_tokens:
+            commit_messages_str = clip_tokens(commit_messages_str, max_tokens)
         return commit_messages_str
\ No newline at end of file
diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
index 8334049d..0c502df9 100644
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@@ -8,6 +8,8 @@ verbosity_level=0 # 0,1,2
 use_extra_bad_extensions=false
 use_repo_settings_file=true
 ai_timeout=180
+max_description_tokens = 500
+max_commits_tokens = 500
 
 [pr_reviewer] # /review #
 require_focused_review=true
diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py
index 982f18cc..f679851b 100644
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@@ -62,8 +62,6 @@ class PRReviewer:
             "extra_instructions": get_settings().pr_reviewer.extra_instructions,
             "commit_messages_str": self.git_provider.get_commit_messages(),
         }
-        self.vars["description"] = clip_tokens(self.vars["description"], 500)
-        self.vars["commit_messages_str"] = clip_tokens(self.vars["commit_messages_str"], 500)
 
         self.token_handler = TokenHandler(
             self.git_provider.pr,

From 64481e2d84a98dc928ed12f6539ce1a5bbd0482b Mon Sep 17 00:00:00 2001
From: mrT23 <tal.r@codium.ai>
Date: Wed, 9 Aug 2023 14:01:48 +0300
Subject: [PATCH 04/21] block scalar

---
 pr_agent/settings/pr_reviewer_prompts.toml | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml
index a4ae0451..e649bf2d 100644
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@@ -116,17 +116,20 @@ PR Feedback:
   General PR suggestions: ...
 {%- if num_code_suggestions > 0 %}
   Code feedback:
-    - relevant file: directory/xxx.py
+    - relevant file: |-
+        directory/xxx.py
       suggestion: xxx [important]
-      relevant line: 'xxx'
-    - ...
+      relevant line: |-
+        xxx
+    ...
 {%- endif %}
 {%- if require_security %}
   Security concerns: No
 {%- endif %}
 ```
 
-Make sure to output a valid YAML. Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields.
+Make sure to output a valid YAML. Use multi-line block scalar ('|') if needed.
+Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields.
 """
 
 user="""PR Info:

From 1b0b90e51d5d875c63bb0b99a7dcb067758b6cce Mon Sep 17 00:00:00 2001
From: mrT23 <tal.r@codium.ai>
Date: Wed, 9 Aug 2023 14:11:58 +0300
Subject: [PATCH 05/21] block scalar

---
 pr_agent/settings/pr_reviewer_prompts.toml | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml
index e649bf2d..8f7f03b3 100644
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@@ -78,18 +78,20 @@ PR Feedback:
         description: the relevant file full path
       suggestion:
         type: string
-        description: >-
+        description: |
           a concrete suggestion for meaningfully improving the new PR code. Also
           describe how, specifically, the suggestion can be applied to new PR
           code. Add tags with importance measure that matches each suggestion
           ('important' or 'medium'). Do not make suggestions for updating or
           adding docstrings, renaming PR title and description, or linter like.
+          YAML output should be in block scalar format ('|')
       relevant line:
         type: string
-        description: >-
-          a single code line taken from the relevant file, to which the
-          suggestion applies. The line should be a '+' line. Make sure to output
-          the line exactly as it appears in the relevant file
+        description: |
+          a single code line taken from the relevant file, to which the suggestion applies.
+          The line should be a '+' line.
+          Make sure to output the line exactly as it appears in the relevant file
+          YAML output should be in block scalar format ('|')
 {%- endif %}
 {%- if require_security %}
   Security concerns:

From f1f271fa00293ac75cbbf7a3aa8dc067482e8b6a Mon Sep 17 00:00:00 2001
From: Ori Kotek <ori.k@codium.ai>
Date: Thu, 10 Aug 2023 00:44:00 +0300
Subject: [PATCH 06/21] PyYAML dependency

---
 pyproject.toml   | 3 ++-
 requirements.txt | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 4ca0c0b6..2e8f2b5c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,7 +42,8 @@ dependencies = [
   "atlassian-python-api==3.39.0",
   "GitPython~=3.1.32",
   "starlette-context==0.3.6",
-  "litellm~=0.1.351"
+  "litellm~=0.1.351",
+  "PyYAML==6.0"
 ]
 
 [project.urls]
diff --git a/requirements.txt b/requirements.txt
index 07a33514..0d2c6239 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,4 +11,5 @@ pytest~=7.4.0
 aiohttp~=3.8.4
 atlassian-python-api==3.39.0
 GitPython~=3.1.32
-litellm~=0.1.351
\ No newline at end of file
+litellm~=0.1.351
+PyYAML==6.0

From e00500b90cd3a09a453a7dfc62c902562dfd9eb4 Mon Sep 17 00:00:00 2001
From: Ori Kotek <ori.k@codium.ai>
Date: Thu, 10 Aug 2023 00:56:28 +0300
Subject: [PATCH 07/21] PyYAML dependency

---
 requirements.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index 0d2c6239..ebea2b71 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,3 +13,5 @@ atlassian-python-api==3.39.0
 GitPython~=3.1.32
 litellm~=0.1.351
 PyYAML==6.0
+starlette-context==0.3.6
+litellm~=0.1.351
\ No newline at end of file

From 725ac9e85dedff726e509e253949f01bbf6aa4fc Mon Sep 17 00:00:00 2001
From: Itamar Friedman <itamar.f@codium.ai>
Date: Thu, 10 Aug 2023 01:30:12 +0300
Subject: [PATCH 08/21] fixing cli pr_url help description

---
 pr_agent/cli.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/pr_agent/cli.py b/pr_agent/cli.py
index 8dd21b3f..0f871041 100644
--- a/pr_agent/cli.py
+++ b/pr_agent/cli.py
@@ -10,13 +10,13 @@ from pr_agent.config_loader import get_settings
 def run(inargs=None):
     parser = argparse.ArgumentParser(description='AI based pull request analyzer', usage=
 """\
-Usage: cli.py --pr-url <URL on supported git hosting service> <command> [<args>].
+Usage: cli.py --pr_url=<URL on supported git hosting service> <command> [<args>].
 For example:
-- cli.py --pr-url=... review
-- cli.py --pr-url=... describe
-- cli.py --pr-url=... improve
-- cli.py --pr-url=... ask "write me a poem about this PR"
-- cli.py --pr-url=... reflect
+- cli.py --pr_url=... review
+- cli.py --pr_url=... describe
+- cli.py --pr_url=... improve
+- cli.py --pr_url=... ask "write me a poem about this PR"
+- cli.py --pr_url=... reflect
 
 Supported commands:
 review / review_pr - Add a review that includes a summary of the PR and specific suggestions for improvement.
@@ -27,7 +27,7 @@ reflect - Ask the PR author questions about the PR.
 update_changelog - Update the changelog based on the PR's contents.
 
 To edit any configuration parameter from 'configuration.toml', just add -config_path=<value>.
-For example: '- cli.py --pr-url=... review --pr_reviewer.extra_instructions="focus on the file: ..."'
+For example: 'python cli.py --pr_url=... review --pr_reviewer.extra_instructions="focus on the file: ..."'
 """)
     parser.add_argument('--pr_url', type=str, help='The URL of the PR to review', required=True)
     parser.add_argument('command', type=str, help='The', choices=commands, default='review')

From 3e09b9ac370e3dd6aa772d4a517db16f91fc24b0 Mon Sep 17 00:00:00 2001
From: Itamar Friedman <itamar.f@codium.ai>
Date: Thu, 10 Aug 2023 01:31:06 +0300
Subject: [PATCH 09/21] fixing pr_url param description (was wrongly mentioned
 as `pr-url`)

---
 README.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index b8ab88c9..0ec00ec2 100644
--- a/README.md
+++ b/README.md
@@ -97,12 +97,12 @@ CodiumAI `PR-Agent` is an open-source tool aiming to help developers review pull
 |       | Incremental PR Review |   :white_check_mark:    |      |         |
 
 Examples for invoking the different tools via the CLI:
-- **Review**:       python cli.py --pr-url=<pr_url>  review
-- **Describe**:     python cli.py --pr-url=<pr_url>  describe
-- **Improve**:      python cli.py --pr-url=<pr_url>  improve
-- **Ask**:          python cli.py --pr-url=<pr_url>  ask "Write me a poem about this PR"
-- **Reflect**:      python cli.py --pr-url=<pr_url>  reflect
-- **Update Changelog**:      python cli.py --pr-url=<pr_url>  update_changelog
+- **Review**:       python cli.py --pr_url=<pr_url>  review
+- **Describe**:     python cli.py --pr_url=<pr_url>  describe
+- **Improve**:      python cli.py --pr_url=<pr_url>  improve
+- **Ask**:          python cli.py --pr_url=<pr_url>  ask "Write me a poem about this PR"
+- **Reflect**:      python cli.py --pr_url=<pr_url>  reflect
+- **Update Changelog**:      python cli.py --pr_url=<pr_url>  update_changelog
 
 "<pr_url>" is the url of the relevant PR (for example: https://github.com/Codium-ai/pr-agent/pull/50).
 

From 0270306d3ca65e2a218f6eabe55686902b7e59fc Mon Sep 17 00:00:00 2001
From: Itamar Friedman <itamar.f@codium.ai>
Date: Thu, 10 Aug 2023 01:34:24 +0300
Subject: [PATCH 10/21] litellm was mentioned twice in the requirements.txt

---
 requirements.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index ebea2b71..ea2cacdf 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,5 +13,4 @@ atlassian-python-api==3.39.0
 GitPython~=3.1.32
 litellm~=0.1.351
 PyYAML==6.0
-starlette-context==0.3.6
-litellm~=0.1.351
\ No newline at end of file
+starlette-context==0.3.6
\ No newline at end of file

From b206b1c5ff42ed77943b72d0758f89d050348027 Mon Sep 17 00:00:00 2001
From: Ori Kotek <ori.k@codium.ai>
Date: Thu, 10 Aug 2023 02:08:36 +0300
Subject: [PATCH 11/21] Protect for empty description

---
 pr_agent/algo/pr_processing.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py
index b195f9f4..3a08a86d 100644
--- a/pr_agent/algo/pr_processing.py
+++ b/pr_agent/algo/pr_processing.py
@@ -298,12 +298,16 @@ def clip_tokens(text: str, max_tokens: int) -> str:
         str: The clipped string.
     """
     # We'll estimate the number of tokens by hueristically assuming 2.5 tokens per word
-    encoder = get_token_encoder()
-    num_input_tokens = len(encoder.encode(text))
-    if num_input_tokens <= max_tokens:
-        return text
-    num_chars = len(text)
-    chars_per_token = num_chars / num_input_tokens
-    num_output_chars = int(chars_per_token * max_tokens)
-    clipped_text = text[:num_output_chars]
-    return clipped_text
+    try:
+        encoder = get_token_encoder()
+        num_input_tokens = len(encoder.encode(text))
+        if num_input_tokens <= max_tokens:
+            return text
+        num_chars = len(text)
+        chars_per_token = num_chars / num_input_tokens
+        num_output_chars = int(chars_per_token * max_tokens)
+        clipped_text = text[:num_output_chars]
+        return clipped_text
+    except Exception as e:
+        logging.warning(f"Failed to clip tokens: {e}")
+        return text
\ No newline at end of file

From e24c5e3501aeaa01a0b78269421522853b9d1e2e Mon Sep 17 00:00:00 2001
From: Ori Kotek <orikotek@gmail.com>
Date: Thu, 10 Aug 2023 02:16:16 +0300
Subject: [PATCH 12/21] Update requirements.txt

---
 requirements.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index ea2cacdf..ebea2b71 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,4 +13,5 @@ atlassian-python-api==3.39.0
 GitPython~=3.1.32
 litellm~=0.1.351
 PyYAML==6.0
-starlette-context==0.3.6
\ No newline at end of file
+starlette-context==0.3.6
+litellm~=0.1.351
\ No newline at end of file

From 3e542cd88bd8f965eea48614203feafe578afb2a Mon Sep 17 00:00:00 2001
From: Itamar Friedman <itamar.f@codium.ai>
Date: Thu, 10 Aug 2023 08:10:10 +0300
Subject: [PATCH 13/21] adding `permissions` to `review.yaml`, also adding some
 comments

---
 .github/workflows/review.yaml | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/.github/workflows/review.yaml b/.github/workflows/review.yaml
index e7612520..9dcf59b8 100644
--- a/.github/workflows/review.yaml
+++ b/.github/workflows/review.yaml
@@ -1,6 +1,17 @@
+# This workflow enables developers to call PR-Agent's `/[actions]` in PR comments and upon PR creation.
+# Learn more at https://www.codium.ai/pr-agent/
+# This is v0.2 of this workflow file
+
+name: PR-Agent
+
 on:
   pull_request:
   issue_comment:
+
+permissions:
+  issues: write
+  pull-requests: write
+
 jobs:
   pr_agent_job:
     runs-on: ubuntu-latest

From 4a47b78a902079d7adcf2e033d4b73015cc79323 Mon Sep 17 00:00:00 2001
From: Ori Kotek <ori.k@codium.ai>
Date: Thu, 10 Aug 2023 15:16:03 +0300
Subject: [PATCH 14/21] Rename workflow

---
 .github/workflows/{review.yaml => pr-agent-review.yaml} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename .github/workflows/{review.yaml => pr-agent-review.yaml} (100%)

diff --git a/.github/workflows/review.yaml b/.github/workflows/pr-agent-review.yaml
similarity index 100%
rename from .github/workflows/review.yaml
rename to .github/workflows/pr-agent-review.yaml

From cf9c6a872d062317a33f1730c00657a10a5ef398 Mon Sep 17 00:00:00 2001
From: Ori Kotek <ori.k@codium.ai>
Date: Thu, 10 Aug 2023 16:09:29 +0300
Subject: [PATCH 15/21] Test github action

---
 .github/workflows/build-and-test.yaml | 34 +++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 .github/workflows/build-and-test.yaml

diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml
new file mode 100644
index 00000000..f71be1be
--- /dev/null
+++ b/.github/workflows/build-and-test.yaml
@@ -0,0 +1,34 @@
+on:
+  push:
+
+jobs:
+  build-and-test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - id: checkout
+        uses: actions/checkout@v2
+
+      - id: dockerx
+        name: Setup Docker Buildx
+        uses: docker/setup-buildx-action@v2
+
+      - id: build
+        name: Build dev docker
+        uses: docker/build-push-action@v2
+        with:
+          context: .
+          file: ./docker/Dockerfile
+          push: false
+          load: true
+          tags: codiumai/pr-agent:test
+          cache-from: type=gha,scope=dev
+          cache-to: type=gha,mode=max,scope=dev
+          target: test
+
+      - id: test
+        name: Test dev docker
+        run: |
+          docker run --rm codiumai/pr-agent:test pytest -v
+        
+          

From 2d6b94729287dc683f111d859eec70b2c3bbc550 Mon Sep 17 00:00:00 2001
From: Ori Kotek <ori.k@codium.ai>
Date: Thu, 10 Aug 2023 16:37:02 +0300
Subject: [PATCH 16/21] Test github action

---
 docker/Dockerfile | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 4a8b86d5..61ab74cf 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -4,17 +4,21 @@ WORKDIR /app
 ADD pyproject.toml .
 RUN pip install . && rm pyproject.toml
 ENV PYTHONPATH=/app
-ADD pr_agent pr_agent
 
 FROM base as github_app
+ADD pr_agent pr_agent
 CMD ["python", "pr_agent/servers/github_app.py"]
 
 FROM base as github_polling
+ADD pr_agent pr_agent
 CMD ["python", "pr_agent/servers/github_polling.py"]
 
 FROM base as test
 ADD requirements-dev.txt .
 RUN pip install -r requirements-dev.txt && rm requirements-dev.txt
+ADD pr_agent pr_agent
+ADD tests tests
 
 FROM base as cli
+ADD pr_agent pr_agent
 ENTRYPOINT ["python", "pr_agent/cli.py"]

From 20bbdac13584ca007287680d66f61fb3ae22b78c Mon Sep 17 00:00:00 2001
From: Ori Kotek <ori.k@codium.ai>
Date: Thu, 10 Aug 2023 16:41:50 +0300
Subject: [PATCH 17/21] Test github action

---
 .github/workflows/build-and-test.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml
index f71be1be..960da61b 100644
--- a/.github/workflows/build-and-test.yaml
+++ b/.github/workflows/build-and-test.yaml
@@ -1,3 +1,5 @@
+name: Build-and-test
+
 on:
   push:
 

From 273a9e35d994d79878a4c808f9d6d59749860111 Mon Sep 17 00:00:00 2001
From: mrT23 <tal.r@codium.ai>
Date: Fri, 11 Aug 2023 18:35:34 +0300
Subject: [PATCH 18/21] block scalar

---
 pr_agent/algo/utils.py                        |  2 +-
 pr_agent/settings/pr_description_prompts.toml | 10 ++++++----
 pr_agent/tools/pr_reviewer.py                 |  2 +-
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py
index 595ac6a9..725d75ec 100644
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@@ -261,7 +261,7 @@ def update_settings_from_args(args: List[str]) -> List[str]:
 
 
 def load_yaml(review_text: str) -> dict:
-    review_text = review_text.lstrip('```yaml').rstrip('`')
+    review_text = review_text.removeprefix('```yaml').rstrip('`')
     try:
         data = yaml.load(review_text, Loader=yaml.SafeLoader)
     except Exception as e:
diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml
index 1fab0bdc..e9d4cbe7 100644
--- a/pr_agent/settings/pr_description_prompts.toml
+++ b/pr_agent/settings/pr_description_prompts.toml
@@ -3,7 +3,7 @@ system="""You are CodiumAI-PR-Reviewer, a language model designed to review git
 Your task is to provide full description of the PR content.
 - Make sure not to focus the new PR code (the '+' lines).
 - Notice that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or not up-to-date. Hence, compare them to the PR diff code, and use them only as a reference.
-
+- YAML output should be in block scalar format ('|')
 {%- if extra_instructions %}
 
 Extra instructions from the user:
@@ -33,7 +33,7 @@ PR Description:
 PR Main Files Walkthrough:
   type: array
   maxItems: 10
-  description: >-
+  description: |-
     a walkthrough of the PR changes. Review main files, and shortly describe the changes in each file (up to 10 most important files).
   items:
     filename:
@@ -46,10 +46,12 @@ PR Main Files Walkthrough:
 
 Example output:
 ```yaml
-PR Title: ...
+PR Title: |-
+  ...
 PR Type:
   - Bug fix
-PR Description: ...
+PR Description: |-
+  ...
 PR Main Files Walkthrough:
   - ...
   - ...
diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py
index afbc3bae..7fdf6429 100644
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@@ -237,7 +237,7 @@ class PRReviewer:
             return
 
         review_text = self.prediction.strip()
-        review_text = review_text.lstrip('```yaml').rstrip('`')
+        review_text = review_text.removeprefix('```yaml').rstrip('`')
         try:
             data = yaml.load(review_text, Loader=SafeLoader)
         except Exception as e:

From abb754b16bd699b296267cea1fda079be9dedb3a Mon Sep 17 00:00:00 2001
From: mrT23 <tal.r@codium.ai>
Date: Fri, 11 Aug 2023 18:37:55 +0300
Subject: [PATCH 19/21] block scalar

---
 pr_agent/settings/pr_reviewer_prompts.toml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml
index 8f7f03b3..13485931 100644
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@@ -7,6 +7,7 @@ Your task is to provide constructive and concise feedback for the PR, and also p
 - Suggestions should focus on improving the new added code lines.
 - Make sure not to provide suggestions repeating modifications already implemented in the new PR code (the '+' lines).
 {%- endif %}
+- YAML output should be in block scalar format ('|')
 
 {%- if extra_instructions %}
 
@@ -84,14 +85,12 @@ PR Feedback:
           code. Add tags with importance measure that matches each suggestion
           ('important' or 'medium'). Do not make suggestions for updating or
           adding docstrings, renaming PR title and description, or linter like.
-          YAML output should be in block scalar format ('|')
       relevant line:
         type: string
         description: |
           a single code line taken from the relevant file, to which the suggestion applies.
           The line should be a '+' line.
           Make sure to output the line exactly as it appears in the relevant file
-          YAML output should be in block scalar format ('|')
 {%- endif %}
 {%- if require_security %}
   Security concerns:

From dd87df49f5fc46f96ff678148f1eee3d44b5dcf3 Mon Sep 17 00:00:00 2001
From: mrT23 <tal.r@codium.ai>
Date: Fri, 11 Aug 2023 18:43:46 +0300
Subject: [PATCH 20/21] block scalar

---
 pr_agent/settings/pr_description_prompts.toml | 2 +-
 pr_agent/settings/pr_reviewer_prompts.toml    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml
index e9d4cbe7..43dd8e3b 100644
--- a/pr_agent/settings/pr_description_prompts.toml
+++ b/pr_agent/settings/pr_description_prompts.toml
@@ -3,7 +3,7 @@ system="""You are CodiumAI-PR-Reviewer, a language model designed to review git
 Your task is to provide full description of the PR content.
 - Make sure not to focus the new PR code (the '+' lines).
 - Notice that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or not up-to-date. Hence, compare them to the PR diff code, and use them only as a reference.
-- YAML output should be in block scalar format ('|')
+-  If needed, each YAML output should be in block scalar format ('|-')
 {%- if extra_instructions %}
 
 Extra instructions from the user:
diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml
index 13485931..cdf7f731 100644
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@@ -7,7 +7,7 @@ Your task is to provide constructive and concise feedback for the PR, and also p
 - Suggestions should focus on improving the new added code lines.
 - Make sure not to provide suggestions repeating modifications already implemented in the new PR code (the '+' lines).
 {%- endif %}
-- YAML output should be in block scalar format ('|')
+- If needed, each YAML output should be in block scalar format ('|-')
 
 {%- if extra_instructions %}
 

From d3a5aea89e3dc7fa841f67b5ae1b25860c799f33 Mon Sep 17 00:00:00 2001
From: mrT23 <tal.r@codium.ai>
Date: Fri, 11 Aug 2023 18:50:56 +0300
Subject: [PATCH 21/21] update_changelog

---
 pr_agent/servers/help.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pr_agent/servers/help.py b/pr_agent/servers/help.py
index 838645f5..1c8e1c3f 100644
--- a/pr_agent/servers/help.py
+++ b/pr_agent/servers/help.py
@@ -3,7 +3,8 @@ commands_text = "> **/review [-i]**: Request a review of your Pull Request. For
                 "> **/describe**: Modify the PR title and description based on the contents of the PR.\n" \
                 "> **/improve**: Suggest improvements to the code in the PR. \n" \
                 "> **/ask \\<QUESTION\\>**: Pose a question about the PR.\n\n" \
-                ">To edit any configuration parameter from 'configuration.toml', add --config_path=new_value\n" \
+                "> **/update_changelog**: Update the changelog based on the PR's contents.\n\n" \
+                ">To edit any configuration parameter from **configuration.toml**, add --config_path=new_value\n" \
                 ">For example: /review --pr_reviewer.extra_instructions=\"focus on the file: ...\" \n" \
                 ">To list the possible configuration parameters, use the **/config** command.\n" \