From 5971a06d739c9f70211c28c78290efa19cad150c Mon Sep 17 00:00:00 2001 From: mrT23 Date: Thu, 2 Jan 2025 11:16:21 +0200 Subject: [PATCH 1/6] docs: improve Ollama and Hugging Face model configuration docs --- docs/docs/usage-guide/changing_a_model.md | 30 ++++------------------- pr_agent/config_loader.py | 2 +- pr_agent/settings/configuration.toml | 2 +- 3 files changed, 7 insertions(+), 27 deletions(-) diff --git a/docs/docs/usage-guide/changing_a_model.md b/docs/docs/usage-guide/changing_a_model.md index c86af096..dc3efc9c 100644 --- a/docs/docs/usage-guide/changing_a_model.md +++ b/docs/docs/usage-guide/changing_a_model.md @@ -30,50 +30,30 @@ model="" # the OpenAI model you've deployed on Azure (e.g. gpt-4o) fallback_models=["..."] ``` -### Hugging Face +### Ollama **Local** You can run Hugging Face models locally through either [VLLM](https://docs.litellm.ai/docs/providers/vllm) or [Ollama](https://docs.litellm.ai/docs/providers/ollama) E.g. to use a new Hugging Face model locally via Ollama, set: ``` -[__init__.py] -MAX_TOKENS = { - "model-name-on-ollama": -} -e.g. -MAX_TOKENS={ - ..., - "ollama/llama2": 4096 -} - - [config] # in configuration.toml model = "ollama/llama2" fallback_models=["ollama/llama2"] +custom_model_max_tokens=... # set the maximal input tokens for the model [ollama] # in .secrets.toml -api_base = ... # the base url for your Hugging Face inference endpoint -# e.g. if running Ollama locally, you may use: -api_base = "http://localhost:11434/" +api_base = "http://localhost:11434" # or whatever port you're running Ollama on ``` -### Inference Endpoints +### Hugging Face Inference Endpoints To use a new model with Hugging Face Inference Endpoints, for example, set: ``` -[__init__.py] -MAX_TOKENS = { - "model-name-on-huggingface": -} -e.g. 
-MAX_TOKENS={ - ..., - "meta-llama/Llama-2-7b-chat-hf": 4096 -} [config] # in configuration.toml model = "huggingface/meta-llama/Llama-2-7b-chat-hf" fallback_models=["huggingface/meta-llama/Llama-2-7b-chat-hf"] +custom_model_max_tokens=... # set the maximal input tokens for the model [huggingface] # in .secrets.toml key = ... # your Hugging Face api key diff --git a/pr_agent/config_loader.py b/pr_agent/config_loader.py index b13a3ce7..9ae430ca 100644 --- a/pr_agent/config_loader.py +++ b/pr_agent/config_loader.py @@ -12,7 +12,6 @@ global_settings = Dynaconf( envvar_prefix=False, merge_enabled=True, settings_files=[join(current_dir, f) for f in [ - "settings/.secrets.toml", "settings/configuration.toml", "settings/ignore.toml", "settings/language_extensions.toml", @@ -29,6 +28,7 @@ global_settings = Dynaconf( "settings/pr_add_docs.toml", "settings/custom_labels.toml", "settings/pr_help_prompts.toml", + "settings/.secrets.toml", "settings_prod/.secrets.toml", ]] ) diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 3bc91099..90e10366 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -2,6 +2,7 @@ # models model="gpt-4o-2024-11-20" fallback_models=["gpt-4o-2024-08-06"] +custom_model_max_tokens=-1 # for models not in the default list #model_weak="gpt-4o-mini-2024-07-18" # optional, a weaker model to use for some easier tasks # CLI git_provider="github" @@ -21,7 +22,6 @@ skip_keys = [] max_description_tokens = 500 max_commits_tokens = 500 max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities. 
-custom_model_max_tokens=-1 # for models not in the default list # patch extension logic patch_extension_skip_types =[".md",".txt"] allow_dynamic_context=true From 531804720234dd55c4cfba55bf3fed17c67ef6b3 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Thu, 2 Jan 2025 12:25:42 +0200 Subject: [PATCH 2/6] feat: add prompt example duplication option for improved model output --- docs/docs/usage-guide/changing_a_model.md | 22 ++- pr_agent/settings/configuration.toml | 1 + pr_agent/settings/pr_description_prompts.toml | 29 ++++ .../settings/pr_description_prompts_json.toml | 158 ++++++++++++++++++ pr_agent/tools/pr_description.py | 3 +- 5 files changed, 204 insertions(+), 9 deletions(-) create mode 100644 pr_agent/settings/pr_description_prompts_json.toml diff --git a/docs/docs/usage-guide/changing_a_model.md b/docs/docs/usage-guide/changing_a_model.md index dc3efc9c..3e470040 100644 --- a/docs/docs/usage-guide/changing_a_model.md +++ b/docs/docs/usage-guide/changing_a_model.md @@ -32,20 +32,26 @@ fallback_models=["..."] ### Ollama -**Local** -You can run Hugging Face models locally through either [VLLM](https://docs.litellm.ai/docs/providers/vllm) or [Ollama](https://docs.litellm.ai/docs/providers/ollama) +You can run models locally through either [VLLM](https://docs.litellm.ai/docs/providers/vllm) or [Ollama](https://docs.litellm.ai/docs/providers/ollama) -E.g. to use a new Hugging Face model locally via Ollama, set: +E.g. to use a new model locally via Ollama, set in `.secrets.toml` or in a configuration file: ``` -[config] # in configuration.toml -model = "ollama/llama2" -fallback_models=["ollama/llama2"] -custom_model_max_tokens=... 
# set the maximal input tokens for the model +[config] +model = "ollama/qwen2.5-coder:32b" +fallback_models=["ollama/qwen2.5-coder:32b"] +custom_model_max_tokens=128000 # set the maximal input tokens for the model +duplicate_prompt_examples=true # will duplicate the examples in the prompt, to help the model produce structured output -[ollama] # in .secrets.toml +[ollama] api_base = "http://localhost:11434" # or whatever port you're running Ollama on ``` +!!! note "Local models vs commercial models" + Qodo Merge is compatible with almost any AI model, but analyzing complex code repositories and pull requests requires a model specifically optimized for code analysis. + Commercial models such as GPT-4, Claude Sonnet, and Gemini have demonstrated robust capabilities in generating structured output for code analysis. In contrast, most open-source models currently available (as of January 2025) face challenges with these complex tasks. + Based on our testing, local open-source models are suitable for experimentation and learning purposes, but they may not be suitable for production-level code analysis tasks. + Hence, for production workflows and real-world code analysis, we recommend using commercial models. 
+ ### Hugging Face Inference Endpoints To use a new model with Hugging Face Inference Endpoints, for example, set: diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 90e10366..c9fb86d9 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -34,6 +34,7 @@ ai_disclaimer_title="" # Pro feature, title for a collapsible disclaimer to AI ai_disclaimer="" # Pro feature, full text for the AI disclaimer output_relevant_configurations=false large_patch_policy = "clip" # "clip", "skip" +duplicate_prompt_examples = false # seed seed=-1 # set positive value to fix the seed (and ensure temperature=0) temperature=0.2 diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index 0a15eee3..6fd17b89 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -130,6 +130,35 @@ The PR Git Diff: Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines. +{%- if duplicate_prompt_examples %} + + +Example output: +```yaml +type: +- Bug fix +- Refactoring +- ... +description: | + ... +title: | + ... +{%- if enable_semantic_files_types %} +pr_files: +- filename: | + ... +{%- if include_file_summary_changes %} + changes_summary: | + ... +{%- endif %} + changes_title: | + ... + label: | + label_key_1 +... +{%- endif %} +``` +{%- endif %} Response (should be a valid YAML, and nothing else): ```yaml diff --git a/pr_agent/settings/pr_description_prompts_json.toml b/pr_agent/settings/pr_description_prompts_json.toml new file mode 100644 index 00000000..9769c614 --- /dev/null +++ b/pr_agent/settings/pr_description_prompts_json.toml @@ -0,0 +1,158 @@ +[pr_description_prompt] +system="""You are PR-Reviewer, a language model designed to review a Git Pull Request (PR). 
+Your task is to provide a full description for the PR content - type, description, title and files walkthrough. +- Focus on the new PR code (lines starting with '+' in the 'PR Git Diff' section). +- Keep in mind that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or out of date. Hence, compare them to the PR diff code, and use them only as a reference. +- The generated title and description should prioritize the most significant changes. +- When quoting variables, names or file paths from the code, use backticks (`) instead of single quote ('). + +{%- if extra_instructions %} + +Extra instructions from the user: +===== +{{extra_instructions}} +===== +{% endif %} + + +The output must be a JSON object equivalent to type $PRDescription, according to the following Pydantic definitions: +===== +class PRType(str, Enum): + bug_fix = "Bug fix" + tests = "Tests" + enhancement = "Enhancement" + documentation = "Documentation" + other = "Other" + +{%- if enable_custom_labels %} + +{{ custom_labels_class }} + +{%- endif %} + +{%- if enable_semantic_files_types %} + +class FileDescription(BaseModel): + filename: str = Field(description="The full file path of the relevant file") +{%- if include_file_summary_changes %} + changes_summary: str = Field(description="concise summary of the changes in the relevant file, in bullet points (1-4 bullet points).") +{%- endif %} + changes_title: str = Field(description="one-line summary (5-10 words) capturing the main theme of changes in the file") + label: str = Field(description="a single semantic label that represents a type of code changes that occurred in the File. 
Possible values (partial list): 'bug fix', 'tests', 'enhancement', 'documentation', 'error handling', 'configuration changes', 'dependencies', 'formatting', 'miscellaneous', ...") +{%- endif %} + +class PRDescription(BaseModel): + type: List[PRType] = Field(description="one or more types that describe the PR content. Return the label member value (e.g. 'Bug fix', not 'bug_fix')") + description: str = Field(description="summarize the PR changes in up to four bullet points, each up to 8 words. For large PRs, add sub-bullets if needed. Order bullets by importance, with each bullet highlighting a key change group.") + title: str = Field(description="a concise and descriptive title that captures the PR's main theme") +{%- if enable_semantic_files_types %} + pr_files: List[FileDescription] = Field(max_items=20, description="a list of all the files that were changed in the PR, and summary of their changes. Each file must be analyzed regardless of change size.") +{%- endif %} +===== + + +Example output: + +```json +{ + "type": [ + "...", + "..." + ], + "description": "...", + "title": "..." +{%- if enable_semantic_files_types %}, + "pr_files": [ + { + "filename": "...", +{%- if include_file_summary_changes %} + "changes_summary": "...", +{%- endif %} + "changes_title": "...", + "label": "label_key_1" + } + ] +{%- endif %} +} + + +Answer should be a valid JSON, and nothing else. 
+""" + +user=""" +{%- if related_tickets %} +Related Ticket Info: +{% for ticket in related_tickets %} +===== +Ticket Title: '{{ ticket.title }}' +{%- if ticket.labels %} +Ticket Labels: {{ ticket.labels }} +{%- endif %} +{%- if ticket.body %} +Ticket Description: +##### +{{ ticket.body }} +##### +{%- endif %} +===== +{% endfor %} +{%- endif %} + +PR Info: + +Previous title: '{{title}}' + +{%- if description %} + +Previous description: +===== +{{ description|trim }} +===== +{%- endif %} + +Branch: '{{branch}}' + +{%- if commit_messages_str %} + +Commit messages: +===== +{{ commit_messages_str|trim }} +===== +{%- endif %} + + +The PR Git Diff: +===== +{{ diff|trim }} +===== + +Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines. + + +Example output: +```json +{ + "type": [ + "...", + "..." + ], + "description": "...", + "title": "..." +{%- if enable_semantic_files_types %}, + "pr_files": [ + { + "filename": "...", +{%- if include_file_summary_changes %} + "changes_summary": "...", +{%- endif %} + "changes_title": "...", + "label": "label_key_1" + } + ] +{%- endif %} +} + + +Response (should be a valid JSON, and nothing else): +```json +""" \ No newline at end of file diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py index 18df4f10..7744b699 100644 --- a/pr_agent/tools/pr_description.py +++ b/pr_agent/tools/pr_description.py @@ -71,7 +71,8 @@ class PRDescription: "custom_labels_class": "", # will be filled if necessary in 'set_custom_labels' function "enable_semantic_files_types": get_settings().pr_description.enable_semantic_files_types, "related_tickets": "", - "include_file_summary_changes": len(self.git_provider.get_diff_files()) <= self.COLLAPSIBLE_FILE_LIST_THRESHOLD + "include_file_summary_changes": len(self.git_provider.get_diff_files()) <= self.COLLAPSIBLE_FILE_LIST_THRESHOLD, + 'duplicate_prompt_examples': 
get_settings().config.get('duplicate_prompt_examples', False), } self.user_description = self.git_provider.get_user_description() From e695af6917112086750aeed30ed78abe25b12d30 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Thu, 2 Jan 2025 12:33:26 +0200 Subject: [PATCH 3/6] feat: add example output duplication option for PR review prompts --- pr_agent/settings/pr_description_prompts.toml | 1 + pr_agent/settings/pr_reviewer_prompts.toml | 53 +++++++++++++++++++ pr_agent/tools/pr_reviewer.py | 1 + 3 files changed, 55 insertions(+) diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index 6fd17b89..21524731 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -158,6 +158,7 @@ pr_files: ... {%- endif %} ``` +(replace '...' with the actual values) {%- endif %} Response (should be a valid YAML, and nothing else): diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index 9401a0d3..fb5a134e 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -221,6 +221,59 @@ The PR code diff: ====== +{%- if duplicate_prompt_examples %} + + +Example output: +```yaml +review: +{%- if related_tickets %} + ticket_compliance_check: + - ticket_url: | + ... + ticket_requirements: | + ... + fully_compliant_requirements: | + ... + not_compliant_requirements: | + ... + overall_compliance_level: | + ... +{%- endif %} +{%- if require_estimate_effort_to_review %} + estimated_effort_to_review_[1-5]: | + 3 +{%- endif %} +{%- if require_score %} + score: 89 +{%- endif %} + relevant_tests: | + No + key_issues_to_review: + - relevant_file: | + ... + issue_header: | + ... + issue_content: | + ... + start_line: ... + end_line: ... + - ... + security_concerns: | + No +{%- if require_can_be_split_review %} + can_be_split: + - relevant_files: + - ... + - ... + title: ... + - ... 
+{%- endif %} +``` +(replace '...' with the actual values) +{%- endif %} + + Response (should be a valid YAML, and nothing else): ```yaml """ diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index 9905ae3d..ee3bb963 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -94,6 +94,7 @@ class PRReviewer: "enable_custom_labels": get_settings().config.enable_custom_labels, "is_ai_metadata": get_settings().get("config.enable_ai_metadata", False), "related_tickets": get_settings().get('related_tickets', []), + 'duplicate_prompt_examples': get_settings().config.get('duplicate_prompt_examples', False), } self.token_handler = TokenHandler( From a1a7c8e44ca81b48bc0413b36d50ba42a5ee027b Mon Sep 17 00:00:00 2001 From: mrT23 Date: Thu, 2 Jan 2025 12:46:50 +0200 Subject: [PATCH 4/6] feat: add example output duplication option for code suggestions prompts --- docs/docs/usage-guide/changing_a_model.md | 4 ++-- .../settings/pr_code_suggestions_prompts.toml | 24 +++++++++++++++++++ .../pr_code_suggestions_reflect_prompts.toml | 19 +++++++++++++++ pr_agent/tools/pr_code_suggestions.py | 4 +++- 4 files changed, 48 insertions(+), 3 deletions(-) diff --git a/docs/docs/usage-guide/changing_a_model.md b/docs/docs/usage-guide/changing_a_model.md index 3e470040..ed57bb3e 100644 --- a/docs/docs/usage-guide/changing_a_model.md +++ b/docs/docs/usage-guide/changing_a_model.md @@ -48,8 +48,8 @@ api_base = "http://localhost:11434" # or whatever port you're running Ollama on !!! note "Local models vs commercial models" Qodo Merge is compatible with almost any AI model, but analyzing complex code repositories and pull requests requires a model specifically optimized for code analysis. - Commercial models such as GPT-4, Claude Sonnet, and Gemini have demonstrated robust capabilities in generating structured output for code analysis. 
In contrast, most open-source models currently available (as of January 2025) face challenges with these complex tasks. - Based on our testing, local open-source models are suitable for experimentation and learning purposes, but they may not be suitable for production-level code analysis tasks. + Commercial models such as GPT-4, Claude Sonnet, and Gemini have demonstrated robust capabilities in generating structured output for code analysis tasks with large input. In contrast, most open-source models currently available (as of January 2025) face challenges with these complex tasks. + Based on our testing, local open-source models are suitable for experimentation and learning purposes, but they are not suitable for production-level code analysis tasks. Hence, for production workflows and real-world code analysis, we recommend using commercial models. ### Hugging Face Inference Endpoints diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml index 012ae0fc..7a449fb2 100644 --- a/pr_agent/settings/pr_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_code_suggestions_prompts.toml @@ -125,6 +125,30 @@ The PR Diff: {{ diff_no_line_numbers|trim }} ====== +{%- if duplicate_prompt_examples %} + + +Example output: +```yaml +code_suggestions: +- relevant_file: | + src/file1.py + language: | + python + suggestion_content: | + ... + existing_code: | + ... + improved_code: | + ... + one_sentence_summary: | + ... + label: | + ... +``` +(replace '...' 
with actual content) +{%- endif %} + Response (should be a valid YAML, and nothing else): ```yaml diff --git a/pr_agent/settings/pr_code_suggestions_reflect_prompts.toml b/pr_agent/settings/pr_code_suggestions_reflect_prompts.toml index 34b1eec4..16ffb435 100644 --- a/pr_agent/settings/pr_code_suggestions_reflect_prompts.toml +++ b/pr_agent/settings/pr_code_suggestions_reflect_prompts.toml @@ -122,6 +122,25 @@ Below are {{ num_code_suggestions }} AI-generated code suggestions for enhancing ====== +{%- if duplicate_prompt_examples %} + + +Example output: +```yaml +code_suggestions: +- suggestion_summary: | + ... + relevant_file: "..." + relevant_lines_start: ... + relevant_lines_end: ... + suggestion_score: ... + why: | + ... +- ... +``` +(replace '...' with actual content) +{%- endif %} + Response (should be a valid YAML, and nothing else): ```yaml """ diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py index f3d7cda4..aebe5dec 100644 --- a/pr_agent/tools/pr_code_suggestions.py +++ b/pr_agent/tools/pr_code_suggestions.py @@ -81,6 +81,7 @@ class PRCodeSuggestions: "relevant_best_practices": "", "is_ai_metadata": get_settings().get("config.enable_ai_metadata", False), "focus_only_on_problems": get_settings().get("pr_code_suggestions.focus_only_on_problems", False), + 'duplicate_prompt_examples': get_settings().config.get('duplicate_prompt_examples', False), } self.pr_code_suggestions_prompt_system = get_settings().pr_code_suggestions_prompt.system @@ -830,7 +831,8 @@ class PRCodeSuggestions: "diff": patches_diff, 'num_code_suggestions': len(suggestion_list), 'prev_suggestions_str': prev_suggestions_str, - "is_ai_metadata": get_settings().get("config.enable_ai_metadata", False)} + "is_ai_metadata": get_settings().get("config.enable_ai_metadata", False), + 'duplicate_prompt_examples': get_settings().config.get('duplicate_prompt_examples', False)} environment = Environment(undefined=StrictUndefined) if dedicated_prompt: From 
379fa957ea2d7b616fe2c019751f5582d1a85a47 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Thu, 2 Jan 2025 12:48:56 +0200 Subject: [PATCH 5/6] docs: clarify model recommendation for production usage --- docs/docs/usage-guide/changing_a_model.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/usage-guide/changing_a_model.md b/docs/docs/usage-guide/changing_a_model.md index ed57bb3e..6ee03759 100644 --- a/docs/docs/usage-guide/changing_a_model.md +++ b/docs/docs/usage-guide/changing_a_model.md @@ -50,7 +50,7 @@ api_base = "http://localhost:11434" # or whatever port you're running Ollama on Qodo Merge is compatible with almost any AI model, but analyzing complex code repositories and pull requests requires a model specifically optimized for code analysis. Commercial models such as GPT-4, Claude Sonnet, and Gemini have demonstrated robust capabilities in generating structured output for code analysis tasks with large input. In contrast, most open-source models currently available (as of January 2025) face challenges with these complex tasks. Based on our testing, local open-source models are suitable for experimentation and learning purposes, but they are not suitable for production-level code analysis tasks. - Hence, for production workflows and real-world code analysis, we recommend using commercial models. + Hence, for production workflows and real-world usage, we recommend using commercial models. 
### Hugging Face Inference Endpoints From 7f950a3aa989c99ed2f329f201d3086cd2813548 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Thu, 2 Jan 2025 12:50:38 +0200 Subject: [PATCH 6/6] docs: clarify model recommendation for production usage --- pr_agent/settings/configuration.toml | 2 +- pr_agent/settings/pr_description_prompts.toml | 1 + .../settings/pr_description_prompts_json.toml | 158 ------------------ 3 files changed, 2 insertions(+), 159 deletions(-) delete mode 100644 pr_agent/settings/pr_description_prompts_json.toml diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index c9fb86d9..29cd90e7 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -2,7 +2,6 @@ # models model="gpt-4o-2024-11-20" fallback_models=["gpt-4o-2024-08-06"] -custom_model_max_tokens=-1 # for models not in the default list #model_weak="gpt-4o-mini-2024-07-18" # optional, a weaker model to use for some easier tasks # CLI git_provider="github" @@ -22,6 +21,7 @@ skip_keys = [] max_description_tokens = 500 max_commits_tokens = 500 max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities. +custom_model_max_tokens=-1 # for models not in the default list # patch extension logic patch_extension_skip_types =[".md",".txt"] allow_dynamic_context=true diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index 21524731..73ec8459 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -161,6 +161,7 @@ pr_files: (replace '...' 
with the actual values) {%- endif %} + Response (should be a valid YAML, and nothing else): ```yaml """ diff --git a/pr_agent/settings/pr_description_prompts_json.toml b/pr_agent/settings/pr_description_prompts_json.toml deleted file mode 100644 index 9769c614..00000000 --- a/pr_agent/settings/pr_description_prompts_json.toml +++ /dev/null @@ -1,158 +0,0 @@ -[pr_description_prompt] -system="""You are PR-Reviewer, a language model designed to review a Git Pull Request (PR). -Your task is to provide a full description for the PR content - type, description, title and files walkthrough. -- Focus on the new PR code (lines starting with '+' in the 'PR Git Diff' section). -- Keep in mind that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or out of date. Hence, compare them to the PR diff code, and use them only as a reference. -- The generated title and description should prioritize the most significant changes. -- When quoting variables, names or file paths from the code, use backticks (`) instead of single quote ('). 
- -{%- if extra_instructions %} - -Extra instructions from the user: -===== -{{extra_instructions}} -===== -{% endif %} - - -The output must be a JSON object equivalent to type $PRDescription, according to the following Pydantic definitions: -===== -class PRType(str, Enum): - bug_fix = "Bug fix" - tests = "Tests" - enhancement = "Enhancement" - documentation = "Documentation" - other = "Other" - -{%- if enable_custom_labels %} - -{{ custom_labels_class }} - -{%- endif %} - -{%- if enable_semantic_files_types %} - -class FileDescription(BaseModel): - filename: str = Field(description="The full file path of the relevant file") -{%- if include_file_summary_changes %} - changes_summary: str = Field(description="concise summary of the changes in the relevant file, in bullet points (1-4 bullet points).") -{%- endif %} - changes_title: str = Field(description="one-line summary (5-10 words) capturing the main theme of changes in the file") - label: str = Field(description="a single semantic label that represents a type of code changes that occurred in the File. Possible values (partial list): 'bug fix', 'tests', 'enhancement', 'documentation', 'error handling', 'configuration changes', 'dependencies', 'formatting', 'miscellaneous', ...") -{%- endif %} - -class PRDescription(BaseModel): - type: List[PRType] = Field(description="one or more types that describe the PR content. Return the label member value (e.g. 'Bug fix', not 'bug_fix')") - description: str = Field(description="summarize the PR changes in up to four bullet points, each up to 8 words. For large PRs, add sub-bullets if needed. Order bullets by importance, with each bullet highlighting a key change group.") - title: str = Field(description="a concise and descriptive title that captures the PR's main theme") -{%- if enable_semantic_files_types %} - pr_files: List[FileDescription] = Field(max_items=20, description="a list of all the files that were changed in the PR, and summary of their changes. 
Each file must be analyzed regardless of change size.") -{%- endif %} -===== - - -Example output: - -```json -{ - "type": [ - "...", - "..." - ], - "description": "...", - "title": "..." -{%- if enable_semantic_files_types %}, - "pr_files": [ - { - "filename": "...", -{%- if include_file_summary_changes %} - "changes_summary": "...", -{%- endif %} - "changes_title": "...", - "label": "label_key_1" - } - ] -{%- endif %} -} - - -Answer should be a valid JSON, and nothing else. -""" - -user=""" -{%- if related_tickets %} -Related Ticket Info: -{% for ticket in related_tickets %} -===== -Ticket Title: '{{ ticket.title }}' -{%- if ticket.labels %} -Ticket Labels: {{ ticket.labels }} -{%- endif %} -{%- if ticket.body %} -Ticket Description: -##### -{{ ticket.body }} -##### -{%- endif %} -===== -{% endfor %} -{%- endif %} - -PR Info: - -Previous title: '{{title}}' - -{%- if description %} - -Previous description: -===== -{{ description|trim }} -===== -{%- endif %} - -Branch: '{{branch}}' - -{%- if commit_messages_str %} - -Commit messages: -===== -{{ commit_messages_str|trim }} -===== -{%- endif %} - - -The PR Git Diff: -===== -{{ diff|trim }} -===== - -Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines. - - -Example output: -```json -{ - "type": [ - "...", - "..." - ], - "description": "...", - "title": "..." -{%- if enable_semantic_files_types %}, - "pr_files": [ - { - "filename": "...", -{%- if include_file_summary_changes %} - "changes_summary": "...", -{%- endif %} - "changes_title": "...", - "label": "label_key_1" - } - ] -{%- endif %} -} - - -Response (should be a valid JSON, and nothing else): -```json -""" \ No newline at end of file