diff --git a/docs/docs/usage-guide/changing_a_model.md b/docs/docs/usage-guide/changing_a_model.md index dc3efc9c..3e470040 100644 --- a/docs/docs/usage-guide/changing_a_model.md +++ b/docs/docs/usage-guide/changing_a_model.md @@ -32,20 +32,26 @@ fallback_models=["..."] ### Ollama -**Local** -You can run Hugging Face models locally through either [VLLM](https://docs.litellm.ai/docs/providers/vllm) or [Ollama](https://docs.litellm.ai/docs/providers/ollama) +You can run models locally through either [VLLM](https://docs.litellm.ai/docs/providers/vllm) or [Ollama](https://docs.litellm.ai/docs/providers/ollama) -E.g. to use a new Hugging Face model locally via Ollama, set: +E.g. to use a new model locally via Ollama, set in `.secrets.toml` or in a configuration file: ``` -[config] # in configuration.toml -model = "ollama/llama2" -fallback_models=["ollama/llama2"] -custom_model_max_tokens=... # set the maximal input tokens for the model +[config] +model = "ollama/qwen2.5-coder:32b" +fallback_models=["ollama/qwen2.5-coder:32b"] +custom_model_max_tokens=128000 # set the maximal input tokens for the model +duplicate_prompt_examples=true # will duplicate the examples in the prompt, to help the model to output structured output -[ollama] # in .secrets.toml +[ollama] api_base = "http://localhost:11434" # or whatever port you're running Ollama on ``` +!!! note "Local models vs commercial models" + Qodo Merge is compatible with almost any AI model, but analyzing complex code repositories and pull requests requires a model specifically optimized for code analysis. + Commercial models such as GPT-4, Claude Sonnet, and Gemini have demonstrated robust capabilities in generating structured output for code analysis. In contrast, most open-source models currently available (as of January 2025) face challenges with these complex tasks. 
+ Based on our testing, local open-source models are suitable for experimentation and learning purposes, but they may not be suitable for production-level code analysis tasks. + Hence, for production workflows and real-world code analysis, we recommend using commercial models. + ### Hugging Face Inference Endpoints To use a new model with Hugging Face Inference Endpoints, for example, set: diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 90e10366..c9fb86d9 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -34,6 +34,7 @@ ai_disclaimer_title="" # Pro feature, title for a collapsible disclaimer to AI ai_disclaimer="" # Pro feature, full text for the AI disclaimer output_relevant_configurations=false large_patch_policy = "clip" # "clip", "skip" +duplicate_prompt_examples = false # seed seed=-1 # set positive value to fix the seed (and ensure temperature=0) temperature=0.2 diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index 0a15eee3..6fd17b89 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -130,6 +130,35 @@ The PR Git Diff: Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines. +{%- if duplicate_prompt_examples %} + + +Example output: +```yaml +type: +- Bug fix +- Refactoring +- ... +description: | + ... +title: | + ... +{%- if enable_semantic_files_types %} +pr_files: +- filename: | + ... +{%- if include_file_summary_changes %} + changes_summary: | + ... +{%- endif %} + changes_title: | + ... + label: | + label_key_1 +... 
+{%- endif %} +``` +{%- endif %} Response (should be a valid YAML, and nothing else): ```yaml diff --git a/pr_agent/settings/pr_description_prompts_json.toml b/pr_agent/settings/pr_description_prompts_json.toml new file mode 100644 index 00000000..9769c614 --- /dev/null +++ b/pr_agent/settings/pr_description_prompts_json.toml @@ -0,0 +1,158 @@ +[pr_description_prompt] +system="""You are PR-Reviewer, a language model designed to review a Git Pull Request (PR). +Your task is to provide a full description for the PR content - type, description, title and files walkthrough. +- Focus on the new PR code (lines starting with '+' in the 'PR Git Diff' section). +- Keep in mind that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or out of date. Hence, compare them to the PR diff code, and use them only as a reference. +- The generated title and description should prioritize the most significant changes. +- When quoting variables, names or file paths from the code, use backticks (`) instead of single quote ('). 
+ +{%- if extra_instructions %} + +Extra instructions from the user: +===== +{{extra_instructions}} +===== +{% endif %} + + +The output must be a JSON object equivalent to type $PRDescription, according to the following Pydantic definitions: +===== +class PRType(str, Enum): + bug_fix = "Bug fix" + tests = "Tests" + enhancement = "Enhancement" + documentation = "Documentation" + other = "Other" + +{%- if enable_custom_labels %} + +{{ custom_labels_class }} + +{%- endif %} + +{%- if enable_semantic_files_types %} + +class FileDescription(BaseModel): + filename: str = Field(description="The full file path of the relevant file") +{%- if include_file_summary_changes %} + changes_summary: str = Field(description="concise summary of the changes in the relevant file, in bullet points (1-4 bullet points).") +{%- endif %} + changes_title: str = Field(description="one-line summary (5-10 words) capturing the main theme of changes in the file") + label: str = Field(description="a single semantic label that represents a type of code changes that occurred in the File. Possible values (partial list): 'bug fix', 'tests', 'enhancement', 'documentation', 'error handling', 'configuration changes', 'dependencies', 'formatting', 'miscellaneous', ...") +{%- endif %} + +class PRDescription(BaseModel): + type: List[PRType] = Field(description="one or more types that describe the PR content. Return the label member value (e.g. 'Bug fix', not 'bug_fix')") + description: str = Field(description="summarize the PR changes in up to four bullet points, each up to 8 words. For large PRs, add sub-bullets if needed. Order bullets by importance, with each bullet highlighting a key change group.") + title: str = Field(description="a concise and descriptive title that captures the PR's main theme") +{%- if enable_semantic_files_types %} + pr_files: List[FileDescription] = Field(max_items=20, description="a list of all the files that were changed in the PR, and summary of their changes. 
Each file must be analyzed regardless of change size.") +{%- endif %} +===== + + +Example output: + +```json +{ + "type": [ + "...", + "..." + ], + "description": "...", + "title": "..." +{%- if enable_semantic_files_types %}, + "pr_files": [ + { + "filename": "...", +{%- if include_file_summary_changes %} + "changes_summary": "...", +{%- endif %} + "changes_title": "...", + "label": "label_key_1" + } + ] +{%- endif %} +} + + +Answer should be a valid JSON, and nothing else. +""" + +user=""" +{%- if related_tickets %} +Related Ticket Info: +{% for ticket in related_tickets %} +===== +Ticket Title: '{{ ticket.title }}' +{%- if ticket.labels %} +Ticket Labels: {{ ticket.labels }} +{%- endif %} +{%- if ticket.body %} +Ticket Description: +##### +{{ ticket.body }} +##### +{%- endif %} +===== +{% endfor %} +{%- endif %} + +PR Info: + +Previous title: '{{title}}' + +{%- if description %} + +Previous description: +===== +{{ description|trim }} +===== +{%- endif %} + +Branch: '{{branch}}' + +{%- if commit_messages_str %} + +Commit messages: +===== +{{ commit_messages_str|trim }} +===== +{%- endif %} + + +The PR Git Diff: +===== +{{ diff|trim }} +===== + +Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines. + + +Example output: +```json +{ + "type": [ + "...", + "..." + ], + "description": "...", + "title": "..." 
+{%- if enable_semantic_files_types %}, + "pr_files": [ + { + "filename": "...", +{%- if include_file_summary_changes %} + "changes_summary": "...", +{%- endif %} + "changes_title": "...", + "label": "label_key_1" + } + ] +{%- endif %} +} + + +Response (should be a valid JSON, and nothing else): +```json +""" \ No newline at end of file diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py index 18df4f10..7744b699 100644 --- a/pr_agent/tools/pr_description.py +++ b/pr_agent/tools/pr_description.py @@ -71,7 +71,8 @@ class PRDescription: "custom_labels_class": "", # will be filled if necessary in 'set_custom_labels' function "enable_semantic_files_types": get_settings().pr_description.enable_semantic_files_types, "related_tickets": "", - "include_file_summary_changes": len(self.git_provider.get_diff_files()) <= self.COLLAPSIBLE_FILE_LIST_THRESHOLD + "include_file_summary_changes": len(self.git_provider.get_diff_files()) <= self.COLLAPSIBLE_FILE_LIST_THRESHOLD, + 'duplicate_prompt_examples': get_settings().config.get('duplicate_prompt_examples', False), } self.user_description = self.git_provider.get_user_description()