mirror of
https://github.com/qodo-ai/pr-agent.git
synced 2025-07-06 22:00:40 +08:00
feat: add prompt example duplication option for improved model output
This commit is contained in:
@ -32,20 +32,26 @@ fallback_models=["..."]
|
||||
|
||||
### Ollama
|
||||
|
||||
**Local**
|
||||
You can run Hugging Face models locally through either [VLLM](https://docs.litellm.ai/docs/providers/vllm) or [Ollama](https://docs.litellm.ai/docs/providers/ollama)
|
||||
You can run models locally through either [VLLM](https://docs.litellm.ai/docs/providers/vllm) or [Ollama](https://docs.litellm.ai/docs/providers/ollama)
|
||||
|
||||
E.g. to use a new Hugging Face model locally via Ollama, set:
|
||||
E.g. to use a new model locally via Ollama, set in `.secrets.toml` or in a configuration file:
|
||||
```
|
||||
[config] # in configuration.toml
|
||||
model = "ollama/llama2"
|
||||
fallback_models=["ollama/llama2"]
|
||||
custom_model_max_tokens=... # set the maximal input tokens for the model
|
||||
[config]
|
||||
model = "ollama/qwen2.5-coder:32b"
|
||||
fallback_models=["ollama/qwen2.5-coder:32b"]
|
||||
custom_model_max_tokens=128000 # set the maximum number of input tokens for the model
|
||||
duplicate_prompt_examples=true # duplicates the examples in the prompt, to help the model generate structured output
|
||||
|
||||
[ollama] # in .secrets.toml
|
||||
[ollama]
|
||||
api_base = "http://localhost:11434" # or whatever port you're running Ollama on
|
||||
```
|
||||
|
||||
!!! note "Local models vs commercial models"
|
||||
Qodo Merge is compatible with almost any AI model, but analyzing complex code repositories and pull requests requires a model specifically optimized for code analysis.
|
||||
Commercial models such as GPT-4, Claude Sonnet, and Gemini have demonstrated robust capabilities in generating structured output for code analysis. In contrast, most open-source models currently available (as of January 2025) face challenges with these complex tasks.
|
||||
Based on our testing, local open-source models are suitable for experimentation and learning purposes, but they may not be suitable for production-level code analysis tasks.
|
||||
Hence, for production workflows and real-world code analysis, we recommend using commercial models.
|
||||
|
||||
### Hugging Face Inference Endpoints
|
||||
|
||||
To use a new model with Hugging Face Inference Endpoints, for example, set:
|
||||
|
@ -34,6 +34,7 @@ ai_disclaimer_title="" # Pro feature, title for a collapsible disclaimer to AI
|
||||
ai_disclaimer="" # Pro feature, full text for the AI disclaimer
|
||||
output_relevant_configurations=false
|
||||
large_patch_policy = "clip" # "clip", "skip"
|
||||
duplicate_prompt_examples = false
|
||||
# seed
|
||||
seed=-1 # set positive value to fix the seed (and ensure temperature=0)
|
||||
temperature=0.2
|
||||
|
@ -130,6 +130,35 @@ The PR Git Diff:
|
||||
|
||||
Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines.
|
||||
|
||||
{%- if duplicate_prompt_examples %}
|
||||
|
||||
|
||||
Example output:
|
||||
```yaml
|
||||
type:
|
||||
- Bug fix
|
||||
- Refactoring
|
||||
- ...
|
||||
description: |
|
||||
...
|
||||
title: |
|
||||
...
|
||||
{%- if enable_semantic_files_types %}
|
||||
pr_files:
|
||||
- filename: |
|
||||
...
|
||||
{%- if include_file_summary_changes %}
|
||||
changes_summary: |
|
||||
...
|
||||
{%- endif %}
|
||||
changes_title: |
|
||||
...
|
||||
label: |
|
||||
label_key_1
|
||||
...
|
||||
{%- endif %}
|
||||
```
|
||||
{%- endif %}
|
||||
|
||||
Response (should be a valid YAML, and nothing else):
|
||||
```yaml
|
||||
|
158
pr_agent/settings/pr_description_prompts_json.toml
Normal file
158
pr_agent/settings/pr_description_prompts_json.toml
Normal file
@ -0,0 +1,158 @@
|
||||
[pr_description_prompt]
|
||||
system="""You are PR-Reviewer, a language model designed to review a Git Pull Request (PR).
|
||||
Your task is to provide a full description for the PR content - type, description, title and files walkthrough.
|
||||
- Focus on the new PR code (lines starting with '+' in the 'PR Git Diff' section).
|
||||
- Keep in mind that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or out of date. Hence, compare them to the PR diff code, and use them only as a reference.
|
||||
- The generated title and description should prioritize the most significant changes.
|
||||
- When quoting variables, names or file paths from the code, use backticks (`) instead of single quote (').
|
||||
|
||||
{%- if extra_instructions %}
|
||||
|
||||
Extra instructions from the user:
|
||||
=====
|
||||
{{extra_instructions}}
|
||||
=====
|
||||
{% endif %}
|
||||
|
||||
|
||||
The output must be a JSON object equivalent to type $PRDescription, according to the following Pydantic definitions:
|
||||
=====
|
||||
class PRType(str, Enum):
|
||||
bug_fix = "Bug fix"
|
||||
tests = "Tests"
|
||||
enhancement = "Enhancement"
|
||||
documentation = "Documentation"
|
||||
other = "Other"
|
||||
|
||||
{%- if enable_custom_labels %}
|
||||
|
||||
{{ custom_labels_class }}
|
||||
|
||||
{%- endif %}
|
||||
|
||||
{%- if enable_semantic_files_types %}
|
||||
|
||||
class FileDescription(BaseModel):
|
||||
filename: str = Field(description="The full file path of the relevant file")
|
||||
{%- if include_file_summary_changes %}
|
||||
changes_summary: str = Field(description="concise summary of the changes in the relevant file, in bullet points (1-4 bullet points).")
|
||||
{%- endif %}
|
||||
changes_title: str = Field(description="one-line summary (5-10 words) capturing the main theme of changes in the file")
|
||||
label: str = Field(description="a single semantic label that represents a type of code changes that occurred in the File. Possible values (partial list): 'bug fix', 'tests', 'enhancement', 'documentation', 'error handling', 'configuration changes', 'dependencies', 'formatting', 'miscellaneous', ...")
|
||||
{%- endif %}
|
||||
|
||||
class PRDescription(BaseModel):
|
||||
type: List[PRType] = Field(description="one or more types that describe the PR content. Return the label member value (e.g. 'Bug fix', not 'bug_fix')")
|
||||
description: str = Field(description="summarize the PR changes in up to four bullet points, each up to 8 words. For large PRs, add sub-bullets if needed. Order bullets by importance, with each bullet highlighting a key change group.")
|
||||
title: str = Field(description="a concise and descriptive title that captures the PR's main theme")
|
||||
{%- if enable_semantic_files_types %}
|
||||
pr_files: List[FileDescription] = Field(max_items=20, description="a list of all the files that were changed in the PR, and summary of their changes. Each file must be analyzed regardless of change size.")
|
||||
{%- endif %}
|
||||
=====
|
||||
|
||||
|
||||
Example output:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": [
|
||||
"...",
|
||||
"..."
|
||||
],
|
||||
"description": "...",
|
||||
"title": "..."
|
||||
{%- if enable_semantic_files_types %},
|
||||
"pr_files": [
|
||||
{
|
||||
"filename": "...",
|
||||
{%- if include_file_summary_changes %}
|
||||
"changes_summary": "...",
|
||||
{%- endif %}
|
||||
"changes_title": "...",
|
||||
"label": "label_key_1"
|
||||
}
|
||||
]
|
||||
{%- endif %}
|
||||
}
```
|
||||
|
||||
|
||||
Answer should be a valid JSON, and nothing else.
|
||||
"""
|
||||
|
||||
user="""
|
||||
{%- if related_tickets %}
|
||||
Related Ticket Info:
|
||||
{% for ticket in related_tickets %}
|
||||
=====
|
||||
Ticket Title: '{{ ticket.title }}'
|
||||
{%- if ticket.labels %}
|
||||
Ticket Labels: {{ ticket.labels }}
|
||||
{%- endif %}
|
||||
{%- if ticket.body %}
|
||||
Ticket Description:
|
||||
#####
|
||||
{{ ticket.body }}
|
||||
#####
|
||||
{%- endif %}
|
||||
=====
|
||||
{% endfor %}
|
||||
{%- endif %}
|
||||
|
||||
PR Info:
|
||||
|
||||
Previous title: '{{title}}'
|
||||
|
||||
{%- if description %}
|
||||
|
||||
Previous description:
|
||||
=====
|
||||
{{ description|trim }}
|
||||
=====
|
||||
{%- endif %}
|
||||
|
||||
Branch: '{{branch}}'
|
||||
|
||||
{%- if commit_messages_str %}
|
||||
|
||||
Commit messages:
|
||||
=====
|
||||
{{ commit_messages_str|trim }}
|
||||
=====
|
||||
{%- endif %}
|
||||
|
||||
|
||||
The PR Git Diff:
|
||||
=====
|
||||
{{ diff|trim }}
|
||||
=====
|
||||
|
||||
Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines.
|
||||
|
||||
|
||||
Example output:
|
||||
```json
|
||||
{
|
||||
"type": [
|
||||
"...",
|
||||
"..."
|
||||
],
|
||||
"description": "...",
|
||||
"title": "..."
|
||||
{%- if enable_semantic_files_types %},
|
||||
"pr_files": [
|
||||
{
|
||||
"filename": "...",
|
||||
{%- if include_file_summary_changes %}
|
||||
"changes_summary": "...",
|
||||
{%- endif %}
|
||||
"changes_title": "...",
|
||||
"label": "label_key_1"
|
||||
}
|
||||
]
|
||||
{%- endif %}
|
||||
}
```
|
||||
|
||||
|
||||
Response (should be a valid JSON, and nothing else):
|
||||
```json
|
||||
"""
|
@ -71,7 +71,8 @@ class PRDescription:
|
||||
"custom_labels_class": "", # will be filled if necessary in 'set_custom_labels' function
|
||||
"enable_semantic_files_types": get_settings().pr_description.enable_semantic_files_types,
|
||||
"related_tickets": "",
|
||||
"include_file_summary_changes": len(self.git_provider.get_diff_files()) <= self.COLLAPSIBLE_FILE_LIST_THRESHOLD
|
||||
"include_file_summary_changes": len(self.git_provider.get_diff_files()) <= self.COLLAPSIBLE_FILE_LIST_THRESHOLD,
|
||||
'duplicate_prompt_examples': get_settings().config.get('duplicate_prompt_examples', False),
|
||||
}
|
||||
|
||||
self.user_description = self.git_provider.get_user_description()
|
||||
|
Reference in New Issue
Block a user