Mirror of https://github.com/qodo-ai/pr-agent.git (synced 2025-07-16 10:40:16 +08:00)

Compare commits: v0.28 ... qodo-merge (91 commits)
Commit SHA1s:

cd68a92283
44a3e5819c
27a7c1a94f
dc46acb762
0da667d179
73b3e2520c
3d2a285091
b1bc77c809
14dafc4016
7d00574044
93002acff8
f08bed67f1
08bf9593b2
4b58a5488f
57808075be
2cb226dbcc
943af0653b
259d67c064
84fdc4ca2b
c8f519ad70
8bdd11646c
fc6de449ad
a649e323d3
7a32faf64f
3c8ad9eac8
5f2d4d400e
0cbf65dab6
c0c307503f
60ace1ed09
7f6014e064
e4f40da35c
0ac7028bc6
a919c62606
abda701539
da59a6dbe8
e44b371d34
edaab4b6b1
2476dadf53
526d7ff5d2
cdc354c33b
eb9c4fa110
ca95e876eb
4ee9f784dd
ef7a8eafb6
83bb3b25d8
665fb90a98
0161769f22
629de489dd
a4957693ba
e8171b0289
d4cc57c32a
cb47cd5144
99d88f8d1d
d938ff05ef
ae6c4e741a
282fb0ed28
9b19fcdc90
76b447a62a
b7b533ddf6
7987fd1be7
db06a8e49e
4fa2d82179
1ffddaf719
94aa090552
2683b78e34
eac20ba0e2
c2e61b7113
c74a2efdb7
1710cd49f1
9254225949
14971c4f5f
ceaca3e621
3b0225544a
dbfc07ccc1
f0e0901b10
d749620ebb
e692735b7b
ed00ef6ee3
c674c5ed02
20cb139161
8513a1a4b9
a7ab04ba8d
afa4adcb23
7bd0fefee4
02d9aed7fe
7d47bd5f5e
8f0df437dd
ddf94c14a3
6b2a1ff529
4a5c115d20
069f5eb86f
42 README.md
@@ -22,7 +22,7 @@ PR-Agent aims to help efficiently review and handle pull requests, by providing

[](https://chromewebstore.google.com/detail/qodo-merge-ai-powered-cod/ephlnjeghhogofkifjloamocljapahnl)
[](https://github.com/apps/qodo-merge-pro/)
[](https://github.com/apps/qodo-merge-pro-for-open-source/)
[](https://discord.com/channels/1057273017547378788/1126104260430528613)
[](https://discord.com/invite/SgSxuQ65GF)
<a href="https://github.com/Codium-ai/pr-agent/commits/main">
<img alt="GitHub" src="https://img.shields.io/github/last-commit/Codium-ai/pr-agent/main?style=for-the-badge" height="20">
</a>

@@ -52,6 +52,22 @@ PR-Agent aims to help efficiently review and handle pull requests, by providing

## News and Updates

## Apr 14, 2025

GPT-4.1 is out. And it's quite good on coding tasks...

https://openai.com/index/gpt-4-1/

<img width="635" alt="image" src="https://github.com/user-attachments/assets/a8f4c648-a058-4bdc-9825-2a4bb71a23e5" />

## March 28, 2025

A new version, v0.28, was released. See release notes [here](https://github.com/qodo-ai/pr-agent/releases/tag/v0.28).

This version includes a new tool, [Help Docs](https://qodo-merge-docs.qodo.ai/tools/help_docs/), which can answer free-text questions based on a documentation folder.

`/help_docs` is now being used to provide immediate automatic feedback to any user who [opens an issue](https://github.com/qodo-ai/pr-agent/issues/1608#issue-2897328825) on PR-Agent's open-source project.

### Feb 28, 2025

A new version, v0.27, was released. See release notes [here](https://github.com/qodo-ai/pr-agent/releases/tag/v0.27).

@@ -60,30 +76,6 @@ A new version, v0.27, was released. See release notes [here](https://github.com/

- Important updates and bug fixes for Azure DevOps, see [here](https://github.com/qodo-ai/pr-agent/pull/1583)
- Added support for adjusting the [response language](https://qodo-merge-docs.qodo.ai/usage-guide/additional_configurations/#language-settings) of the PR-Agent tools.

### Feb 6, 2025

New design for the `/improve` tool:

<kbd><img src="https://github.com/user-attachments/assets/26506430-550e-469a-adaa-af0a09b70c6d" width="512"></kbd>

### Jan 25, 2025

The open-source GitHub organization was updated:
`https://github.com/codium-ai/pr-agent` →
`https://github.com/qodo-ai/pr-agent`

The Docker image should be redirected automatically to the new location.
However, if you have any issues, please update the GitHub Action docker image from
`uses: Codium-ai/pr-agent@main`
to
`uses: qodo-ai/pr-agent@main`

### Jan 2, 2025

New tool [/Implement](https://qodo-merge-docs.qodo.ai/tools/implement/) (💎), which converts human code review discussions and feedback into ready-to-commit code changes.

<kbd><img src="https://www.qodo.ai/images/pr_agent/implement1.png?v=2" width="512"></kbd>

### December 30, 2024
199 best_practices.md (new file)
@@ -0,0 +1,199 @@

<b>Pattern 1: Wrap critical operations with try-except blocks to handle potential exceptions, especially for file operations, API calls, and data parsing functions.</b>

Example code before:

```python
def get_git_repo_url(self, issues_or_pr_url: str) -> str:
    repo_path = self._get_owner_and_repo_path(issues_or_pr_url)
    if not repo_path or repo_path not in issues_or_pr_url:
        get_logger().error(f"Unable to retrieve owner/path from url: {issues_or_pr_url}")
        return ""
    return f"{issues_or_pr_url.split(repo_path)[0]}{repo_path}.git"
```

Example code after:

```python
def get_git_repo_url(self, issues_or_pr_url: str) -> str:
    try:
        repo_path = self._get_owner_and_repo_path(issues_or_pr_url)
        if not repo_path or repo_path not in issues_or_pr_url:
            get_logger().error(f"Unable to retrieve owner/path from url: {issues_or_pr_url}")
            return ""
        return f"{issues_or_pr_url.split(repo_path)[0]}{repo_path}.git"
    except Exception as e:
        get_logger().error(f"Failed to get git repo url from {issues_or_pr_url}, error: {e}")
        return ""
```

<details><summary>Examples for relevant past discussions:</summary>

- https://github.com/qodo-ai/pr-agent/pull/1644#discussion_r2013912636
- https://github.com/qodo-ai/pr-agent/pull/1263#discussion_r1782129216
- https://github.com/qodo-ai/pr-agent/pull/1391#discussion_r1879870807
</details>

___

<b>Pattern 2: Use proper logging methods instead of print statements, with get_logger().error() for errors, get_logger().warning() for warnings, and get_logger().info() for informational messages.</b>

Example code before:

```python
if isinstance(response_tuple, tuple) and len(response_tuple) == 3:
    response_json = json.loads(response_tuple[2])
else:
    print("Unexpected response format:", response_tuple)
    return sub_issues
```

Example code after:

```python
if isinstance(response_tuple, tuple) and len(response_tuple) == 3:
    response_json = json.loads(response_tuple[2])
else:
    get_logger().error("Unexpected response format", artifact={"response": response_tuple})
    return sub_issues
```

<details><summary>Examples for relevant past discussions:</summary>

- https://github.com/qodo-ai/pr-agent/pull/1529#discussion_r1958684550
- https://github.com/qodo-ai/pr-agent/pull/1529#discussion_r1958686068
- https://github.com/qodo-ai/pr-agent/pull/1529#discussion_r1964110734
- https://github.com/qodo-ai/pr-agent/pull/1634#discussion_r2007976915
</details>

___

<b>Pattern 3: Move specific imports to where they are actually used rather than at the top of the file, especially for rarely used or heavy dependencies.</b>

Example code before:

```python
import os
from azure.identity import ClientSecretCredential
import litellm
import openai
import requests
```

Example code after:

```python
import os
import litellm
import openai
import requests

# Later in the code where Azure AD is actually used:
if get_settings().get("AZURE_AD.CLIENT_ID", None):
    from azure.identity import ClientSecretCredential
    # Azure AD specific code...
```

<details><summary>Examples for relevant past discussions:</summary>

- https://github.com/qodo-ai/pr-agent/pull/1698#discussion_r2046221654
</details>

___

<b>Pattern 4: Add defensive checks for potentially None or invalid values before performing operations on them, especially when working with external data or API responses.</b>

Example code before:

```python
model_is_from_o_series = re.match(r"^o[1-9](-mini|-preview)?$", model)
if ('gpt' in get_settings().config.model.lower() or model_is_from_o_series) and get_settings().get('openai.key'):
    return encoder_estimate
```

Example code after:

```python
if model is None:
    get_logger().warning("Model is None, cannot determine model type accurately")
    return encoder_estimate

if not isinstance(model, str):
    get_logger().warning(f"Model is not a string type: {type(model)}")
    return encoder_estimate

model_is_from_o_series = re.match(r"^o[1-9](-mini|-preview)?$", model)
openai_key_exists = get_settings().get('openai.key') is not None

if (('gpt' in model.lower() or model_is_from_o_series) and openai_key_exists):
    return encoder_estimate
```

<details><summary>Examples for relevant past discussions:</summary>

- https://github.com/qodo-ai/pr-agent/pull/1644#discussion_r2032621065
- https://github.com/qodo-ai/pr-agent/pull/1529#discussion_r1958694146
- https://github.com/qodo-ai/pr-agent/pull/1391#discussion_r1879875496
</details>

___

<b>Pattern 5: Avoid redundant code initialization and reuse existing objects or instances when possible, especially for resource-intensive operations.</b>

Example code before:

```python
if tickets:
    provider = GithubProvider()

    for ticket in tickets:
        # Extract sub-issues
        sub_issues_content = []
        try:
            sub_issues = provider.fetch_sub_issues(ticket)
```

Example code after:

```python
if tickets:
    for ticket in tickets:
        # Extract sub-issues
        sub_issues_content = []
        try:
            sub_issues = git_provider.fetch_sub_issues(ticket)
```

<details><summary>Examples for relevant past discussions:</summary>

- https://github.com/qodo-ai/pr-agent/pull/1529#discussion_r1964085987
- https://github.com/qodo-ai/pr-agent/pull/1529#discussion_r1964088304
</details>

___

<b>Pattern 6: Use descriptive variable names and add explanatory comments for complex logic or non-obvious code to improve maintainability and readability.</b>

Example code before:

```python
issues = value
for i, issue in enumerate(issues):
    try:
        if not issue or not isinstance(issue, dict):
            continue
```

Example code after:

```python
focus_areas = value
for i, focus_area in enumerate(focus_areas):
    try:
        # Skip empty issues or non-dictionary items to ensure valid data structure
        if not focus_area or not isinstance(focus_area, dict):
            continue
```

<details><summary>Examples for relevant past discussions:</summary>

- https://github.com/qodo-ai/pr-agent/pull/1262#discussion_r1782097201
- https://github.com/qodo-ai/pr-agent/pull/1262#discussion_r1782097204
- https://github.com/qodo-ai/pr-agent/pull/1583#discussion_r1971790979
</details>

___
@@ -1,10 +1,10 @@
FROM public.ecr.aws/lambda/python:3.10
FROM public.ecr.aws/lambda/python:3.12

RUN yum update -y && \
    yum install -y gcc python3-devel git && \
    yum clean all
RUN dnf update -y && \
    dnf install -y gcc python3-devel git && \
    dnf clean all

ADD pyproject.toml requirements.txt .
ADD pyproject.toml requirements.txt ./
RUN pip install --no-cache-dir . && rm pyproject.toml
RUN pip install --no-cache-dir mangum==0.17.0
COPY pr_agent/ ${LAMBDA_TASK_ROOT}/pr_agent/
41 docs/docs/chrome-extension/options.md (new file)
@@ -0,0 +1,41 @@

## Options and Configurations

### Accessing the Options Page

To access the options page for the Qodo Merge Chrome extension:

1. Find the extension icon in your Chrome toolbar (usually in the top-right corner of your browser)
2. Right-click on the extension icon
3. Select "Options" from the context menu that appears

Alternatively, you can access the options page directly using this URL:

[chrome-extension://ephlnjeghhogofkifjloamocljapahnl/options.html](chrome-extension://ephlnjeghhogofkifjloamocljapahnl/options.html)

<img src="https://codium.ai/images/pr_agent/chrome_ext_options.png" width="256">

### Configuration Options

<img src="https://codium.ai/images/pr_agent/chrome_ext_settings_page.png" width="512">

#### API Base Host

For single-tenant customers, you can configure the extension to communicate directly with your company's Qodo Merge server instance.

To set this up:

- Enter your organization's Qodo Merge API endpoint in the "API Base Host" field
- This endpoint should be provided by your Qodo DevOps Team

*Note: The extension does not send your code to the server; it only triggers your previously installed Qodo Merge application.*

#### Interface Options

You can customize the extension's interface by:

- Toggling the "Show Qodo Merge Toolbar" option
- When disabled, the toolbar will not appear in your GitHub comment bar

Remember to click "Save Settings" after making any changes.
39 docs/docs/core-abilities/code_validation.md (new file)
@@ -0,0 +1,39 @@

## Introduction

The Git environment usually represents the final stage before code enters production. Hence, detecting bugs and issues during the review process is critical.

The [`improve`](https://qodo-merge-docs.qodo.ai/tools/improve/) tool provides actionable code suggestions for your pull requests, aiming to help detect and fix bugs and problems.
By default, suggestions appear as a comment in a table format:

## Validation of Code Suggestions

Each suggestion in the table can be "applied" by clicking on the `Apply this suggestion` checkbox, converting it to a committable Git code change that can be committed directly to the PR.
This approach allows you to fix issues without returning to your IDE for manual edits — significantly faster and more convenient.

However, committing a suggestion in a Git environment carries more risk than in a local IDE, as you don't have the opportunity to fully run and test the code before committing.

To balance convenience with safety, Qodo Merge implements a dual validation system for each generated code suggestion:

1) **Localization** - Qodo Merge confirms that the suggestion's line numbers and surrounding code, as predicted by the model, actually match the repo code. This means that the model correctly identified the context and location of the code to be changed.

2) **"Compilation"** - Using static code analysis, Qodo Merge verifies that after applying the suggestion, the modified file will still be valid, meaning tree-sitter syntax processing will not throw an error. This process is relevant for multiple programming languages; see [here](https://pypi.org/project/tree-sitter-languages/) for the full list of supported languages.
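A minimal sketch of this kind of syntax check, using the `tree_sitter_languages` package linked above (the helper function and its use here are illustrative, not Qodo Merge's actual implementation):

```python
from tree_sitter_languages import get_parser

def patched_file_is_valid(source_code: str, language: str = "python") -> bool:
    """Return True when tree-sitter parses the file without syntax errors."""
    parser = get_parser(language)  # raises if the language is not supported
    tree = parser.parse(source_code.encode("utf-8"))
    return not tree.root_node.has_error  # error-free syntax tree => "compiles"

# A suggestion that leaves the file syntactically broken would be rejected:
print(patched_file_is_valid("def f(:\n    pass"))   # False
print(patched_file_is_valid("def f():\n    pass"))  # True
```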
When a suggestion fails to meet these validation criteria, it may still provide valuable feedback, but isn't suitable for direct application to the PR.
In such cases, Qodo Merge will omit the 'apply' checkbox and instead display:

`[To ensure code accuracy, apply this suggestion manually]`

All suggestions that pass these validations undergo a final stage of **self-reflection**, where the AI model evaluates, scores, and re-ranks its own suggestions, eliminating any that are irrelevant or incorrect.
Read more about this process in the [self-reflection](https://qodo-merge-docs.qodo.ai/core-abilities/self_reflection/) page.

## Conclusion

The validation methods described above enhance the reliability of code suggestions and help PR authors determine which suggestions are safer to apply in the Git environment.
Of course, additional factors should be considered, such as suggestion complexity and potential code impact.

Human judgment remains essential. After clicking 'apply', Qodo Merge still presents the 'before' and 'after' code snippets for review, allowing you to assess the changes before finalizing the commit.
@@ -1,57 +0,0 @@
# Company Codebase 💎

`Supported Git Platforms: GitHub`

## Overview

### What is Company Codebase?

An organized, semantic database that aggregates all your company's source code into one searchable repository, enabling efficient code discovery and analysis.

### How does Company Codebase work?

By indexing your company's code and using Retrieval-Augmented Generation (RAG), it retrieves contextual code segments on demand, improving pull request (PR) insights and accelerating review accuracy.

## Getting started

!!! info "Prerequisites"
    - Database setup and codebase indexing must be completed before proceeding. [Contact support](https://www.qodo.ai/contact/) for assistance.

### Configuration options

In order to enable the RAG feature, add the following lines to your configuration file:

``` toml
[rag_arguments]
enable_rag=true
```

!!! example "RAG Arguments Options"

    <table>
      <tr>
        <td><b>enable_rag</b></td>
        <td>If set to true, codebase enrichment using RAG will be enabled. Default is false.</td>
      </tr>
      <tr>
        <td><b>rag_repo_list</b></td>
        <td>A list of repositories that will be used by the semantic search for RAG. Use `['all']` to consider the entire codebase or a select list of repositories, for example: ['my-org/my-repo', ...]. Default: the repository from which the PR was opened.</td>
      </tr>
    </table>

References from the codebase will be shown in a collapsible bookmark, allowing you to easily access relevant code snippets:

## Limitations

### Querying the codebase presents significant challenges

- **Search Method**: RAG uses natural language queries to find semantically relevant code sections
- **Result Quality**: No guarantee that RAG results will be useful for all queries
- **Scope Recommendation**: To reduce noise, avoid using the whole codebase; focus on the PR repository instead

### This feature has several requirements and restrictions

- **Codebase**: Must be properly indexed for search functionality
- **Security**: Requires secure and private indexed codebase implementation
- **Deployment**: Only available for Qodo Merge Enterprise plan using single tenant or on-premises setup
@@ -176,6 +176,50 @@ jira_base_url = "YOUR_JIRA_BASE_URL" # e.g. https://jira.example.com
jira_api_token = "YOUR_API_TOKEN"
```

#### Validating PAT token via Python script

If you are facing issues retrieving tickets in Qodo Merge with a PAT token, you can validate the flow using a Python script.
The following steps will help you check if the token is working correctly, and if you can access the Jira ticket details:

1. Run `pip install jira==3.8.0`

2. Run the following Python script (after replacing the placeholders with your actual values):

??? example "Script to validate PAT token"

    ```python
    from jira import JIRA


    if __name__ == "__main__":
        try:
            # Jira server URL
            server = "https://..."
            # Jira PAT token
            token_auth = "..."
            # Jira ticket code (e.g. "PROJ-123")
            ticket_id = "..."

            print("Initializing JiraServerTicketProvider with JIRA server")
            # Initialize JIRA client
            jira = JIRA(
                server=server,
                token_auth=token_auth,
                timeout=30
            )
            if jira:
                print("JIRA client initialized successfully")
            else:
                print("Error initializing JIRA client")

            # Fetch ticket details
            ticket = jira.issue(ticket_id)
            print(f"Ticket title: {ticket.fields.summary}")

        except Exception as e:
            print(f"Error fetching JIRA ticket details: {e}")
    ```

### How to link a PR to a Jira ticket

To integrate with Jira, you can link your PR to a ticket using either of these methods:
@@ -1,17 +1,18 @@
# Core Abilities

Qodo Merge utilizes a variety of core abilities to provide a comprehensive and efficient code review experience. These abilities include:

- [Fetching ticket context](https://qodo-merge-docs.qodo.ai/core-abilities/fetching_ticket_context/)
- [Auto best practices](https://qodo-merge-docs.qodo.ai/core-abilities/auto_best_practices/)
- [Local and global metadata](https://qodo-merge-docs.qodo.ai/core-abilities/metadata/)
- [Pull request benchmark](https://qodo-merge-docs.qodo.ai/finetuning_benchmark/)
- [Code validation](https://qodo-merge-docs.qodo.ai/core-abilities/code_validation/)
- [Compression strategy](https://qodo-merge-docs.qodo.ai/core-abilities/compression_strategy/)
- [Dynamic context](https://qodo-merge-docs.qodo.ai/core-abilities/dynamic_context/)
- [Self-reflection](https://qodo-merge-docs.qodo.ai/core-abilities/self_reflection/)
- [Fetching ticket context](https://qodo-merge-docs.qodo.ai/core-abilities/fetching_ticket_context/)
- [Impact evaluation](https://qodo-merge-docs.qodo.ai/core-abilities/impact_evaluation/)
- [Interactivity](https://qodo-merge-docs.qodo.ai/core-abilities/interactivity/)
- [Compression strategy](https://qodo-merge-docs.qodo.ai/core-abilities/compression_strategy/)
- [Company Codebase](https://qodo-merge-docs.qodo.ai/core-abilities/company_codebase/)
- [Local and global metadata](https://qodo-merge-docs.qodo.ai/core-abilities/metadata/)
- [RAG context enrichment](https://qodo-merge-docs.qodo.ai/core-abilities/rag_context_enrichment/)
- [Self-reflection](https://qodo-merge-docs.qodo.ai/core-abilities/self_reflection/)
- [Static code analysis](https://qodo-merge-docs.qodo.ai/core-abilities/static_code_analysis/)
- [Code fine-tuning benchmark](https://qodo-merge-docs.qodo.ai/finetuning_benchmark/)

## Blogs
@@ -1,2 +1,43 @@
## Interactive invocation 💎

TBD

# Interactivity

`Supported Git Platforms: GitHub, GitLab`

## Overview

Qodo Merge transforms static code reviews into interactive experiences by enabling direct actions from pull request (PR) comments.
Developers can immediately trigger actions and apply changes with simple checkbox clicks.

This focused workflow maintains context while dramatically reducing the time between PR creation and final merge.
The approach eliminates manual steps, provides clear visual indicators, and creates immediate feedback loops, all within the same interface.

## Key Interactive Features

### 1\. Interactive `/improve` Tool

The [`/improve`](https://qodo-merge-docs.qodo.ai/tools/improve/) command delivers a comprehensive interactive experience:

- _**Apply this suggestion**_: Clicking this checkbox instantly converts a suggestion into a committable code change. When committed to the PR, changes made to code that was flagged for improvement will be marked with a check mark, allowing developers to easily track and review implemented recommendations.

- _**More**_: Triggers generation of additional suggestions, each as focused and relevant as the original set

- _**Update**_: Triggers a re-analysis of the code, providing updated suggestions based on the latest changes

- _**Author self-review**_: Interactive acknowledgment that developers have opened and reviewed collapsed suggestions

### 2\. Interactive `/analyze` Tool

The [`/analyze`](https://qodo-merge-docs.qodo.ai/tools/analyze/) command provides component-level analysis with interactive options for each identified code component:

- Interactive checkboxes to generate tests, documentation, and code suggestions for specific components

- On-demand similar code search that activates when a checkbox is clicked

- Component-specific actions that trigger only for the selected elements, providing focused assistance

### 3\. Interactive `/help` Tool

The [`/help`](https://qodo-merge-docs.qodo.ai/tools/help/) command not only lists available tools and their descriptions but also enables immediate tool invocation through interactive checkboxes.
When a user checks a tool's checkbox, Qodo Merge instantly triggers that tool without requiring additional commands.
This transforms the standard help menu into an interactive launch pad for all Qodo Merge capabilities, eliminating context switching by keeping developers within their PR workflow.
78 docs/docs/core-abilities/rag_context_enrichment.md (new file)
@@ -0,0 +1,78 @@

# RAG Context Enrichment 💎

`Supported Git Platforms: GitHub`

!!! info "Prerequisites"
    - RAG is available only for Qodo enterprise plan users, with single tenant or on-premises setup.
    - Database setup and codebase indexing must be completed before proceeding. [Contact support](https://www.qodo.ai/contact/) for more information.

## Overview

### What is RAG Context Enrichment?

A feature that enhances AI analysis by retrieving and referencing relevant code patterns from your project, enabling context-aware insights during code reviews.

### How does RAG Context Enrichment work?

Using Retrieval-Augmented Generation (RAG), it searches your configured repositories for contextually relevant code segments, enriching pull request (PR) insights and accelerating review accuracy.

## Getting started

### Configuration options

In order to enable the RAG feature, add the following lines to your configuration file:

``` toml
[rag_arguments]
enable_rag=true
```

!!! example "RAG Arguments Options"

    <table>
      <tr>
        <td><b>enable_rag</b></td>
        <td>If set to true, repository enrichment using RAG will be enabled. Default is false.</td>
      </tr>
      <tr>
        <td><b>rag_repo_list</b></td>
        <td>A list of repositories that will be used by the semantic search for RAG. Use `['all']` to consider the entire codebase or a select list of repositories, for example: ['my-org/my-repo', ...]. Default: the repository from which the PR was opened.</td>
      </tr>
    </table>
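For instance, a configuration that scopes the semantic search to a single repository might look like this sketch (the repository name is a placeholder):

``` toml
[rag_arguments]
enable_rag = true
rag_repo_list = ["my-org/my-repo"]  # placeholder; defaults to the PR's own repository
```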
### Applications

#### 1\. The `/review` Tool

The [`/review`](https://qodo-merge-docs.qodo.ai/tools/review/) tool offers the _Focus area from RAG data_ section, which contains feedback based on the RAG references analysis.
The complete list of references found relevant to the PR will be shown in the _References_ section, helping developers understand the broader context by exploring the provided references.

#### 2\. The `/implement` Tool

The [`/implement`](https://qodo-merge-docs.qodo.ai/tools/implement/) tool utilizes the RAG feature to provide comprehensive context of the repository codebase, allowing it to generate more refined code output.
The _References_ section contains links to the content used to support the code generation.

#### 3\. The `/ask` Tool

The [`/ask`](https://qodo-merge-docs.qodo.ai/tools/ask/) tool can access broader repository context through the RAG feature when answering questions that go beyond the PR scope alone.
The _References_ section displays the additional repository content consulted to formulate the answer.

## Limitations

### Querying the codebase presents significant challenges

- **Search Method**: RAG uses natural language queries to find semantically relevant code sections
- **Result Quality**: No guarantee that RAG results will be useful for all queries
- **Scope Recommendation**: To reduce noise, focus on the PR repository rather than searching across multiple repositories

### This feature has several requirements and restrictions

- **Codebase**: Must be properly indexed for search functionality
- **Security**: Requires secure and private indexed codebase implementation
- **Deployment**: Only available for Qodo Merge Enterprise plan using single tenant or on-premises setup
@@ -1,10 +1,10 @@
# Qodo Merge Code Fine-tuning Benchmark
# Qodo Merge Pull Request Benchmark

On coding tasks, the gap between open-source models and top closed-source models such as GPT-4o is significant.
On coding tasks, the gap between open-source models and top closed-source models such as Claude and GPT is significant.
<br>
In practice, open-source models are unsuitable for most real-world code tasks, and require further fine-tuning to produce acceptable results.

_Qodo Merge fine-tuning benchmark_ aims to benchmark open-source models on their ability to be fine-tuned for a coding task.
_Qodo Merge pull request benchmark_ aims to benchmark models on their ability to be fine-tuned for a coding task.
Specifically, we chose to fine-tune open-source models on the task of analyzing a pull request, and providing useful feedback and code suggestions.

Here are the results:

@@ -49,7 +49,7 @@ Here are the results:
- **The best small model** - For small 7B code-dedicated models, the gaps when fine-tuning are much larger. **CodeQWEN 1.5-7B** is by far the best model for fine-tuning.
- **Base vs. instruct** - For the top model (deepseek), we saw a small advantage when starting from the instruct version. However, we recommend testing both versions on each specific task, as the base model is generally considered more suitable for fine-tuning.

## The dataset
## Dataset

### Training dataset

@@ -91,3 +91,11 @@ why: |
actionable suggestions, such as changing variable names and adding comments, which are less
critical for immediate code improvement."
```

## Comparing Top Closed-Source Models

Another application of the Pull Request Benchmark is comparing leading closed-source models to determine which performs better at analyzing pull request code.

The evaluation methodology resembles the approach used for evaluating fine-tuned models:

- We ran each model across 200 diverse pull requests, asking them to generate code suggestions using Qodo Merge's `improve` tool
- A third top model served as judge to determine which response better fulfilled the prompt and would likely be perceived as superior by human users
@@ -9,6 +9,7 @@ Qodo Merge is a hosted version of PR-Agent, designed for companies and teams tha

- See the [Tools Guide](./tools/index.md) for a detailed description of the different tools.

- See the [Video Tutorials](https://www.youtube.com/playlist?list=PLRTpyDOSgbwFMA_VBeKMnPLaaZKwjGBFT) for practical demonstrations on how to use the tools.

## Docs Smart Search

@@ -22,42 +23,51 @@ To search the documentation site using natural language:
2) The bot will respond with an [answer](https://github.com/Codium-ai/pr-agent/pull/1241#issuecomment-2365259334) that includes relevant documentation links.

## Qodo Merge Features
## Features

Qodo Merge offers extensive pull request functionalities across various git providers:
PR-Agent and Qodo Merge offer extensive pull request functionalities across various git providers:

| | | GitHub | Gitlab | Bitbucket | Azure DevOps |
|-------|------|:------:|:------:|:---------:|:------------:|
| TOOLS | Review | ✅ | ✅ | ✅ | ✅ |
| | ⮑ Incremental | ✅ | | | |
| | Ask | ✅ | ✅ | ✅ | ✅ |
| | Describe | ✅ | ✅ | ✅ | ✅ |
| | ⮑ [Inline file summary](https://qodo-merge-docs.qodo.ai/tools/describe/#inline-file-summary){:target="_blank"} 💎 | ✅ | ✅ | | ✅ |
| | Improve | ✅ | ✅ | ✅ | ✅ |
| | ⮑ Extended | ✅ | ✅ | ✅ | ✅ |
| | [Auto-Approve](https://qodo-merge-docs.qodo.ai/tools/improve/#auto-approval) 💎 | ✅ | ✅ | ✅ | |
| | [Custom Prompt](./tools/custom_prompt.md){:target="_blank"} 💎 | ✅ | ✅ | ✅ | ✅ |
| | Reflect and Review | ✅ | ✅ | ✅ | ✅ |
| | Update CHANGELOG.md | ✅ | ✅ | ✅ | |
| | Find Similar Issue | ✅ | | | |
| | [Add PR Documentation](./tools/documentation.md){:target="_blank"} 💎 | ✅ | ✅ | | ✅ |
| | [Generate Custom Labels](./tools/describe.md#handle-custom-labels-from-the-repos-labels-page-💎){:target="_blank"} 💎 | ✅ | ✅ | | ✅ |
| | [Analyze PR Components](./tools/analyze.md){:target="_blank"} 💎 | ✅ | ✅ | | ✅ |
| | [Test](https://pr-agent-docs.codium.ai/tools/test/) 💎 | ✅ | ✅ | | |
| | [Implement](https://pr-agent-docs.codium.ai/tools/implement/) 💎 | ✅ | ✅ | ✅ | |
| | | | | | |
| USAGE | CLI | ✅ | ✅ | ✅ | ✅ |
| | App / webhook | ✅ | ✅ | ✅ | ✅ |
| | Actions | ✅ | | | |
| | | | | | |
| CORE | PR compression | ✅ | ✅ | ✅ | ✅ |
| | Repo language prioritization | ✅ | ✅ | ✅ | ✅ |
| | Adaptive and token-aware file patch fitting | ✅ | ✅ | ✅ | ✅ |
| | Multiple models support | ✅ | ✅ | ✅ | ✅ |
| | [Static code analysis](./core-abilities/static_code_analysis/){:target="_blank"} 💎 | ✅ | ✅ | | |
| | [Multiple configuration options](./usage-guide/configuration_options.md){:target="_blank"} 💎 | ✅ | ✅ | ✅ | ✅ |

| | | GitHub | GitLab | Bitbucket | Azure DevOps |
|-------|------|:------:|:------:|:---------:|:------------:|
| TOOLS | [Review](https://qodo-merge-docs.qodo.ai/tools/review/) | ✅ | ✅ | ✅ | ✅ |
| | [Describe](https://qodo-merge-docs.qodo.ai/tools/describe/) | ✅ | ✅ | ✅ | ✅ |
| | [Improve](https://qodo-merge-docs.qodo.ai/tools/improve/) | ✅ | ✅ | ✅ | ✅ |
| | [Ask](https://qodo-merge-docs.qodo.ai/tools/ask/) | ✅ | ✅ | ✅ | ✅ |
| | ⮑ [Ask on code lines](https://qodo-merge-docs.qodo.ai/tools/ask/#ask-lines) | ✅ | ✅ | | |
| | [Update CHANGELOG](https://qodo-merge-docs.qodo.ai/tools/update_changelog/) | ✅ | ✅ | ✅ | ✅ |
| | [Help Docs](https://qodo-merge-docs.qodo.ai/tools/help_docs/?h=auto#auto-approval) | ✅ | ✅ | ✅ | |
| | [Ticket Context](https://qodo-merge-docs.qodo.ai/core-abilities/fetching_ticket_context/) 💎 | ✅ | ✅ | ✅ | |
| | [Utilizing Best Practices](https://qodo-merge-docs.qodo.ai/tools/improve/#best-practices) 💎 | ✅ | ✅ | ✅ | |
| | [PR Chat](https://qodo-merge-docs.qodo.ai/chrome-extension/features/#pr-chat) 💎 | ✅ | | | |
| | [Suggestion Tracking](https://qodo-merge-docs.qodo.ai/tools/improve/#suggestion-tracking) 💎 | ✅ | ✅ | | |
| | [CI Feedback](https://qodo-merge-docs.qodo.ai/tools/ci_feedback/) 💎 | ✅ | | | |
| | [PR Documentation](https://qodo-merge-docs.qodo.ai/tools/documentation/) 💎 | ✅ | ✅ | | |
| | [Custom Labels](https://qodo-merge-docs.qodo.ai/tools/custom_labels/) 💎 | ✅ | ✅ | | |
| | [Analyze](https://qodo-merge-docs.qodo.ai/tools/analyze/) 💎 | ✅ | ✅ | | |
| | [Similar Code](https://qodo-merge-docs.qodo.ai/tools/similar_code/) 💎 | ✅ | | | |
| | [Custom Prompt](https://qodo-merge-docs.qodo.ai/tools/custom_prompt/) 💎 | ✅ | ✅ | ✅ | |
| | [Test](https://qodo-merge-docs.qodo.ai/tools/test/) 💎 | ✅ | ✅ | | |
| | [Implement](https://qodo-merge-docs.qodo.ai/tools/implement/) 💎 | ✅ | ✅ | ✅ | |
| | [Auto-Approve](https://qodo-merge-docs.qodo.ai/tools/improve/?h=auto#auto-approval) 💎 | ✅ | ✅ | ✅ | |
| | | | | | |
| USAGE | [CLI](https://qodo-merge-docs.qodo.ai/usage-guide/automations_and_usage/#local-repo-cli) | ✅ | ✅ | ✅ | ✅ |
| | [App / webhook](https://qodo-merge-docs.qodo.ai/usage-guide/automations_and_usage/#github-app) | ✅ | ✅ | ✅ | ✅ |
| | [Tagging bot](https://github.com/Codium-ai/pr-agent#try-it-now) | ✅ | | | |
| | [Actions](https://qodo-merge-docs.qodo.ai/installation/github/#run-as-a-github-action) | ✅ | ✅ | ✅ | ✅ |
| | | | | | |
| CORE | [PR compression](https://qodo-merge-docs.qodo.ai/core-abilities/compression_strategy/) | ✅ | ✅ | ✅ | ✅ |
| | Adaptive and token-aware file patch fitting | ✅ | ✅ | ✅ | ✅ |
| | [Multiple models support](https://qodo-merge-docs.qodo.ai/usage-guide/changing_a_model/) | ✅ | ✅ | ✅ | ✅ |
| | [Local and global metadata](https://qodo-merge-docs.qodo.ai/core-abilities/metadata/) | ✅ | ✅ | ✅ | ✅ |
| | [Dynamic context](https://qodo-merge-docs.qodo.ai/core-abilities/dynamic_context/) | ✅ | ✅ | ✅ | ✅ |
| | [Self reflection](https://qodo-merge-docs.qodo.ai/core-abilities/self_reflection/) | ✅ | ✅ | ✅ | ✅ |
| | [Static code analysis](https://qodo-merge-docs.qodo.ai/core-abilities/static_code_analysis/) 💎 | ✅ | ✅ | | |
| | [Global and wiki configurations](https://qodo-merge-docs.qodo.ai/usage-guide/configuration_options/) 💎 | ✅ | ✅ | ✅ | |
| | [PR interactive actions](https://www.qodo.ai/images/pr_agent/pr-actions.mp4) 💎 | ✅ | ✅ | | |
| | [Impact Evaluation](https://qodo-merge-docs.qodo.ai/core-abilities/impact_evaluation/) 💎 | ✅ | ✅ | | |

💎 marks a feature available only in [Qodo Merge](https://www.codium.ai/pricing/){:target="_blank"}, and not in the open-source version.
!!! note "💎 means Qodo Merge only"
    All along the documentation, 💎 marks a feature available only in [Qodo Merge](https://www.codium.ai/pricing/){:target="_blank"}, and not in the open-source version.

## Example Results
@@ -1,33 +1,33 @@
## Run as a Bitbucket Pipeline

You can use the Bitbucket Pipeline system to run PR-Agent whenever a pull request is opened or updated.

1. Add the following file to your repository as `bitbucket-pipelines.yml`:

```yaml
pipelines:
  pull-requests:
    "**":
      - step:
          name: PR Agent Review
          image: python:3.12
          services:
            - docker
          script:
            - docker run -e CONFIG.GIT_PROVIDER=bitbucket -e OPENAI.KEY=$OPENAI_API_KEY -e BITBUCKET.BEARER_TOKEN=$BITBUCKET_BEARER_TOKEN codiumai/pr-agent:latest --pr_url=https://bitbucket.org/$BITBUCKET_WORKSPACE/$BITBUCKET_REPO_SLUG/pull-requests/$BITBUCKET_PR_ID review
```

2. Add the following secure variables to your repository under Repository settings > Pipelines > Repository variables.

OPENAI_API_KEY: `<your key>`
BITBUCKET.AUTH_TYPE: `basic` or `bearer` (default is `bearer`)
BITBUCKET.BEARER_TOKEN: `<your token>` (required when auth_type is bearer)
BITBUCKET.BASIC_TOKEN: `<your token>` (required when auth_type is basic)

You can get a Bitbucket token for your repository under Repository Settings -> Security -> Access Tokens.
For basic auth, you can generate a base64 encoded token from your username:password combination.
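A quick way to produce that base64 value is a one-off Python snippet (a minimal sketch; the credentials shown are placeholders):

```python
import base64

# Encode "username:password" for Bitbucket basic auth.
credentials = "my_username:my_app_password"  # placeholders
basic_token = base64.b64encode(credentials.encode("utf-8")).decode("utf-8")
print(basic_token)  # use this value as BITBUCKET.BASIC_TOKEN
```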
Note that comments on a PR are not supported in Bitbucket Pipeline.

## Bitbucket Server and Data Center

Log into your on-prem instance of Bitbucket with your service account username and password.
@@ -48,6 +48,7 @@ git_provider="bitbucket_server"
```

and pass the Pull request URL:

```shell
python cli.py --pr_url https://git.onpreminstanceofbitbucket.com/projects/PROJECT/repos/REPO/pull-requests/1 review
```
@@ -55,6 +56,7 @@ python cli.py --pr_url https://git.onpreminstanceofbitbucket.com/projects/PROJEC

### Run it as service

To run PR-Agent as a webhook, build the docker image:

```
docker build . -t codiumai/pr-agent:bitbucket_server_webhook --target bitbucket_server_webhook -f docker/Dockerfile
docker push codiumai/pr-agent:bitbucket_server_webhook  # Push to your Docker repository
```
@@ -6,7 +6,7 @@ To run PR-Agent locally, you first need to acquire two keys:

## Using Docker image

A list of the relevant tools can be found in the [tools guide](../tools/ask.md).
A list of the relevant tools can be found in the [tools guide](../tools/).

To invoke a tool (for example `review`), you can run PR-Agent directly from the Docker image. Here's how:
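As an illustrative sketch (modeled on the Bitbucket pipeline command shown earlier in this compare; the `GITHUB.USER_TOKEN` variable, exact flags, and PR URL are placeholders rather than content from this diff), such an invocation could look like:

```shell
docker run --rm -it \
  -e OPENAI.KEY="<your OpenAI key>" \
  -e GITHUB.USER_TOKEN="<your GitHub token>" \
  codiumai/pr-agent:latest \
  --pr_url "https://github.com/<org>/<repo>/pull/<pr_id>" review
```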
@@ -1,7 +1,7 @@
Qodo Merge is a versatile application compatible with GitHub, GitLab, and BitBucket, hosted by QodoAI.
See [here](https://qodo-merge-docs.qodo.ai/overview/pr_agent_pro/) for more details about the benefits of using Qodo Merge.

A complimentary two-week trial is provided to all new users. Following the trial period, user licenses (seats) are required for continued access.
A complimentary two-week trial is provided to all new users (with three additional grace usages). Following the trial period, user licenses (seats) are required for continued access.
To purchase user licenses, please visit our [pricing page](https://www.qodo.ai/pricing/).
Once subscribed, users can seamlessly deploy the application across any of their code repositories.
@@ -53,6 +53,6 @@ Results obtained with the prompt above:

- `prompt`: the prompt for the tool. It should be a multi-line string.

- `num_code_suggestions_per_chunk`: number of code suggestions provided by the 'custom_prompt' tool, per chunk. Default is 4.
- `num_code_suggestions_per_chunk`: number of code suggestions provided by the 'custom_prompt' tool, per chunk. Default is 3.

- `enable_help_text`: if set to true, the tool will display a help text in the comment. Default is true.
@@ -379,7 +379,7 @@ Qodo Merge uses a dynamic strategy to generate code suggestions based on the siz

- Each chunk contains up to `pr_code_suggestions.max_context_tokens` tokens (default: 14,000).

#### 2. Generating suggestions

- For each chunk, Qodo Merge generates up to `pr_code_suggestions.num_code_suggestions_per_chunk` suggestions (default: 4).
- For each chunk, Qodo Merge generates up to `pr_code_suggestions.num_code_suggestions_per_chunk` suggestions (default: 3).
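Taken together, the updated defaults correspond to a configuration like this sketch (both keys are the documented `pr_code_suggestions` settings):

```toml
[pr_code_suggestions]
max_context_tokens = 14000           # max tokens per chunk
num_code_suggestions_per_chunk = 3   # suggestions generated per chunk
```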
This approach has two main benefits:

@@ -434,6 +434,9 @@ Note: Chunking is primarily relevant for large PRs. For most PRs (up to 500 line
  <td><b>enable_chat_text</b></td>
  <td>If set to true, the tool will display a reference to the PR chat in the comment. Default is true.</td>
</tr>
<tr>
  <td><b>publish_output_no_suggestions</b></td>
  <td>If set to true, the tool will publish a comment even if no suggestions were found. Default is true.</td>
</tr>
<tr>
  <td><b>wiki_page_accepted_suggestions</b></td>
  <td>If set to true, the tool will automatically track accepted suggestions in a dedicated wiki page called `.pr_agent_accepted_suggestions`. Default is true.</td>
@@ -453,7 +456,7 @@ Note: Chunking is primarily relevant for large PRs. For most PRs (up to 500 line
</tr>
<tr>
  <td><b>num_code_suggestions_per_chunk</b></td>
  <td>Number of code suggestions provided by the 'improve' tool, per chunk. Default is 4.</td>
  <td>Number of code suggestions provided by the 'improve' tool, per chunk. Default is 3.</td>
</tr>
<tr>
  <td><b>max_number_of_calls</b></td>
@@ -15,6 +15,7 @@ It can be invoked manually by commenting on any PR:

Under the section `pr_update_changelog`, the [configuration file](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L50) contains options to customize the 'update changelog' tool:

- `push_changelog_changes`: whether to push the changes to CHANGELOG.md, or just print them. Default is false (print only).
- `extra_instructions`: Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ...
- `add_pr_link`: whether the model should try to add a link to the PR in the changelog. Default is true.
- `push_changelog_changes`: whether to push the changes to CHANGELOG.md, or just publish them as a comment. Default is false (publish as comment).
- `extra_instructions`: Optional extra instructions to the tool. For example: "Use the following structure: ..."
- `add_pr_link`: whether the model should try to add a link to the PR in the changelog. Default is true.
- `skip_ci_on_push`: whether the commit message (when `push_changelog_changes` is true) will include the term "[skip ci]", preventing CI tests from being triggered on the changelog commit. Default is true.
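Collected in one place, the updated options map to a `pr_update_changelog` block like the following sketch (values shown are the documented defaults, with a placeholder for `extra_instructions`):

```toml
[pr_update_changelog]
push_changelog_changes = false   # false: publish as a comment instead of pushing
extra_instructions = "Use the following structure: ..."  # placeholder
add_pr_link = true
skip_ci_on_push = true           # adds "[skip ci]" to the changelog commit message
```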
@@ -120,24 +120,16 @@ Increasing this number provides more context to the model, but will also increas

If the PR is too large (see [PR Compression strategy](https://github.com/Codium-ai/pr-agent/blob/main/PR_COMPRESSION.md)), Qodo Merge may automatically set this number to 0, and will use the original git patch.

## Editing the prompts

The prompts for the various Qodo Merge tools are defined in the `pr_agent/settings` folder.
In practice, the prompts are loaded and stored as a standard setting object.
Hence, editing them is similar to editing any other configuration value - just place the relevant key in the `.pr_agent.toml` file, and override the default value.

For example, if you want to edit the prompts of the [describe](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/pr_description_prompts.toml) tool, you can add the following to your `.pr_agent.toml` file:

```
[pr_description_prompt]
system="""
...
"""
user="""
...
"""
```

Note that the new prompt will need to generate an output compatible with the relevant [post-process function](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/tools/pr_description.py#L137).

## Log Level

Qodo Merge allows you to control the verbosity of logging by using the `log_level` configuration parameter. This is particularly useful for troubleshooting and debugging issues with your PR workflows.

```
[config]
log_level = "DEBUG" # Options: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
```

The default log level is "DEBUG", which provides detailed output of all operations. If you prefer less verbose logs, you can set higher log levels like "INFO" or "WARNING".

## Integrating with Logging Observability Platforms
@@ -206,9 +206,9 @@ push_commands = [

Note that to use the 'handle_push_trigger' feature, you also need to give the GitLab webhook the "Push events" scope.

### BitBucket App

Similar to the GitHub app, when running Qodo Merge from the BitBucket App, the default [configuration file](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml) from a pre-built docker will be initially loaded.
Similar to the GitHub app, when running Qodo Merge from the BitBucket App, the default [configuration file](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml) will be initially loaded.

By uploading a local `.pr_agent.toml` file to the root of the repo's main branch, you can edit and customize any configuration parameter. Note that you need to upload `.pr_agent.toml` prior to creating a PR, in order for the configuration to take effect.
By uploading a local `.pr_agent.toml` file to the root of the repo's default branch, you can edit and customize any configuration parameter. Note that you need to upload `.pr_agent.toml` prior to creating a PR, in order for the configuration to take effect.

For example, if your local `.pr_agent.toml` file contains:

```toml
...
```
@@ -1,20 +1,26 @@
## Changing a model in PR-Agent

See [here](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/algo/__init__.py) for a list of available models.
To use a different model than the default (GPT-4), you need to edit the following fields in the [configuration file](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L2):
To use a different model than the default (o3-mini), you need to edit the following fields in the [configuration file](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L2):

```toml
[config]
model = "..."
fallback_models = ["..."]
```

For models and environments not from OpenAI, you might need to provide additional keys and other parameters.
You can give parameters via a configuration file (see below for instructions), or from environment variables. See [litellm documentation](https://litellm.vercel.app/docs/proxy/quick_start#supported-llms) for the environment variables relevant per model.
You can give parameters via a configuration file, or from environment variables.

!!! note "Model-specific environment variables"
    See [litellm documentation](https://litellm.vercel.app/docs/proxy/quick_start#supported-llms) for the environment variables needed per model, as they may vary and change over time. Our documentation per-model may not always be up-to-date with the latest changes.
    Failing to set the needed keys of a specific model will usually result in litellm not identifying the model type, and failing to utilize it.
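For example, litellm documents `ANTHROPIC_API_KEY` as the environment variable for Anthropic models; supplying a provider key via the environment (an illustrative sketch with a placeholder value; other providers use their own variable names) looks like:

```shell
# Supply the provider key via the environment instead of .secrets.toml (placeholder value).
export ANTHROPIC_API_KEY="sk-ant-..."
```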
### Azure

To use Azure, set in your `.secrets.toml` (working from CLI), or in the GitHub `Settings > Secrets and variables` (working from GitHub App or GitHub Action):

```toml
[openai]
key = "" # your azure api key
api_type = "azure"
@@ -24,26 +30,40 @@ deployment_id = "" # The deployment name you chose when you deployed the engine
```

and set in your configuration file:

```toml
[config]
model="" # the OpenAI model you've deployed on Azure (e.g. gpt-4o)
fallback_models=["..."]
```

To use Azure AD (Entra id) based authentication, set in your `.secrets.toml` (working from CLI), or in the GitHub `Settings > Secrets and variables` (working from GitHub App or GitHub Action):

```toml
[azure_ad]
client_id = "" # Your Azure AD application client ID
client_secret = "" # Your Azure AD application client secret
tenant_id = "" # Your Azure AD tenant ID
api_base = "" # Your Azure OpenAI service base URL (e.g., https://openai.xyz.com/)
```

Passing custom headers to the underlying LLM Model API can be done by setting the extra_headers parameter to litellm.

```toml
[litellm]
extra_headers='{"projectId": "<authorized projectId>", ...}' # The value of this setting should be a JSON string representing the desired headers; a ValueError is thrown otherwise.
```

This enables users to pass authorization tokens or API keys, when routing requests through an API management gateway.
### Ollama

You can run models locally through either [VLLM](https://docs.litellm.ai/docs/providers/vllm) or [Ollama](https://docs.litellm.ai/docs/providers/ollama).

E.g. to use a new model locally via Ollama, set in `.secrets.toml` or in a configuration file:

```toml
[config]
model = "ollama/qwen2.5-coder:32b"
fallback_models=["ollama/qwen2.5-coder:32b"]
@@ -60,7 +80,7 @@ Please note that the `custom_model_max_tokens` setting should be configured in a
```

!!! note "Local models vs commercial models"
    Qodo Merge is compatible with almost any AI model, but analyzing complex code repositories and pull requests requires a model specifically optimized for code analysis.

    Commercial models such as GPT-4, Claude Sonnet, and Gemini have demonstrated robust capabilities in generating structured output for code analysis tasks with large input. In contrast, most open-source models currently available (as of January 2025) face challenges with these complex tasks.

    Based on our testing, local open-source models are suitable for experimentation and learning purposes (mainly for the `ask` command), but they are not suitable for production-level code analysis tasks.
@@ -70,7 +90,8 @@ Please note that the `custom_model_max_tokens` setting should be configured in a

### Hugging Face

To use a new model with Hugging Face Inference Endpoints, for example, set:

```toml
[config] # in configuration.toml
model = "huggingface/meta-llama/Llama-2-7b-chat-hf"
fallback_models=["huggingface/meta-llama/Llama-2-7b-chat-hf"]
@@ -80,40 +101,59 @@ custom_model_max_tokens=... # set the maximal input tokens for the model
key = ... # your Hugging Face api key
api_base = ... # the base url for your Hugging Face inference endpoint
```

(you can obtain a Llama2 key from [here](https://replicate.com/replicate/llama-2-70b-chat/api))
### Replicate

To use Llama2 model with Replicate, for example, set:

```toml
[config] # in configuration.toml
model = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
fallback_models=["replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"]
[replicate] # in .secrets.toml
key = ...
```

(you can obtain a Llama2 key from [here](https://replicate.com/replicate/llama-2-70b-chat/api))

Also, review the [AiHandler](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/algo/ai_handler.py) file for instructions on how to set keys for other models.
### Groq

To use Llama3 model with Groq, for example, set:

```toml
[config] # in configuration.toml
model = "llama3-70b-8192"
fallback_models = ["groq/llama3-70b-8192"]
[groq] # in .secrets.toml
key = ... # your Groq api key
```

(you can obtain a Groq key from [here](https://console.groq.com/keys))
### xAI

To use xAI's models with PR-Agent, set:

```toml
[config] # in configuration.toml
model = "xai/grok-2-latest"
fallback_models = ["xai/grok-2-latest"] # or any other model as fallback

[xai] # in .secrets.toml
key = "..." # your xAI API key
```

You can obtain an xAI API key from [xAI's console](https://console.x.ai/) by creating an account and navigating to the developer settings page.
### Vertex AI

To use Google's Vertex AI platform and its associated models (chat-bison/codechat-bison), set:

```toml
[config] # in configuration.toml
model = "vertex_ai/codechat-bison"
fallback_models="vertex_ai/codechat-bison"
```
@@ -146,37 +186,37 @@ If you don't want to set the API key in the .secrets.toml file, you can set the

To use Anthropic models, set the relevant models in the configuration section of the configuration file:

```toml
[config]
model="anthropic/claude-3-opus-20240229"
fallback_models=["anthropic/claude-3-opus-20240229"]
```

And also set the API key in the .secrets.toml file:

```toml
[anthropic]
KEY = "..."
```

See [litellm](https://docs.litellm.ai/docs/providers/anthropic#usage) documentation for more information about the environment variables required for Anthropic.
### Amazon Bedrock

To use Amazon Bedrock and its foundational models, add the below configuration:

```toml
[config] # in configuration.toml
model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0"
fallback_models=["bedrock/anthropic.claude-v2:1"]
model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"
fallback_models=["bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"]

[aws]
AWS_ACCESS_KEY_ID="..."
AWS_SECRET_ACCESS_KEY="..."
AWS_REGION_NAME="..."
```

Note that you have to add access to foundational models before using them. Please refer to [this document](https://docs.aws.amazon.com/bedrock/latest/userguide/setting-up.html) for more details.

If you are using the claude-3 model, please configure the following settings, as some parameters are incompatible with claude-3:

```
[litellm]
drop_params = true
```

AWS session is automatically authenticated from your environment, but you can also explicitly set `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` and `AWS_REGION_NAME` environment variables. Please refer to [this document](https://litellm.vercel.app/docs/providers/bedrock) for more details.
See [litellm](https://docs.litellm.ai/docs/providers/bedrock#usage) documentation for more information about the environment variables required for Amazon Bedrock.
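If you rely on environment-based authentication instead of the `[aws]` section, a minimal sketch of exporting the same variables before launching PR-Agent (names as documented above; values are placeholders):

```python
import os

# litellm's Bedrock integration picks these up from the environment
# when no explicit credentials are configured.
os.environ["AWS_ACCESS_KEY_ID"] = "<access-key-id>"
os.environ["AWS_SECRET_ACCESS_KEY"] = "<secret-access-key>"
os.environ["AWS_REGION_NAME"] = "us-east-1"  # example region
```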
### DeepSeek

@@ -197,17 +237,18 @@ key = ...

(you can obtain a deepseek-chat key from [here](https://platform.deepseek.com))

### DeepInfra

To use DeepSeek model with DeepInfra, for example, set:

```toml
[config] # in configuration.toml
model = "deepinfra/deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
fallback_models = ["deepinfra/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"]
[deepinfra] # in .secrets.toml
key = ... # your DeepInfra api key
```

(you can obtain a DeepInfra key from [here](https://deepinfra.com/dash/api_keys))

### Custom models

@@ -215,33 +256,41 @@ key = ... # your DeepInfra api key
If the relevant model doesn't appear [here](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/algo/__init__.py), you can still use it as a custom model:

1. Set the model name in the configuration file:

```toml
[config]
model="custom_model_name"
fallback_models=["custom_model_name"]
```

2. Set the maximal tokens for the model:

```toml
[config]
custom_model_max_tokens= ...
```

3. Go to [litellm documentation](https://litellm.vercel.app/docs/proxy/quick_start#supported-llms), find the model you want to use, and set the relevant environment variables.

4. Most reasoning models do not support chat-style inputs (`system` and `user` messages) or temperature settings.
To bypass chat templates and temperature controls, set `config.custom_reasoning_model = true` in your configuration file (a sketch of the effect follows below).
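A rough sketch of what the flag implies for the request that is ultimately sent (illustrative only; the real handling lives inside the litellm AI handler):

```python
# Assumption: with custom_reasoning_model=true, the system prompt is folded
# into a single user message and no temperature parameter is sent.
def build_request(model: str, system_prompt: str, user_prompt: str,
                  custom_reasoning_model: bool = True) -> dict:
    if custom_reasoning_model:
        messages = [{"role": "user", "content": f"{system_prompt}\n{user_prompt}"}]
        return {"model": model, "messages": messages}  # note: no "temperature" key
    messages = [{"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}]
    return {"model": model, "messages": messages, "temperature": 0.2}
```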
## Dedicated parameters

### OpenAI models

```toml
[config]
reasoning_effort = "medium" # "low", "medium", "high"
```

With the OpenAI models that support reasoning effort (e.g., o3-mini), you can specify the reasoning effort via the `config` section. The default value is `medium`. You can change it to `high` or `low` based on your usage.

### Anthropic models

```toml
[config]
enable_claude_extended_thinking = false # Set to true to enable extended thinking feature
extended_thinking_budget_tokens = 2048
extended_thinking_max_output_tokens = 4096
```
@@ -41,7 +41,7 @@ Qodo Merge will know to remove the surrounding quotes when reading the configura

`Platforms supported: GitHub, GitLab, Bitbucket, Azure DevOps`

By uploading a local `.pr_agent.toml` file to the root of the repo's main branch, you can edit and customize any configuration parameter. Note that you need to upload `.pr_agent.toml` prior to creating a PR, in order for the configuration to take effect.
By uploading a local `.pr_agent.toml` file to the root of the repo's default branch, you can edit and customize any configuration parameter. Note that you need to upload `.pr_agent.toml` prior to creating a PR, in order for the configuration to take effect.

For example, if you set in `.pr_agent.toml`:

@@ -61,7 +61,7 @@ Then you can give a list of extra instructions to the `review` tool.

`Platforms supported: GitHub, GitLab, Bitbucket`

If you create a repo called `pr-agent-settings` in your **organization**, it's configuration file `.pr_agent.toml` will be used as a global configuration file for any other repo that belongs to the same organization.
If you create a repo called `pr-agent-settings` in your **organization**, its configuration file `.pr_agent.toml` will be used as a global configuration file for any other repo that belongs to the same organization.
Parameters from a local `.pr_agent.toml` file, in a specific repo, will override the global configuration parameters.

For example, in the GitHub organization `Codium-ai`:

@@ -71,9 +71,9 @@ For example, in the GitHub organization `Codium-ai`:
- The repo [`https://github.com/Codium-ai/pr-agent`](https://github.com/Codium-ai/pr-agent/blob/main/.pr_agent.toml) inherits the global configuration file from `pr-agent-settings`.
### Bitbucket Organization level configuration file 💎

`Relevant platforms: Bitbucket Cloud, Bitbucket Data Center`
`Relevant platforms: Bitbucket Data Center`

In Bitbucket, there are two levels where you can define a global configuration file:
In Bitbucket Data Center, there are two levels where you can define a global configuration file:

* Project-level global configuration:

@@ -23,5 +23,4 @@ It includes information on how to adjust Qodo Merge configurations, define which

- [Working with large PRs](./additional_configurations.md#working-with-large-prs)
- [Changing a model](./additional_configurations.md#changing-a-model)
- [Patch Extra Lines](./additional_configurations.md#patch-extra-lines)
- [Editing the prompts](./additional_configurations.md#editing-the-prompts)
- [Qodo Merge Models](./qodo_merge_models)
@@ -8,7 +8,7 @@ The models supported by Qodo Merge are:

- `claude-3-7-sonnet` (default)
- `o3-mini`
- `gpt-4o`
- `gpt-4.1`
- `deepseek/r1`

To restrict Qodo Merge to using only `o3-mini`, add this setting:

@@ -17,10 +17,10 @@ To restrict Qodo Merge to using only `o3-mini`, add this setting:
model="o3-mini"
```

To restrict Qodo Merge to using only `GPT-4o`, add this setting:
To restrict Qodo Merge to using only `GPT-4.1`, add this setting:

```
[config]
model="gpt-4o"
model="gpt-4.1"
```

To restrict Qodo Merge to using only `deepseek-r1` us-hosted, add this setting:
@@ -41,21 +41,23 @@ nav:
  - 💎 Implement: 'tools/implement.md'
  - Core Abilities:
      - 'core-abilities/index.md'
      - Fetching ticket context: 'core-abilities/fetching_ticket_context.md'
      - Auto best practices: 'core-abilities/auto_best_practices.md'
      - Local and global metadata: 'core-abilities/metadata.md'
      - Pull request benchmark: 'finetuning_benchmark/index.md'
      - Code validation: 'core-abilities/code_validation.md'
      - Compression strategy: 'core-abilities/compression_strategy.md'
      - Dynamic context: 'core-abilities/dynamic_context.md'
      - Self-reflection: 'core-abilities/self_reflection.md'
      - Fetching ticket context: 'core-abilities/fetching_ticket_context.md'
      - Impact evaluation: 'core-abilities/impact_evaluation.md'
      - Interactivity: 'core-abilities/interactivity.md'
      - Compression strategy: 'core-abilities/compression_strategy.md'
      - Company Codebase: 'core-abilities/company_codebase.md'
      - Local and global metadata: 'core-abilities/metadata.md'
      - RAG context enrichment: 'core-abilities/rag_context_enrichment.md'
      - Self-reflection: 'core-abilities/self_reflection.md'
      - Static code analysis: 'core-abilities/static_code_analysis.md'
      - Code Fine-tuning Benchmark: 'finetuning_benchmark/index.md'
  - Chrome Extension:
      - Qodo Merge Chrome Extension: 'chrome-extension/index.md'
      - Features: 'chrome-extension/features.md'
      - Data Privacy: 'chrome-extension/data_privacy.md'
      - Options: 'chrome-extension/options.md'
  - FAQ:
      - FAQ: 'faq/index.md'
  - AI Docs Search: 'ai_search/index.md'
@@ -20,6 +20,14 @@ MAX_TOKENS = {
    'gpt-4o-mini-2024-07-18': 128000, # 128K, but may be limited by config.max_model_tokens
    'gpt-4o-2024-08-06': 128000, # 128K, but may be limited by config.max_model_tokens
    'gpt-4o-2024-11-20': 128000, # 128K, but may be limited by config.max_model_tokens
    'gpt-4.5-preview': 128000, # 128K, but may be limited by config.max_model_tokens
    'gpt-4.5-preview-2025-02-27': 128000, # 128K, but may be limited by config.max_model_tokens
    'gpt-4.1': 1047576,
    'gpt-4.1-2025-04-14': 1047576,
    'gpt-4.1-mini': 1047576,
    'gpt-4.1-mini-2025-04-14': 1047576,
    'gpt-4.1-nano': 1047576,
    'gpt-4.1-nano-2025-04-14': 1047576,
    'o1-mini': 128000, # 128K, but may be limited by config.max_model_tokens
    'o1-mini-2024-09-12': 128000, # 128K, but may be limited by config.max_model_tokens
    'o1-preview': 128000, # 128K, but may be limited by config.max_model_tokens
@@ -51,6 +59,7 @@ MAX_TOKENS = {
    'gemini/gemini-1.5-pro': 1048576,
    'gemini/gemini-1.5-flash': 1048576,
    'gemini/gemini-2.0-flash': 1048576,
    'gemini/gemini-2.5-pro-preview-03-25': 1048576,
    'codechat-bison': 6144,
    'codechat-bison-32k': 32000,
    'anthropic.claude-instant-v1': 100000,
@@ -74,12 +83,21 @@ MAX_TOKENS = {
    "bedrock/us.anthropic.claude-3-5-sonnet-20241022-v2:0": 100000,
    "bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0": 200000,
    'claude-3-5-sonnet': 100000,
    'groq/meta-llama/llama-4-scout-17b-16e-instruct': 131072,
    'groq/meta-llama/llama-4-maverick-17b-128e-instruct': 131072,
    'groq/llama3-8b-8192': 8192,
    'groq/llama3-70b-8192': 8192,
    'groq/llama-3.1-8b-instant': 8192,
    'groq/llama-3.3-70b-versatile': 128000,
    'groq/mixtral-8x7b-32768': 32768,
    'groq/gemma2-9b-it': 8192,
    'xai/grok-2': 131072,
    'xai/grok-2-1212': 131072,
    'xai/grok-2-latest': 131072,
    'xai/grok-3-beta': 131072,
    'xai/grok-3-fast-beta': 131072,
    'xai/grok-3-mini-beta': 131072,
    'xai/grok-3-mini-fast-beta': 131072,
    'ollama/llama3': 4096,
    'watsonx/meta-llama/llama-3-8b-instruct': 4096,
    "watsonx/meta-llama/llama-3-70b-instruct": 4096,
@@ -1,5 +1,4 @@
import os

import litellm
import openai
import requests
@@ -31,6 +30,7 @@ class LiteLLMAIHandler(BaseAiHandler):
        self.azure = False
        self.api_base = None
        self.repetition_penalty = None

        if get_settings().get("OPENAI.KEY", None):
            openai.api_key = get_settings().openai.key
            litellm.openai_key = get_settings().openai.key
@@ -67,6 +67,8 @@ class LiteLLMAIHandler(BaseAiHandler):
            litellm.api_key = get_settings().groq.key
        if get_settings().get("REPLICATE.KEY", None):
            litellm.replicate_key = get_settings().replicate.key
        if get_settings().get("XAI.KEY", None):
            litellm.api_key = get_settings().xai.key
        if get_settings().get("HUGGINGFACE.KEY", None):
            litellm.huggingface_key = get_settings().huggingface.key
        if get_settings().get("HUGGINGFACE.API_BASE", None) and 'huggingface' in get_settings().config.model:
@@ -95,6 +97,19 @@ class LiteLLMAIHandler(BaseAiHandler):
        if get_settings().get("DEEPINFRA.KEY", None):
            os.environ['DEEPINFRA_API_KEY'] = get_settings().get("DEEPINFRA.KEY")

        # Check for Azure AD configuration
        if get_settings().get("AZURE_AD.CLIENT_ID", None):
            self.azure = True
            # Generate access token using Azure AD credentials from settings
            access_token = self._get_azure_ad_token()
            litellm.api_key = access_token
            openai.api_key = access_token

            # Set API base from settings
            self.api_base = get_settings().azure_ad.api_base
            litellm.api_base = self.api_base
            openai.api_base = self.api_base

        # Models that only use user message
        self.user_message_only_models = USER_MESSAGE_ONLY_MODELS
@@ -107,6 +122,26 @@ class LiteLLMAIHandler(BaseAiHandler):
        # Models that support extended thinking
        self.claude_extended_thinking_models = CLAUDE_EXTENDED_THINKING_MODELS

    def _get_azure_ad_token(self):
        """
        Generates an access token using Azure AD credentials from settings.
        Returns:
            str: The access token
        """
        from azure.identity import ClientSecretCredential
        try:
            credential = ClientSecretCredential(
                tenant_id=get_settings().azure_ad.tenant_id,
                client_id=get_settings().azure_ad.client_id,
                client_secret=get_settings().azure_ad.client_secret
            )
            # Get token for Azure OpenAI service
            token = credential.get_token("https://cognitiveservices.azure.com/.default")
            return token.token
        except Exception as e:
            get_logger().error(f"Failed to get Azure AD token: {e}")
            raise

    def prepare_logs(self, response, system, user, resp, finish_reason):
        response_log = response.dict().copy()
        response_log['system'] = system
@@ -1,7 +1,6 @@
from threading import Lock

from jinja2 import Environment, StrictUndefined
from math import ceil
from tiktoken import encoding_for_model, get_encoding

from pr_agent.config_loader import get_settings
@@ -20,8 +19,11 @@ class TokenEncoder:
        with cls._lock:  # Lock acquisition to ensure thread safety
            if cls._encoder_instance is None or model != cls._model:
                cls._model = model
                cls._encoder_instance = encoding_for_model(cls._model) if "gpt" in cls._model else get_encoding(
                    "cl100k_base")
                try:
                    cls._encoder_instance = encoding_for_model(cls._model) if "gpt" in cls._model else get_encoding(
                        "o200k_base")
                except:
                    cls._encoder_instance = get_encoding("o200k_base")
            return cls._encoder_instance
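The fallback above is easy to check in isolation: tiktoken's `encoding_for_model` raises for model names it does not recognize, and the handler now degrades to the generic `o200k_base` encoding instead of crashing. A small self-contained sketch:

```python
from tiktoken import encoding_for_model, get_encoding

def safe_encoder(model: str):
    try:
        # Model-specific encoding when tiktoken recognizes the name
        return encoding_for_model(model) if "gpt" in model else get_encoding("o200k_base")
    except Exception:
        # Unknown names raise; fall back to the generic encoder
        return get_encoding("o200k_base")

enc = safe_encoder("gpt-some-future-model")
print(len(enc.encode("def foo(): pass")))
```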
@@ -105,6 +107,19 @@ class TokenHandler:
            get_logger().error(f"Error in Anthropic token counting: {e}")
            return MaxTokens

    def estimate_token_count_for_non_anth_claude_models(self, model, default_encoder_estimate):
        from math import ceil
        import re

        model_is_from_o_series = re.match(r"^o[1-9](-mini|-preview)?$", model)
        if ('gpt' in get_settings().config.model.lower() or model_is_from_o_series) and get_settings(use_context=False).get('openai.key'):
            return default_encoder_estimate
        #else: Model is not an OpenAI one - therefore, cannot provide an accurate token count and instead, return a higher number as best effort.

        elbow_factor = 1 + get_settings().get('config.model_token_count_estimate_factor', 0)
        get_logger().warning(f"{model}'s expected token count cannot be accurately estimated. Using {elbow_factor} of encoder output as best effort estimate")
        return ceil(elbow_factor * default_encoder_estimate)
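For a concrete feel of the elbow factor: with the shipped default `model_token_count_estimate_factor=0.3`, an encoder estimate of 1000 tokens is reported as 1300. A one-line check:

```python
from math import ceil

default_encoder_estimate = 1000
elbow_factor = 1 + 0.3  # 1 + config.model_token_count_estimate_factor
assert ceil(elbow_factor * default_encoder_estimate) == 1300
```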
    def count_tokens(self, patch: str, force_accurate=False) -> int:
        """
        Counts the number of tokens in a given patch string.
@@ -116,21 +131,15 @@ class TokenHandler:
        The number of tokens in the patch string.
        """
        encoder_estimate = len(self.encoder.encode(patch, disallowed_special=()))

        #If an estimate is enough (for example, in cases where the maximal allowed tokens is way below the known limits), return it.
        if not force_accurate:
            return encoder_estimate
        #else, need to provide an accurate estimation:

        #else, force_accurate==True: User requested providing an accurate estimation:
        model = get_settings().config.model.lower()
        if force_accurate and 'claude' in model and get_settings(use_context=False).get('anthropic.key'):
        if 'claude' in model and get_settings(use_context=False).get('anthropic.key'):
            return self.calc_claude_tokens(patch)  # API call to Anthropic for accurate token counting for Claude models
        #else: Non Anthropic provided model

        import re
        model_is_from_o_series = re.match(r"^o[1-9](-mini|-preview)?$", model)
        if ('gpt' in get_settings().config.model.lower() or model_is_from_o_series) and get_settings(use_context=False).get('openai.key'):
            return encoder_estimate
        #else: Model is neither an OpenAI, nor an Anthropic model - therefore, cannot provide an accurate token count and instead, return a higher number as best effort.

        elbow_factor = 1 + get_settings().get('config.model_token_count_estimate_factor', 0)
        get_logger().warning(f"{model}'s expected token count cannot be accurately estimated. Using {elbow_factor} of encoder output as best effort estimate")
        return ceil(elbow_factor * encoder_estimate)
        #else: Non Anthropic provided model:
        return self.estimate_token_count_for_non_anth_claude_models(model, encoder_estimate)
@@ -29,6 +29,7 @@ global_settings = Dynaconf(
        "settings/custom_labels.toml",
        "settings/pr_help_prompts.toml",
        "settings/pr_help_docs_prompts.toml",
        "settings/pr_help_docs_headings_prompts.toml",
        "settings/.secrets.toml",
        "settings_prod/.secrets.toml",
    ]]
@@ -29,17 +29,36 @@ class BitbucketProvider(GitProvider):
        self, pr_url: Optional[str] = None, incremental: Optional[bool] = False
    ):
        s = requests.Session()
        try:
            self.bearer_token = bearer = context.get("bitbucket_bearer_token", None)
            if not bearer and get_settings().get("BITBUCKET.BEARER_TOKEN", None):
                self.bearer_token = bearer = get_settings().get("BITBUCKET.BEARER_TOKEN", None)
            s.headers["Authorization"] = f"Bearer {bearer}"
        except Exception:
            self.bearer_token = get_settings().get("BITBUCKET.BEARER_TOKEN", None)
            s.headers[
                "Authorization"
            ] = f'Bearer {self.bearer_token}'
        s.headers["Content-Type"] = "application/json"

        self.auth_type = get_settings().get("BITBUCKET.AUTH_TYPE", "bearer")

        try:
            def get_token(token_name, auth_type_name):
                token = get_settings().get(f"BITBUCKET.{token_name.upper()}", None)
                if not token:
                    raise ValueError(f"{auth_type_name} auth requires a token")
                return token

            if self.auth_type == "basic":
                self.basic_token = get_token("basic_token", "Basic")
                s.headers["Authorization"] = f"Basic {self.basic_token}"
            elif self.auth_type == "bearer":
                try:
                    self.bearer_token = context.get("bitbucket_bearer_token", None)
                except:
                    self.bearer_token = None

                if not self.bearer_token:
                    self.bearer_token = get_token("bearer_token", "Bearer")
                s.headers["Authorization"] = f"Bearer {self.bearer_token}"
            else:
                raise ValueError(f"Unsupported auth_type: {self.auth_type}")

        except Exception as e:
            get_logger().exception(f"Failed to initialize Bitbucket authentication: {e}")
            raise

        self.headers = s.headers
        self.bitbucket_client = Cloud(session=s)
        self.max_comment_length = 31000
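One caveat worth flagging (an assumption, not something the diff states): since the basic token is injected verbatim into an `Authorization: Basic ...` header, it should already be the base64 encoding of `<username>:<app_password>`. A sketch of producing such a token:

```python
import base64

# Hypothetical credentials; Bitbucket Cloud app passwords are created
# in the account settings UI.
basic_token = base64.b64encode(b"myuser:my_app_password").decode()
print({"Authorization": f"Basic {basic_token}"})
```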
@@ -92,7 +111,7 @@ class BitbucketProvider(GitProvider):
                return ("", "")
            workspace_name, project_name = repo_path.split('/')
        else:
            desired_branch = self.get_pr_branch()
            desired_branch = self.get_repo_default_branch()
            parsed_pr_url = urlparse(self.pr_url)
            scheme_and_netloc = parsed_pr_url.scheme + "://" + parsed_pr_url.netloc
            workspace_name, project_name = (self.workspace_slug, self.repo_slug)
@@ -470,6 +489,16 @@ class BitbucketProvider(GitProvider):
    def get_pr_branch(self):
        return self.pr.source_branch

    # This function attempts to get the default branch of the repository. As a fallback, uses the PR destination branch.
    # Note: Must be running from a PR context.
    def get_repo_default_branch(self):
        try:
            url_repo = f"https://api.bitbucket.org/2.0/repositories/{self.workspace_slug}/{self.repo_slug}/"
            response_repo = requests.request("GET", url_repo, headers=self.headers).json()
            return response_repo['mainbranch']['name']
        except:
            return self.pr.destination_branch

    def get_pr_owner_id(self) -> str | None:
        return self.workspace_slug
@@ -598,16 +627,21 @@ class BitbucketProvider(GitProvider):
        if "bitbucket.org" not in repo_url_to_clone:
            get_logger().error("Repo URL is not a valid bitbucket URL.")
            return None
        bearer_token = self.bearer_token
        if not bearer_token:
            get_logger().error("No bearer token provided. Returning None")
            return None

        #For example: For repo: https://bitbucket.org/codiumai/pr-agent-tests.git
        #clone url will be: https://x-token-auth:<token>@bitbucket.org/codiumai/pr-agent-tests.git
        (scheme, base_url) = repo_url_to_clone.split("bitbucket.org")
        if not all([scheme, base_url]):
            get_logger().error(f"repo_url_to_clone: {repo_url_to_clone} is not a valid bitbucket URL.")
            return None
        clone_url = f"{scheme}x-token-auth:{bearer_token}@bitbucket.org{base_url}"

        if self.auth_type == "basic":
            # Basic auth with token
            clone_url = f"{scheme}x-token-auth:{self.basic_token}@bitbucket.org{base_url}"
        elif self.auth_type == "bearer":
            # Bearer token
            clone_url = f"{scheme}x-token-auth:{self.bearer_token}@bitbucket.org{base_url}"
        else:
            # This case should ideally not be reached if __init__ validates auth_type
            get_logger().error(f"Unsupported or uninitialized auth_type: {getattr(self, 'auth_type', 'N/A')}. Returning None")
            return None

        return clone_url
@@ -64,9 +64,15 @@ class BitbucketServerProvider(GitProvider):
        workspace_name = None
        project_name = None
        if not repo_git_url:
            desired_branch = self.get_pr_branch()
            workspace_name = self.workspace_slug
            project_name = self.repo_slug
            default_branch_dict = self.bitbucket_client.get_default_branch(workspace_name, project_name)
            if 'displayId' in default_branch_dict:
                desired_branch = default_branch_dict['displayId']
            else:
                get_logger().error(f"Cannot obtain default branch for workspace_name={workspace_name}, "
                                   f"project_name={project_name}, default_branch_dict={default_branch_dict}")
                return ("", "")
        elif '.git' in repo_git_url and 'scm/' in repo_git_url:
            repo_path = repo_git_url.split('.git')[0].split('scm/')[-1]
            if repo_path.count('/') == 1:  # Has to have the form <workspace>/<repo>
@@ -133,7 +133,7 @@ class GithubProvider(GitProvider):
        if (not owner or not repo) and self.repo:  # "else" - User did not provide an external git url, or not an issue; use self.repo object
            owner, repo = self.repo.split('/')
            scheme_and_netloc = self.base_url_html
            desired_branch = self.get_pr_branch()
            desired_branch = self.repo_obj.default_branch
        if not all([scheme_and_netloc, owner, repo]):  # "else": Not invoked from a PR context, but no provided git url for context
            get_logger().error(f"Unable to get canonical url parts since missing context (PR or explicit git url)")
            return ("", "")
@@ -87,7 +87,11 @@ class GitLabProvider(GitProvider):
            return ("", "")
        if not repo_git_url:  # Use PR url as context
            repo_path = self._get_project_path_from_pr_or_issue_url(self.pr_url)
            desired_branch = self.get_pr_branch()
            try:
                desired_branch = self.gl.projects.get(self.id_project).default_branch
            except Exception as e:
                get_logger().exception(f"Cannot get PR: {self.pr_url} default branch. Tried project ID: {self.id_project}")
                return ("", "")
        else:  # Use repo git url
            repo_path = repo_git_url.split('.git')[0].split('.com/')[-1]
        prefix = f"{self.gitlab_url}/{repo_path}/-/blob/{desired_branch}"
@@ -515,7 +519,8 @@ class GitLabProvider(GitProvider):

    def get_repo_settings(self):
        try:
            contents = self.gl.projects.get(self.id_project).files.get(file_path='.pr_agent.toml', ref=self.mr.target_branch).decode()
            main_branch = self.gl.projects.get(self.id_project).default_branch
            contents = self.gl.projects.get(self.id_project).files.get(file_path='.pr_agent.toml', ref=main_branch).decode()
            return contents
        except Exception:
            return ""
@@ -12,6 +12,7 @@ from pr_agent.log import get_logger


def apply_repo_settings(pr_url):
    os.environ["AUTO_CAST_FOR_DYNACONF"] = "false"
    git_provider = get_git_provider_with_context(pr_url)
    if get_settings().config.use_repo_settings_file:
        repo_settings_file = None

@@ -1,6 +1,7 @@
import os
os.environ["AUTO_CAST_FOR_DYNACONF"] = "false"
import json
import logging
import os
import sys
from enum import Enum
@@ -25,7 +25,7 @@ from pr_agent.config_loader import get_settings
from pr_agent.git_providers.utils import apply_repo_settings
from pr_agent.log import LoggingFormat, get_logger, setup_logger

setup_logger(fmt=LoggingFormat.JSON, level="DEBUG")
setup_logger(fmt=LoggingFormat.JSON, level=get_settings().get("CONFIG.LOG_LEVEL", "DEBUG"))
security = HTTPBasic()
router = APIRouter()
available_commands_rgx = re.compile(r"^\/(" + "|".join(command2class.keys()) + r")\s*")

@@ -25,7 +25,7 @@ from pr_agent.identity_providers.identity_provider import Eligibility
from pr_agent.log import LoggingFormat, get_logger, setup_logger
from pr_agent.secret_providers import get_secret_provider

setup_logger(fmt=LoggingFormat.JSON, level="DEBUG")
setup_logger(fmt=LoggingFormat.JSON, level=get_settings().get("CONFIG.LOG_LEVEL", "DEBUG"))
router = APIRouter()
secret_provider = get_secret_provider() if get_settings().get("CONFIG.SECRET_PROVIDER") else None

@@ -21,7 +21,7 @@ from pr_agent.git_providers.utils import apply_repo_settings
from pr_agent.log import LoggingFormat, get_logger, setup_logger
from pr_agent.servers.utils import verify_signature

setup_logger(fmt=LoggingFormat.JSON, level="DEBUG")
setup_logger(fmt=LoggingFormat.JSON, level=get_settings().get("CONFIG.LOG_LEVEL", "DEBUG"))
router = APIRouter()

@@ -24,7 +24,7 @@ from pr_agent.identity_providers.identity_provider import Eligibility
from pr_agent.log import LoggingFormat, get_logger, setup_logger
from pr_agent.servers.utils import DefaultDictWithTimeout, verify_signature

setup_logger(fmt=LoggingFormat.JSON, level="DEBUG")
setup_logger(fmt=LoggingFormat.JSON, level=get_settings().get("CONFIG.LOG_LEVEL", "DEBUG"))
base_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
build_number_path = os.path.join(base_path, "build_number.txt")
if os.path.exists(build_number_path):

@@ -13,7 +13,7 @@ from pr_agent.config_loader import get_settings
from pr_agent.git_providers import get_git_provider
from pr_agent.log import LoggingFormat, get_logger, setup_logger

setup_logger(fmt=LoggingFormat.JSON, level="DEBUG")
setup_logger(fmt=LoggingFormat.JSON, level=get_settings().get("CONFIG.LOG_LEVEL", "DEBUG"))
NOTIFICATION_URL = "https://api.github.com/notifications"

@@ -19,7 +19,7 @@ from pr_agent.git_providers.utils import apply_repo_settings
from pr_agent.log import LoggingFormat, get_logger, setup_logger
from pr_agent.secret_providers import get_secret_provider

setup_logger(fmt=LoggingFormat.JSON, level="DEBUG")
setup_logger(fmt=LoggingFormat.JSON, level=get_settings().get("CONFIG.LOG_LEVEL", "DEBUG"))
router = APIRouter()

secret_provider = get_secret_provider() if get_settings().get("CONFIG.SECRET_PROVIDER") else None
@@ -32,6 +32,9 @@ key = "" # Optional, uncomment if you want to use Replicate. Acquire through htt

[groq]
key = "" # Acquire through https://console.groq.com/keys

[xai]
key = "" # Optional, uncomment if you want to use xAI. Acquire through https://console.x.ai/

[huggingface]
key = "" # Optional, uncomment if you want to use Huggingface Inference API. Acquire through https://huggingface.co/docs/api-inference/quicktour
api_base = "" # the base url for your huggingface inference endpoint

@@ -66,8 +69,12 @@ personal_access_token = ""
shared_secret = "" # webhook secret

[bitbucket]
# For Bitbucket personal/repository bearer token
# For Bitbucket authentication
auth_type = "bearer" # "bearer" or "basic"
# For bearer token authentication
bearer_token = ""
# For basic authentication (uses token only)
basic_token = ""

[bitbucket_server]
# For Bitbucket Server bearer token

@@ -94,3 +101,10 @@ key = ""

[deepinfra]
key = ""

[azure_ad]
# Azure AD authentication for OpenAI services
client_id = "" # Your Azure AD application client ID
client_secret = "" # Your Azure AD application client secret
tenant_id = "" # Your Azure AD tenant ID
api_base = "" # Your Azure OpenAI service base URL (e.g., https://openai.xyz.com/)
@@ -59,7 +59,8 @@ Specific guidelines for generating code suggestions:
- use more specific exception types
{%- endif %}
- When mentioning code elements (variables, names, or files) in your response, surround them with backticks (`). For example: "verify that `user_id` is..."
- Note that you only see changed code segments (diff hunks in a PR), not the entire codebase. Avoid suggestions that might duplicate existing functionality or questioning code elements (like variables declarations or import statements) that may be defined elsewhere in the codebase.
- Note that you will only see partial code segments that were changed (diff hunks in a PR), and not the entire codebase. Avoid suggestions that might duplicate existing functionality or question the existence of code elements like variables, functions, classes, and import statements, that may be defined elsewhere in the codebase.
- Also note that if the code ends at an opening brace or statement that begins a new scope (like 'if', 'for', 'try'), don't treat it as incomplete. Instead, acknowledge the visible scope boundary and analyze only the code shown.

{%- if extra_instructions %}

@@ -76,7 +77,7 @@ The output must be a YAML object equivalent to type $PRCodeSuggestions, accordin
class CodeSuggestion(BaseModel):
    relevant_file: str = Field(description="Full path of the relevant file")
    language: str = Field(description="Programming language used by the relevant file")
    existing_code: str = Field(description="A short code snippet from the final state of the PR diff, that the suggestion aims to enhance or fix. Include only complete code lines, preserving all indentation, newlines, and original formatting. Use ellipsis (...) for brevity if needed. This snippet should represent the specific PR code targeted for improvement.")
    existing_code: str = Field(description="A short code snippet from the final state of the PR diff that the suggestion will address. Select only the span of code that will be modified - without surrounding unchanged code. Preserve all indentation, newlines, and original formatting. Use ellipsis (...) for brevity if needed.")
    suggestion_content: str = Field(description="An actionable suggestion to enhance, improve or fix the new code introduced in the PR. Don't present here actual code snippets, just the suggestion. Be short and concise")
    improved_code: str = Field(description="A refined code snippet that replaces the 'existing_code' snippet after implementing the suggestion.")
    one_sentence_summary: str = Field(description="A concise, single-sentence overview (up to 6 words) of the suggested improvement. Focus on the 'what'. Be general, and avoid method or variable names.")
@@ -9,14 +9,14 @@
model="o3-mini"
fallback_models=["gpt-4o-2024-11-20"]
#model_weak="gpt-4o-mini-2024-07-18" # optional, a weaker model to use for some easier tasks
model_token_count_estimate_factor=0.3 # factor to increase the token count estimate, in order to reduce likelihood of model failure due to too many tokens.
# CLI
git_provider="github"
publish_output=true
publish_output_progress=true
publish_output_no_suggestions=true
verbosity_level=0 # 0,1,2
use_extra_bad_extensions=false
# Log
log_level="DEBUG"
# Configurations
use_wiki_settings_file=true
use_repo_settings_file=true

@@ -31,6 +31,7 @@ max_description_tokens = 500
max_commits_tokens = 500
max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities.
custom_model_max_tokens=-1 # for models not in the default list
model_token_count_estimate_factor=0.3 # factor to increase the token count estimate, in order to reduce likelihood of model failure due to too many tokens - applicable only when requesting an accurate estimate.
# patch extension logic
patch_extension_skip_types =[".md",".txt"]
allow_dynamic_context=true

@@ -130,9 +131,9 @@ focus_only_on_problems=true
extra_instructions = ""
enable_help_text=false
enable_chat_text=false
enable_intro_text=true
persistent_comment=true
max_history_len=4
publish_output_no_suggestions=true
# enable to apply suggestion 💎
apply_suggestions_checkbox=true
# suggestions scoring

@@ -142,7 +143,7 @@ new_score_mechanism_th_high=9
new_score_mechanism_th_medium=7
# params for '/improve --extended' mode
auto_extended_mode=true
num_code_suggestions_per_chunk=4
num_code_suggestions_per_chunk=3
max_number_of_calls = 3
parallel_calls = true

@@ -166,7 +167,7 @@ The code suggestions should focus only on the following:
...
"""
suggestions_score_threshold=0
num_code_suggestions_per_chunk=4
num_code_suggestions_per_chunk=3
self_reflect_on_custom_suggestions=true
enable_help_text=false

@@ -181,6 +182,7 @@ class_name = "" # in case there are several methods with the same name in
push_changelog_changes=false
extra_instructions = ""
add_pr_link=true
skip_ci_on_push=true

[pr_analyze] # /analyze #
enable_help_text=true
pr_agent/settings/pr_help_docs_headings_prompts.toml (new file, 101 lines)
@@ -0,0 +1,101 @@
[pr_help_docs_headings_prompts]
system="""You are Doc-helper, a language model that ranks documentation files based on their relevance to user questions.
You will receive a question, a repository url and file names along with optional groups of headings extracted from such files from that repository (either as markdown or as restructured text).
Your task is to rank file paths based on how likely they contain the answer to a user's question, using only the headings from each such file and the file name.

======
==file name==

'src/file1.py'

==index==

0 based integer

==file headings==
heading #1
heading #2
...

==file name==

'src/file2.py'

==index==

0 based integer

==file headings==
heading #1
heading #2
...

...
======

Additional instructions:
- Consider only the file names and section headings within each document
- Present the most relevant files first, based strictly on how well their headings and file names align with the user question

The output must be a YAML object equivalent to type $DocHeadingsHelper, according to the following Pydantic definitions:
=====
class file_idx_and_path(BaseModel):
    idx: int = Field(description="The zero based index of file_name, as it appeared in the original list of headings. Cannot be negative.")
    file_name: str = Field(description="The file_name exactly as it appeared in the question")

class DocHeadingsHelper(BaseModel):
    user_question: str = Field(description="The user's question")
    relevant_files_ranking: List[file_idx_and_path] = Field(description="Files sorted in descending order by relevance to question")
=====

Example output:
```yaml
user_question: |
  ...
relevant_files_ranking:
- idx: 101
  file_name: "src/file1.py"
- ...
"""

user="""\
Documentation url: '{{ docs_url|trim }}'
-----

User's Question:
=====
{{ question|trim }}
=====

Filenames with optional headings from documentation website content:
=====
{{ snippets|trim }}
=====

Reminder: The output must be a YAML object equivalent to type $DocHeadingsHelper, similar to the following example output:
=====

Example output:
```yaml
user_question: |
  ...
relevant_files_ranking:
- idx: 101
  file_name: "src/file1.py"
- ...
=====

Important Notes:
1. Output most relevant file names first, by descending order of relevancy.
2. Only include files with non-negative indices

Response (should be a valid YAML, and nothing else).
```yaml
"""
@@ -29,7 +29,7 @@ __old hunk__

@@ ... @@ def func2():
__new hunk__
unchanged code line4
+new code line5 removed
+new code line5 added
unchanged code line6

## File: 'src/file2.py'

@@ -44,7 +44,8 @@ __new hunk__
- If available, an AI-generated summary will appear and provide a high-level overview of the file changes. Note that this summary may not be fully accurate or complete.
{%- endif %}
- When quoting variables, names or file paths from the code, use backticks (`) instead of single quote (').

- Note that you only see changed code segments (diff hunks in a PR), not the entire codebase. Avoid suggestions that might duplicate existing functionality or questioning code elements (like variables declarations or import statements) that may be defined elsewhere in the codebase.
- Also note that if the code ends at an opening brace or statement that begins a new scope (like 'if', 'for', 'try'), don't treat it as incomplete. Instead, acknowledge the visible scope boundary and analyze only the code shown.

{%- if extra_instructions %}

@@ -67,7 +68,7 @@ class SubPR(BaseModel):
class KeyIssuesComponentLink(BaseModel):
    relevant_file: str = Field(description="The full file path of the relevant file")
    issue_header: str = Field(description="One or two word title for the issue. For example: 'Possible Bug', etc.")
    issue_content: str = Field(description="A short and concise summary of what should be further inspected and validated during the PR review process for this issue. Do not reference line numbers in this field.")
    issue_content: str = Field(description="A short and concise summary of what should be further inspected and validated during the PR review process for this issue. Do not mention line numbers in this field.")
    start_line: int = Field(description="The start line that corresponds to this issue in the relevant file")
    end_line: int = Field(description="The end line that corresponds to this issue in the relevant file")
@@ -217,7 +217,8 @@ class PRCodeSuggestions:

    async def publish_no_suggestions(self):
        pr_body = "## PR Code Suggestions ✨\n\nNo code suggestions found for the PR."
        if get_settings().config.publish_output and get_settings().config.publish_output_no_suggestions:
        if (get_settings().config.publish_output and
                get_settings().pr_code_suggestions.get('publish_output_no_suggestions', True)):
            get_logger().warning('No code suggestions found for the PR.')
            get_logger().debug(f"PR output", artifact=pr_body)
            if self.progress_response:

@@ -784,7 +785,7 @@ class PRCodeSuggestions:
            pr_body += "No suggestions found to improve this PR."
            return pr_body

        if get_settings().pr_code_suggestions.enable_intro_text and get_settings().config.is_auto_command:
        if get_settings().config.is_auto_command:
            pr_body += "Explore these optional code suggestions:\n\n"

        language_extension_map_org = get_settings().language_extension_map_org
@@ -1,11 +1,11 @@
import copy
from functools import partial

from jinja2 import Environment, StrictUndefined
import math
import os
import re
from tempfile import TemporaryDirectory
from typing import Dict, List, Optional, Tuple

from pr_agent.algo import MAX_TOKENS
from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler

@@ -78,47 +78,118 @@ def get_maximal_text_input_length_for_token_count_estimation():
        return 900000  # Claude API for token estimation allows maximal text input of 900K chars
    return math.inf  # Otherwise, no known limitation on input text just for token estimation
# Load documentation files to memory, decorating them with a header to mark where each file begins,
# as to help the LLM to give a better answer.
def aggregate_documentation_files_for_prompt_contents(base_path: str, doc_files: List[str]) -> Optional[str]:
    docs_prompt = ""
    for file in doc_files:
        try:
            with open(file, 'r', encoding='utf-8') as f:
                content = f.read()
            # Skip files with no text content
            if not re.search(r'[a-zA-Z]', content):
                continue
            file_path = str(file).replace(str(base_path), '')
            docs_prompt += f"\n==file name==\n\n{file_path}\n\n==file content==\n\n{content.strip()}\n=========\n\n"
        except Exception as e:
            get_logger().warning(f"Error while reading the file {file}: {e}")
            continue
    if not docs_prompt:
        get_logger().error("Couldn't find any usable documentation files. Returning None.")
        return None
    return docs_prompt
def return_document_headings(text: str, ext: str) -> str:
    try:
        lines = text.split('\n')
        headings = set()

def format_markdown_q_and_a_response(question_str: str, response_str: str, relevant_sections: List[Dict[str, str]],
                                     supported_suffixes: List[str], base_url_prefix: str, base_url_suffix: str="") -> str:
    base_url_prefix = base_url_prefix.strip('/')  # Sanitize base_url_prefix
    answer_str = ""
    answer_str += f"### Question: \n{question_str}\n\n"
    answer_str += f"### Answer:\n{response_str.strip()}\n\n"
    answer_str += f"#### Relevant Sources:\n\n"
    for section in relevant_sections:
        file = section.get('file_name').strip()
        ext = [suffix for suffix in supported_suffixes if file.endswith(suffix)]
        if not ext:
            get_logger().warning(f"Unsupported file extension: {file}")
            continue
        if str(section['relevant_section_header_string']).strip():
            markdown_header = format_markdown_header(section['relevant_section_header_string'])
            if base_url_prefix:
                answer_str += f"> - {base_url_prefix}/{file}{base_url_suffix}#{markdown_header}\n"
        if not text or not re.search(r'[a-zA-Z]', text):
            get_logger().error(f"Empty or non text content found in text: {text}.")
            return ""

        if ext in ['.md', '.mdx']:
            # Extract Markdown headings (lines starting with #)
            headings = {line.strip() for line in lines if line.strip().startswith('#')}
        elif ext == '.rst':
            # Find indices of lines that have all same character:
            # Allowed characters according to list from: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#sections
            section_chars = set('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~')

            # Find potential section marker lines (underlines/overlines): They have to be the same character
            marker_lines = []
            for i, line in enumerate(lines):
                line = line.rstrip()
                if line and all(c == line[0] for c in line) and line[0] in section_chars:
                    marker_lines.append((i, len(line)))

            # Check for headings adjacent to marker lines (below + text must be in length equal or less)
            for idx, length in marker_lines:
                # Check if it's an underline (heading is above it)
                if idx > 0 and lines[idx - 1].rstrip() and len(lines[idx - 1].rstrip()) <= length:
                    headings.add(lines[idx - 1].rstrip())
        else:
            answer_str += f"> - {base_url_prefix}/{file}{base_url_suffix}\n"
    return answer_str
            get_logger().error(f"Unsupported file extension: {ext}")
            return ""

        return '\n'.join(headings)
    except Exception as e:
        get_logger().exception(f"Unexpected exception thrown. Returning empty result.")
        return ""

# Load documentation files to memory: full file path (as will be given as prompt) -> doc contents
def map_documentation_files_to_contents(base_path: str, doc_files: list[str], max_allowed_file_len=5000) -> dict[str, str]:
    try:
        returned_dict = {}
        for file in doc_files:
            try:
                with open(file, 'r', encoding='utf-8') as f:
                    content = f.read()
                # Skip files with no text content
                if not re.search(r'[a-zA-Z]', content):
                    continue
                if len(content) > max_allowed_file_len:
                    get_logger().warning(f"File {file} length: {len(content)} exceeds limit: {max_allowed_file_len}, so it will be trimmed.")
                    content = content[:max_allowed_file_len]
                file_path = str(file).replace(str(base_path), '')
                returned_dict[file_path] = content.strip()
            except Exception as e:
                get_logger().warning(f"Error while reading the file {file}: {e}")
                continue
        if not returned_dict:
            get_logger().error("Couldn't find any usable documentation files. Returning empty dict.")
        return returned_dict
    except Exception as e:
        get_logger().exception(f"Unexpected exception thrown. Returning empty dict.")
        return {}

# Goes over files' contents, generating payload for prompt while decorating them with a header to mark where each file begins,
# as to help the LLM to give a better answer.
def aggregate_documentation_files_for_prompt_contents(file_path_to_contents: dict[str, str], return_just_headings=False) -> str:
    try:
        docs_prompt = ""
        for idx, file_path in enumerate(file_path_to_contents):
            file_contents = file_path_to_contents[file_path].strip()
            if not file_contents:
                get_logger().error(f"Got empty file contents for: {file_path}. Skipping this file.")
                continue
            if return_just_headings:
                file_headings = return_document_headings(file_contents, os.path.splitext(file_path)[-1]).strip()
                if file_headings:
                    docs_prompt += f"\n==file name==\n\n{file_path}\n\n==index==\n\n{idx}\n\n==file headings==\n\n{file_headings}\n=========\n\n"
                else:
                    get_logger().warning(f"No headers for: {file_path}. Will only use filename")
                    docs_prompt += f"\n==file name==\n\n{file_path}\n\n==index==\n\n{idx}\n\n"
            else:
                docs_prompt += f"\n==file name==\n\n{file_path}\n\n==file content==\n\n{file_contents}\n=========\n\n"
        return docs_prompt
    except Exception as e:
        get_logger().exception(f"Unexpected exception thrown. Returning empty result.")
        return ""

def format_markdown_q_and_a_response(question_str: str, response_str: str, relevant_sections: list[dict[str, str]],
                                     supported_suffixes: list[str], base_url_prefix: str, base_url_suffix: str="") -> str:
    try:
        base_url_prefix = base_url_prefix.strip('/')  # Sanitize base_url_prefix
        answer_str = ""
        answer_str += f"### Question: \n{question_str}\n\n"
        answer_str += f"### Answer:\n{response_str.strip()}\n\n"
        answer_str += f"#### Relevant Sources:\n\n"
        for section in relevant_sections:
            file = section.get('file_name').lstrip('/').strip()  # Remove any '/' in the beginning, since some models do it anyway
            ext = [suffix for suffix in supported_suffixes if file.endswith(suffix)]
            if not ext:
                get_logger().warning(f"Unsupported file extension: {file}")
                continue
            if str(section['relevant_section_header_string']).strip():
                markdown_header = format_markdown_header(section['relevant_section_header_string'])
                if base_url_prefix:
                    answer_str += f"> - {base_url_prefix}/{file}{base_url_suffix}#{markdown_header}\n"
            else:
                answer_str += f"> - {base_url_prefix}/{file}{base_url_suffix}\n"
        return answer_str
    except Exception as e:
        get_logger().exception(f"Unexpected exception thrown. Returning empty result.")
        return ""

def format_markdown_header(header: str) -> str:
    try:
@ -157,87 +228,103 @@ def clean_markdown_content(content: str) -> str:
|
||||
Returns:
|
||||
Cleaned markdown content
|
||||
"""
|
||||
# Remove HTML comments
|
||||
content = re.sub(r'<!--.*?-->', '', content, flags=re.DOTALL)
|
||||
try:
|
||||
# Remove HTML comments
|
||||
content = re.sub(r'<!--.*?-->', '', content, flags=re.DOTALL)
|
||||
|
||||
# Remove frontmatter (YAML between --- or +++ delimiters)
|
||||
content = re.sub(r'^---\s*\n.*?\n---\s*\n', '', content, flags=re.DOTALL)
|
||||
content = re.sub(r'^\+\+\+\s*\n.*?\n\+\+\+\s*\n', '', content, flags=re.DOTALL)
|
||||
# Remove frontmatter (YAML between --- or +++ delimiters)
|
||||
content = re.sub(r'^---\s*\n.*?\n---\s*\n', '', content, flags=re.DOTALL)
|
||||
content = re.sub(r'^\+\+\+\s*\n.*?\n\+\+\+\s*\n', '', content, flags=re.DOTALL)
|
||||
|
||||
# Remove excessive blank lines (more than 2 consecutive)
|
||||
content = re.sub(r'\n{3,}', '\n\n', content)
|
||||
# Remove excessive blank lines (more than 2 consecutive)
|
||||
content = re.sub(r'\n{3,}', '\n\n', content)
|
||||
|
||||
# Remove HTML tags that are often used for styling only
|
||||
content = re.sub(r'<div.*?>|</div>|<span.*?>|</span>', '', content, flags=re.DOTALL)
|
||||
# Remove HTML tags that are often used for styling only
|
||||
content = re.sub(r'<div.*?>|</div>|<span.*?>|</span>', '', content, flags=re.DOTALL)
|
||||
|
||||
# Remove image alt text which can be verbose
|
||||
content = re.sub(r'!\[(.*?)\]', '![]', content)
|
||||
# Remove image alt text which can be verbose
|
||||
content = re.sub(r'!\[(.*?)\]', '![]', content)
|
||||
|
||||
# Remove images completely
|
||||
content = re.sub(r'!\[.*?\]\(.*?\)', '', content)
|
||||
# Remove images completely
|
||||
content = re.sub(r'!\[.*?\]\(.*?\)', '', content)
|
||||
|
||||
# Remove simple HTML tags but preserve content between them
|
||||
content = re.sub(r'<(?!table|tr|td|th|thead|tbody)([a-zA-Z][a-zA-Z0-9]*)[^>]*>(.*?)</\1>',
|
||||
r'\2', content, flags=re.DOTALL)
|
||||
return content.strip()
|
||||
# Remove simple HTML tags but preserve content between them
|
||||
content = re.sub(r'<(?!table|tr|td|th|thead|tbody)([a-zA-Z][a-zA-Z0-9]*)[^>]*>(.*?)</\1>',
|
||||
r'\2', content, flags=re.DOTALL)
|
||||
return content.strip()
|
||||
except Exception as e:
|
||||
get_logger().exception(f"Unexpected exception thrown. Returning empty result.")
|
||||
return ""

class PredictionPreparator:
    def __init__(self, ai_handler, vars, system_prompt, user_prompt):
-        self.ai_handler = ai_handler
-        variables = copy.deepcopy(vars)
-        environment = Environment(undefined=StrictUndefined)
-        self.system_prompt = environment.from_string(system_prompt).render(variables)
-        self.user_prompt = environment.from_string(user_prompt).render(variables)
+        try:
+            self.ai_handler = ai_handler
+            variables = copy.deepcopy(vars)
+            environment = Environment(undefined=StrictUndefined)
+            self.system_prompt = environment.from_string(system_prompt).render(variables)
+            self.user_prompt = environment.from_string(user_prompt).render(variables)
+        except Exception as e:
+            get_logger().exception(f"Caught exception during init. Setting ai_handler to None to prevent __call__.")
+            self.ai_handler = None

+    # Called by retry_with_fallback_models and therefore, on any failure, must throw an exception:
    async def __call__(self, model: str) -> str:
+        if not self.ai_handler:
+            get_logger().error("ai handler not set. Cannot invoke model!")
+            raise ValueError("PredictionPreparator not initialized")
        try:
            response, finish_reason = await self.ai_handler.chat_completion(
                model=model, temperature=get_settings().config.temperature, system=self.system_prompt, user=self.user_prompt)
            return response
        except Exception as e:
-            get_logger().error(f"Error while preparing prediction: {e}")
-            return ""
+            get_logger().exception("Caught exception during prediction.", artifacts={'system': self.system_prompt, 'user': self.user_prompt})
+            raise e
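
The comment above assumes a retry driver that treats any exception as "try the next model". A minimal sketch of that pattern (simplified names; not the exact pr-agent implementation):

async def retry_with_fallback_models_sketch(prepared_call, models: list[str]) -> str:
    last_exc: Exception | None = None
    for model in models:  # preferred model first, then fallbacks
        try:
            return await prepared_call(model)  # PredictionPreparator.__call__ raises on any failure
        except Exception as e:
            last_exc = e  # remember the failure and try the next model
    raise last_exc if last_exc else RuntimeError("no models configured")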


class PRHelpDocs(object):
-    def __init__(self, ctx_url, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler, args: Tuple[str] = None, return_as_string: bool = False):
-        self.ctx_url = ctx_url
-        self.question = args[0] if args else None
-        self.return_as_string = return_as_string
-        self.repo_url_given_explicitly = True
-        self.repo_url = get_settings().get('PR_HELP_DOCS.REPO_URL', '')
-        self.repo_desired_branch = get_settings().get('PR_HELP_DOCS.REPO_DEFAULT_BRANCH', 'main')  # Ignored if self.repo_url is empty
-        self.include_root_readme_file = not(get_settings()['PR_HELP_DOCS.EXCLUDE_ROOT_README'])
-        self.supported_doc_exts = get_settings()['PR_HELP_DOCS.SUPPORTED_DOC_EXTS']
-        self.docs_path = get_settings()['PR_HELP_DOCS.DOCS_PATH']
+    def __init__(self, ctx_url, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler, args: tuple[str] = None, return_as_string: bool = False):
+        try:
+            self.ctx_url = ctx_url
+            self.question = args[0] if args else None
+            self.return_as_string = return_as_string
+            self.repo_url_given_explicitly = True
+            self.repo_url = get_settings().get('PR_HELP_DOCS.REPO_URL', '')
+            self.repo_desired_branch = get_settings().get('PR_HELP_DOCS.REPO_DEFAULT_BRANCH', 'main')  # Ignored if self.repo_url is empty
+            self.include_root_readme_file = not(get_settings()['PR_HELP_DOCS.EXCLUDE_ROOT_README'])
+            self.supported_doc_exts = get_settings()['PR_HELP_DOCS.SUPPORTED_DOC_EXTS']
+            self.docs_path = get_settings()['PR_HELP_DOCS.DOCS_PATH']

-        retrieved_settings = [self.include_root_readme_file, self.supported_doc_exts, self.docs_path]
-        if any([setting is None for setting in retrieved_settings]):
-            raise Exception(f"One of the settings is invalid: {retrieved_settings}")
+            retrieved_settings = [self.include_root_readme_file, self.supported_doc_exts, self.docs_path]
+            if any([setting is None for setting in retrieved_settings]):
+                raise Exception(f"One of the settings is invalid: {retrieved_settings}")

-        self.git_provider = get_git_provider_with_context(ctx_url)
-        if not self.git_provider:
-            raise Exception(f"No git provider found at {ctx_url}")
-        if not self.repo_url:
-            self.repo_url_given_explicitly = False
-            get_logger().debug(f"No explicit repo url provided, deducing it from type: {self.git_provider.__class__.__name__} "
-                               f"context url: {self.ctx_url}")
-            self.repo_url = self.git_provider.get_git_repo_url(self.ctx_url)
-            if not self.repo_url:
-                raise Exception(f"Unable to deduce repo url from type: {self.git_provider.__class__.__name__} url: {self.ctx_url}")
-            get_logger().debug(f"deduced repo url: {self.repo_url}")
-            self.repo_desired_branch = None  # Inferred from the repo provider.
+            self.git_provider = get_git_provider_with_context(ctx_url)
+            if not self.git_provider:
+                raise Exception(f"No git provider found at {ctx_url}")
+            if not self.repo_url:
+                self.repo_url_given_explicitly = False
+                get_logger().debug(f"No explicit repo url provided, deducing it from type: {self.git_provider.__class__.__name__} "
+                                   f"context url: {self.ctx_url}")
+                self.repo_url = self.git_provider.get_git_repo_url(self.ctx_url)
+                if not self.repo_url:
+                    raise Exception(f"Unable to deduce repo url from type: {self.git_provider.__class__.__name__} url: {self.ctx_url}")
+                get_logger().debug(f"deduced repo url: {self.repo_url}")
+                self.repo_desired_branch = None  # Inferred from the repo provider.

-        self.ai_handler = ai_handler()
-        self.vars = {
-            "docs_url": self.repo_url,
-            "question": self.question,
-            "snippets": "",
-        }
-        self.token_handler = TokenHandler(None,
-                                          self.vars,
-                                          get_settings().pr_help_docs_prompts.system,
-                                          get_settings().pr_help_docs_prompts.user)
+            self.ai_handler = ai_handler()
+            self.vars = {
+                "docs_url": self.repo_url,
+                "question": self.question,
+                "snippets": "",
+            }
+            self.token_handler = TokenHandler(None,
+                                              self.vars,
+                                              get_settings().pr_help_docs_prompts.system,
+                                              get_settings().pr_help_docs_prompts.user)
+        except Exception as e:
+            get_logger().exception(f"Caught exception during init. Setting self.question to None to prevent run() from doing anything.")
+            self.question = None
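
Putting the pieces together, a hedged end-to-end sketch of driving this tool (the context URL and the question are invented; it assumes pr-agent settings and a git provider are already configured):

import asyncio

async def main():
    tool = PRHelpDocs("https://github.com/some-org/some-repo/issues/1",
                      args=("How do I configure the docs path?",),
                      return_as_string=True)  # skip publishing; just return the markdown answer
    answer = await tool.run()  # returns None/"" when no usable docs were found or an error occurred
    print(answer)

asyncio.run(main())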

    async def run(self):
        if not self.question:
@@ -246,7 +333,93 @@ class PRHelpDocs(object):

        try:
+            # Clone the repository and gather relevant documentation files.
+            docs_prompt = None
+            docs_filepath_to_contents = self._gen_filenames_to_contents_map_from_repo()

+            # Generate a prompt for the AI model. This will be the full text of all the documentation files combined.
+            docs_prompt = aggregate_documentation_files_for_prompt_contents(docs_filepath_to_contents)
+            if not docs_filepath_to_contents or not docs_prompt:
+                get_logger().warning(f"Could not find any usable documentation. Returning with no result...")
+                return None
+            docs_prompt_to_send_to_model = docs_prompt

+            # Estimate how many tokens will be needed.
+            # In case the expected number of tokens exceeds LLM limits, retry with just headings, asking the LLM to rank according to relevance to the question.
+            # Based on the returned ranking, rerun, but sort the documents accordingly; this time, trim in case the limit is exceeded.

+            # First, check that the text is not too long to even query the LLM provider:
+            max_allowed_txt_input = get_maximal_text_input_length_for_token_count_estimation()
+            invoke_llm_just_with_headings = self._trim_docs_input(docs_prompt_to_send_to_model, max_allowed_txt_input,
+                                                                  only_return_if_trim_needed=True)
+            if invoke_llm_just_with_headings:
+                # The entire docs content is too long. Rank and return according to relevance.
+                docs_prompt_to_send_to_model = await self._rank_docs_and_return_them_as_prompt(docs_filepath_to_contents,
+                                                                                               max_allowed_txt_input)

+            if not docs_prompt_to_send_to_model:
+                get_logger().error("Failed to generate docs prompt for model. Returning with no result...")
+                return
+            # At this point, either all original documents will be used (if their total length doesn't exceed limits), or only those selected.
+            self.vars['snippets'] = docs_prompt_to_send_to_model.strip()
+            # Run the AI model and extract sections from its response
+            response = await retry_with_fallback_models(PredictionPreparator(self.ai_handler, self.vars,
+                                                                             get_settings().pr_help_docs_prompts.system,
+                                                                             get_settings().pr_help_docs_prompts.user),
+                                                        model_type=ModelType.REGULAR)
+            response_yaml = load_yaml(response)
+            if not response_yaml:
+                get_logger().error("Failed to parse the AI response.", artifacts={'response': response})
+                return
+            response_str = response_yaml.get('response')
+            relevant_sections = response_yaml.get('relevant_sections')
+            if not response_str or not relevant_sections:
+                get_logger().error("Failed to extract response/relevant sections.",
+                                   artifacts={'raw_response': response, 'response_str': response_str, 'relevant_sections': relevant_sections})
+                return
+            if int(response_yaml.get('question_is_relevant', '1')) == 0:
+                get_logger().warning(f"Question is not relevant. Returning without an answer...",
+                                     artifacts={'raw_response': response})
+                return

+            # Format the response as markdown
+            answer_str = self._format_model_answer(response_str, relevant_sections)
+            if self.return_as_string:  # Skip publishing
+                return answer_str
+            # Otherwise, publish the answer if it is non-empty and publishing is not turned off:
+            if answer_str and get_settings().config.publish_output:
+                self.git_provider.publish_comment(answer_str)
+            else:
+                get_logger().info("Answer:", artifacts={'answer_str': answer_str})
+            return answer_str
+        except Exception as e:
+            get_logger().exception('failed to provide answer to the given user question as a result of a thrown exception (see above)')
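
For reference, the parsing above expects the model reply to be YAML of roughly this shape (field names are taken from the code; the values are invented). load_yaml is pr-agent's tolerant YAML loader, assumed importable from pr_agent.algo.utils as elsewhere in the project:

from pr_agent.algo.utils import load_yaml  # assumption: same import used elsewhere in pr-agent

example_reply = """
question_is_relevant: 1
response: |
  Set the relevant key in your configuration file, then rerun the tool.
relevant_sections:
- file_name: docs/usage.md
  relevant_section_header_string: '## Configuration'
"""
parsed = load_yaml(example_reply)
assert parsed.get('response') and parsed.get('relevant_sections')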

+    def _find_all_document_files_matching_exts(self, abs_docs_path: str, ignore_readme=False, max_allowed_files=5000) -> list[str]:
+        try:
+            matching_files = []

+            # Ensure extensions don't have leading dots and are lowercase
+            dotless_extensions = [ext.lower().lstrip('.') for ext in self.supported_doc_exts]

+            # Walk through directory and subdirectories
+            file_cntr = 0
+            for root, _, files in os.walk(abs_docs_path):
+                for file in files:
+                    if ignore_readme and root == abs_docs_path and file.lower() in [f"readme.{ext}" for ext in dotless_extensions]:
+                        continue
+                    # Check if file has one of the specified extensions
+                    if any(file.lower().endswith(f'.{ext}') for ext in dotless_extensions):
+                        file_cntr += 1
+                        matching_files.append(os.path.join(root, file))
+                        if file_cntr >= max_allowed_files:
+                            get_logger().warning(f"Found at least {max_allowed_files} files in {abs_docs_path}, skipping the rest.")
+                            return matching_files
+            return matching_files
+        except Exception as e:
+            get_logger().exception(f"Unexpected exception thrown. Returning empty list.")
+            return []
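
A quick check of the suffix matching used above (file names invented):

supported_doc_exts = ['.md', 'MDX', '.rst']
dotless_extensions = [ext.lower().lstrip('.') for ext in supported_doc_exts]  # ['md', 'mdx', 'rst']
for name in ['README.md', 'guide.MDX', 'notes.txt']:
    print(name, any(name.lower().endswith(f'.{ext}') for ext in dotless_extensions))
# README.md True / guide.MDX True / notes.txt False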

+    def _gen_filenames_to_contents_map_from_repo(self) -> dict[str, str]:
+        try:
            with TemporaryDirectory() as tmp_dir:
                get_logger().debug(f"About to clone repository: {self.repo_url} to temporary directory: {tmp_dir}...")
                returned_cloned_repo_root = self.git_provider.clone(self.repo_url, tmp_dir, remove_dest_folder=False)
@@ -268,103 +441,120 @@ class PRHelpDocs(object):
                                                                       ignore_readme=(self.docs_path=='.')))
                if not doc_files:
                    get_logger().warning(f"No documentation files found matching file extensions: "
-                                         f"{self.supported_doc_exts} under repo: {self.repo_url} path: {self.docs_path}")
-                    return None
+                                         f"{self.supported_doc_exts} under repo: {self.repo_url} "
+                                         f"path: {self.docs_path}. Returning empty dict.")
+                    return {}

-                get_logger().info(f'Answering a question inside context {self.ctx_url} for repo: {self.repo_url}'
-                                  f' using the following documentation files: ', artifacts={'doc_files': doc_files})
+                get_logger().info(f'For context {self.ctx_url} and repo: {self.repo_url}'
+                                  f' will be using the following documentation files: ',
+                                  artifacts={'doc_files': doc_files})

-                docs_prompt = aggregate_documentation_files_for_prompt_contents(returned_cloned_repo_root.path, doc_files)
-                if not docs_prompt:
-                    get_logger().warning(f"Error reading one of the documentation files. Returning with no result...")
-                    return None
-                docs_prompt_to_send_to_model = docs_prompt
+                return map_documentation_files_to_contents(returned_cloned_repo_root.path, doc_files)
+        except Exception as e:
+            get_logger().exception(f"Unexpected exception thrown. Returning empty dict.")
+            return {}
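
map_documentation_files_to_contents is referenced above but not shown in this hunk; presumably it reads each documentation file under the cloned root into a {path: contents} map. A minimal sketch under that assumption (the _sketch name is invented):

import os

def map_documentation_files_to_contents_sketch(repo_root: str, doc_files: list[str]) -> dict[str, str]:
    contents = {}
    for path in doc_files:
        try:
            with open(path, 'r', encoding='utf-8', errors='ignore') as f:
                contents[os.path.relpath(path, repo_root)] = f.read()
        except OSError:
            continue  # skip unreadable files rather than failing the whole map
    return contents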

-            # Estimate how many tokens will be needed. Trim in case of exceeding limit.
-            # Firstly, check if text needs to be trimmed, as some models fail to return the estimated token count if the input text is too long.
-            max_allowed_txt_input = get_maximal_text_input_length_for_token_count_estimation()
-            if len(docs_prompt_to_send_to_model) >= max_allowed_txt_input:
-                get_logger().warning(f"Text length: {len(docs_prompt_to_send_to_model)} exceeds the current returned limit of {max_allowed_txt_input} just for token count estimation. Trimming the text...")
-                docs_prompt_to_send_to_model = docs_prompt_to_send_to_model[:max_allowed_txt_input]
+    def _trim_docs_input(self, docs_input: str, max_allowed_txt_input: int, only_return_if_trim_needed=False) -> bool|str:
+        try:
+            if len(docs_input) >= max_allowed_txt_input:
+                get_logger().warning(
+                    f"Text length: {len(docs_input)} exceeds the current returned limit of {max_allowed_txt_input} just for token count estimation. Trimming the text...")
+                if only_return_if_trim_needed:
+                    return True
+                docs_input = docs_input[:max_allowed_txt_input]
            # Then, count the tokens in the prompt. If the count exceeds the limit, trim the text.
-            token_count = self.token_handler.count_tokens(docs_prompt_to_send_to_model, force_accurate=True)
+            token_count = self.token_handler.count_tokens(docs_input, force_accurate=True)
            get_logger().debug(f"Estimated token count of documentation to send to model: {token_count}")
            model = get_settings().config.model
            if model in MAX_TOKENS:
-                max_tokens_full = MAX_TOKENS[model]  # note - here we take the actual max tokens, without any reductions. we do aim to get the full documentation website in the prompt
+                max_tokens_full = MAX_TOKENS[
+                    model]  # note - here we take the actual max tokens, without any reductions. we do aim to get the full documentation website in the prompt
            else:
                max_tokens_full = get_max_tokens(model)
-            delta_output = 5000  #Elbow room to reduce chance of exceeding token limit or model paying less attention to prompt guidelines.
+            delta_output = 5000  # Elbow room to reduce chance of exceeding token limit or model paying less attention to prompt guidelines.
            if token_count > max_tokens_full - delta_output:
-                docs_prompt_to_send_to_model = clean_markdown_content(docs_prompt_to_send_to_model)  #Reduce unnecessary text/images/etc.
-                get_logger().info(f"Token count {token_count} exceeds the limit {max_tokens_full - delta_output}. Attempting to clip text to fit within the limit...")
-                docs_prompt_to_send_to_model = clip_tokens(docs_prompt_to_send_to_model, max_tokens_full - delta_output,
+                if only_return_if_trim_needed:
+                    return True
+                docs_input = clean_markdown_content(
+                    docs_input)  # Reduce unnecessary text/images/etc.
+                get_logger().info(
+                    f"Token count {token_count} exceeds the limit {max_tokens_full - delta_output}. Attempting to clip text to fit within the limit...")
+                docs_input = clip_tokens(docs_input, max_tokens_full - delta_output,
                                         num_input_tokens=token_count)
-            self.vars['snippets'] = docs_prompt_to_send_to_model.strip()
+            if only_return_if_trim_needed:
+                return False
+            return docs_input
+        except Exception as e:
+            # Unexpected exception. Rethrowing it since:
+            # 1. This is an internal function.
+            # 2. An empty str/False result is a valid one - would require now checking also for None.
+            get_logger().exception(f"Unexpected exception thrown. Rethrowing it...")
+            raise e
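
clip_tokens comes from pr-agent's utilities; the idea is to cut the text down to a token budget rather than a character budget. A hedged sketch of that idea, assuming a tiktoken-style encoder (this is not the project's exact implementation):

import tiktoken

def clip_tokens_sketch(text: str, max_tokens: int) -> str:
    enc = tiktoken.get_encoding("cl100k_base")  # assumption: an OpenAI-style BPE encoding
    tokens = enc.encode(text)
    if len(tokens) <= max_tokens:
        return text
    return enc.decode(tokens[:max_tokens])  # keep the head of the prompt; drop the tail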

+    async def _rank_docs_and_return_them_as_prompt(self, docs_filepath_to_contents: dict[str, str], max_allowed_txt_input: int) -> str:
+        try:
+            # Return just the file names and their headings (if they exist):
+            docs_prompt_to_send_to_model = (
+                aggregate_documentation_files_for_prompt_contents(docs_filepath_to_contents,
+                                                                  return_just_headings=True))
+            # Verify the list of headings does not exceed limits - trim it if it does.
+            docs_prompt_to_send_to_model = self._trim_docs_input(docs_prompt_to_send_to_model, max_allowed_txt_input,
+                                                                 only_return_if_trim_needed=False)
+            if not docs_prompt_to_send_to_model:
+                get_logger().error("_trim_docs_input returned an empty result.")
+                return ""

            self.vars['snippets'] = docs_prompt_to_send_to_model.strip()
            # Run the AI model and extract sections from its response
            response = await retry_with_fallback_models(PredictionPreparator(self.ai_handler, self.vars,
-                                                                             get_settings().pr_help_docs_prompts.system,
-                                                                             get_settings().pr_help_docs_prompts.user),
+                                                                             get_settings().pr_help_docs_headings_prompts.system,
+                                                                             get_settings().pr_help_docs_headings_prompts.user),
                                                        model_type=ModelType.REGULAR)
            response_yaml = load_yaml(response)
            if not response_yaml:
-                get_logger().exception("Failed to parse the AI response.", artifacts={'response': response})
-                raise Exception(f"Failed to parse the AI response.")
-            response_str = response_yaml.get('response')
-            relevant_sections = response_yaml.get('relevant_sections')
-            if not response_str or not relevant_sections:
-                get_logger().exception("Failed to extract response/relevant sections.",
-                                       artifacts={'response_str': response_str, 'relevant_sections': relevant_sections})
-                raise Exception(f"Failed to extract response/relevant sections.")
+                get_logger().error("Failed to parse the AI response.", artifacts={'response': response})
+                return ""
+            # else: Sanitize the output so that the file names match 1:1 the dictionary keys. Do this via the file index and not its name, which may be altered by the model.
+            valid_indices = [int(entry['idx']) for entry in response_yaml.get('relevant_files_ranking')
+                             if int(entry['idx']) >= 0 and int(entry['idx']) < len(docs_filepath_to_contents)]
+            valid_file_paths = [list(docs_filepath_to_contents.keys())[idx] for idx in valid_indices]
+            selected_docs_dict = {file_path: docs_filepath_to_contents[file_path] for file_path in valid_file_paths}
+            docs_prompt = aggregate_documentation_files_for_prompt_contents(selected_docs_dict)
+            docs_prompt_to_send_to_model = docs_prompt

-            # Format the response as markdown
-            canonical_url_prefix, canonical_url_suffix = self.git_provider.get_canonical_url_parts(repo_git_url=self.repo_url if self.repo_url_given_explicitly else None,
-                                                                                                   desired_branch=self.repo_desired_branch)
-            answer_str = format_markdown_q_and_a_response(self.question, response_str, relevant_sections, self.supported_doc_exts, canonical_url_prefix, canonical_url_suffix)
+            # Check that the updated list of documents does not exceed limits, and trim it if it does:
+            docs_prompt_to_send_to_model = self._trim_docs_input(docs_prompt_to_send_to_model, max_allowed_txt_input,
+                                                                 only_return_if_trim_needed=False)
+            if not docs_prompt_to_send_to_model:
+                get_logger().error("_trim_docs_input returned an empty result.")
+                return ""
+            return docs_prompt_to_send_to_model
+        except Exception as e:
+            get_logger().exception(f"Unexpected exception thrown. Returning empty result.")
+            return ""

+    def _format_model_answer(self, response_str: str, relevant_sections: list[dict[str, str]]) -> str:
+        try:
+            canonical_url_prefix, canonical_url_suffix = (
+                self.git_provider.get_canonical_url_parts(repo_git_url=self.repo_url if self.repo_url_given_explicitly else None,
+                                                          desired_branch=self.repo_desired_branch))
+            answer_str = format_markdown_q_and_a_response(self.question, response_str, relevant_sections,
+                                                          self.supported_doc_exts, canonical_url_prefix, canonical_url_suffix)
            if answer_str:
                # Remove the question phrase and replace it with a light bulb and a heading mentioning this is an automated answer:
                answer_str = modify_answer_section(answer_str)
-            # For PR help docs, we return the answer string instead of publishing it
+            # In case the response should not be published and should instead be returned as a string, stop here:
            if answer_str and self.return_as_string:
-                if int(response_yaml.get('question_is_relevant', '1')) == 0:
-                    get_logger().warning(f"Chat help docs answer would be ignored due to an invalid question.",
-                                         artifacts={'answer_str': answer_str})
-                    return ""
                get_logger().info(f"Chat help docs answer", artifacts={'answer_str': answer_str})
                return answer_str

-            # Publish the answer
-            if not answer_str or int(response_yaml.get('question_is_relevant', '1')) == 0:
+            if not answer_str:
                get_logger().info(f"No answer found")
                return ""

            if self.git_provider.is_supported("gfm_markdown") and get_settings().pr_help_docs.enable_help_text:
                answer_str += "<hr>\n\n<details> <summary><strong>💡 Tool usage guide:</strong></summary><hr> \n\n"
                answer_str += HelpMessage.get_help_docs_usage_guide()
                answer_str += "\n</details>\n"

-            if get_settings().config.publish_output:
-                self.git_provider.publish_comment(answer_str)
-            else:
-                get_logger().info("Answer:", artifacts={'answer_str': answer_str})

-        except:
-            get_logger().exception('failed to provide answer to given user question as a result of a thrown exception (see above)')


-    def _find_all_document_files_matching_exts(self, abs_docs_path: str, ignore_readme=False) -> List[str]:
-        matching_files = []

-        # Ensure extensions don't have leading dots and are lowercase
-        dotless_extensions = [ext.lower().lstrip('.') for ext in self.supported_doc_exts]

-        # Walk through directory and subdirectories
-        for root, _, files in os.walk(abs_docs_path):
-            for file in files:
-                if ignore_readme and root == abs_docs_path and file.lower() in [f"readme.{ext}" for ext in dotless_extensions]:
-                    continue
-                # Check if file has one of the specified extensions
-                if any(file.lower().endswith(f'.{ext}') for ext in dotless_extensions):
-                    matching_files.append(os.path.join(root, file))
-        return matching_files
+            return answer_str
+        except Exception as e:
+            get_logger().exception(f"Unexpected exception thrown. Returning empty result.")
+            return ""


@@ -9,7 +9,7 @@ from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
 from pr_agent.algo.token_handler import TokenHandler
 from pr_agent.algo.utils import ModelType
 from pr_agent.config_loader import get_settings
-from pr_agent.git_providers import get_git_provider
+from pr_agent.git_providers import get_git_provider, GitLabProvider
 from pr_agent.git_providers.git_provider import get_main_pr_language
 from pr_agent.log import get_logger
 from pr_agent.servers.help import HelpMessage

@@ -116,10 +116,22 @@ class PRQuestions:
            model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)
        return response

+    def gitlab_protections(self, model_answer: str) -> str:
+        github_quick_actions_MR = ["/approve", "/close", "/merge", "/reopen", "/unapprove", "/title", "/assign",
+                                   "/copy_metadata", "/target_branch"]
+        if any(action in model_answer for action in github_quick_actions_MR):
+            str_err = "Model answer contains GitHub quick actions, which are not supported in GitLab"
+            get_logger().error(str_err)
+            return str_err
+        return model_answer
+
    def _prepare_pr_answer(self) -> str:
        model_answer = self.prediction.strip()
        # sanitize the answer so that no line will start with "/"
        model_answer_sanitized = model_answer.replace("\n/", "\n /")
        model_answer_sanitized = model_answer_sanitized.replace("\r/", "\r /")
+        if isinstance(self.git_provider, GitLabProvider):
+            model_answer_sanitized = self.gitlab_protections(model_answer_sanitized)
        if model_answer_sanitized.startswith("/"):
            model_answer_sanitized = " " + model_answer_sanitized
        if model_answer_sanitized != model_answer:
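
A quick illustration of the "/" sanitization above (the sample answer is invented):

answer = "/merge\nTry /approve on the MR."
sanitized = answer.replace("\n/", "\n /").replace("\r/", "\r /")
if sanitized.startswith("/"):
    sanitized = " " + sanitized  # a leading "/" on the very first line is padded separately
print(sanitized)  # " /merge\nTry /approve on the MR." - line starts are padded, inline "/approve" is untouched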

@@ -229,6 +229,10 @@ class PRReviewer:
                                          first_key=first_key, last_key=last_key)
        github_action_output(data, 'review')

+        if 'review' not in data:
+            get_logger().exception("Failed to parse review data", artifact={"data": data})
+            return ""
+
        # move data['review'] 'key_issues_to_review' key to the end of the dictionary
        if 'key_issues_to_review' in data['review']:
            key_issues_to_review = data['review'].pop('key_issues_to_review')

@@ -140,11 +140,15 @@ class PRUpdateChangelog:
        return new_file_content, answer

    def _push_changelog_update(self, new_file_content, answer):
+        if get_settings().pr_update_changelog.get("skip_ci_on_push", True):
+            commit_message = "[skip ci] Update CHANGELOG.md"
+        else:
+            commit_message = "Update CHANGELOG.md"
        self.git_provider.create_or_update_pr_file(
            file_path="CHANGELOG.md",
            branch=self.git_provider.get_pr_branch(),
            contents=new_file_content,
-            message="[skip ci] Update CHANGELOG.md",
+            message=commit_message,
        )

        sleep(5)  # wait for the file to be updated

@@ -13,7 +13,7 @@ google-cloud-aiplatform==1.38.0
 google-generativeai==0.8.3
 google-cloud-storage==2.10.0
 Jinja2==3.1.2
-litellm==1.61.20
+litellm==1.66.1
 loguru==0.7.2
 msrest==0.7.1
 openai>=1.55.3