mirror of
https://github.com/qodo-ai/pr-agent.git
synced 2025-07-06 13:50:44 +08:00
Merge pull request #1244 from Codium-ai/tr/help_rag
Refactor S3 file handling and update Dockerfile to include local Chroma DB file
This commit is contained in:
@ -1,6 +1,7 @@
|
|||||||
FROM python:3.12.3 AS base
|
FROM python:3.12.3 AS base
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
ADD docs/chroma_db.zip /app/docs/chroma_db.zip
|
||||||
ADD pyproject.toml .
|
ADD pyproject.toml .
|
||||||
ADD requirements.txt .
|
ADD requirements.txt .
|
||||||
RUN pip install . && rm pyproject.toml requirements.txt
|
RUN pip install . && rm pyproject.toml requirements.txt
|
||||||
|
BIN
docs/chroma_db.zip
Normal file
BIN
docs/chroma_db.zip
Normal file
Binary file not shown.
@ -71,18 +71,19 @@ class PRHelpMessage:
|
|||||||
sim_results = []
|
sim_results = []
|
||||||
try:
|
try:
|
||||||
from langchain_chroma import Chroma
|
from langchain_chroma import Chroma
|
||||||
import boto3
|
from urllib import request
|
||||||
with tempfile.TemporaryDirectory() as temp_dir:
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
# Define the local file path within the temporary directory
|
# Define the local file path within the temporary directory
|
||||||
local_file_path = os.path.join(temp_dir, 'chroma_db.zip')
|
local_file_path = os.path.join(temp_dir, 'chroma_db.zip')
|
||||||
|
|
||||||
# Initialize the S3 client
|
|
||||||
s3 = boto3.client('s3')
|
|
||||||
|
|
||||||
# Download the file from S3 to the temporary directory
|
|
||||||
bucket = 'pr-agent'
|
bucket = 'pr-agent'
|
||||||
file_name = 'chroma_db.zip'
|
file_name = 'chroma_db.zip'
|
||||||
s3.download_file(bucket, file_name, local_file_path)
|
s3_url = f'https://{bucket}.s3.amazonaws.com/{file_name}'
|
||||||
|
request.urlretrieve(s3_url, local_file_path)
|
||||||
|
|
||||||
|
# # Download the file from S3 to the temporary directory
|
||||||
|
# s3 = boto3.client('s3')
|
||||||
|
# s3.download_file(bucket, file_name, local_file_path)
|
||||||
|
|
||||||
# Extract the contents of the zip file
|
# Extract the contents of the zip file
|
||||||
with zipfile.ZipFile(local_file_path, 'r') as zip_ref:
|
with zipfile.ZipFile(local_file_path, 'r') as zip_ref:
|
||||||
@ -102,8 +103,11 @@ class PRHelpMessage:
|
|||||||
try:
|
try:
|
||||||
from langchain_chroma import Chroma
|
from langchain_chroma import Chroma
|
||||||
get_logger().info("Loading the Chroma index...")
|
get_logger().info("Loading the Chroma index...")
|
||||||
with tempfile.TemporaryDirectory() as temp_dir:
|
|
||||||
db_path = "./docs/chroma_db.zip"
|
db_path = "./docs/chroma_db.zip"
|
||||||
|
if not os.path.exists(db_path):
|
||||||
|
get_logger().error("Local db not found")
|
||||||
|
return sim_results
|
||||||
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
|
|
||||||
# Extract the ZIP file
|
# Extract the ZIP file
|
||||||
with zipfile.ZipFile(db_path, 'r') as zip_ref:
|
with zipfile.ZipFile(db_path, 'r') as zip_ref:
|
||||||
@ -302,8 +306,6 @@ class PRHelpMessage:
|
|||||||
|
|
||||||
async def prepare_relevant_snippets(self, sim_results):
|
async def prepare_relevant_snippets(self, sim_results):
|
||||||
# Get relevant snippets
|
# Get relevant snippets
|
||||||
relevant_pages = []
|
|
||||||
relevant_snippets = []
|
|
||||||
relevant_snippets_full = []
|
relevant_snippets_full = []
|
||||||
relevant_pages_full = []
|
relevant_pages_full = []
|
||||||
relevant_snippets_full_header = []
|
relevant_snippets_full_header = []
|
||||||
@ -315,13 +317,6 @@ class PRHelpMessage:
|
|||||||
relevant_snippets_full.append(content)
|
relevant_snippets_full.append(content)
|
||||||
relevant_snippets_full_header.append(extract_header(content))
|
relevant_snippets_full_header.append(extract_header(content))
|
||||||
relevant_pages_full.append(page)
|
relevant_pages_full.append(page)
|
||||||
if not relevant_pages:
|
|
||||||
relevant_pages.append(page)
|
|
||||||
relevant_snippets.append(content)
|
|
||||||
elif score > th:
|
|
||||||
if page not in relevant_pages:
|
|
||||||
relevant_pages.append(page)
|
|
||||||
relevant_snippets.append(content)
|
|
||||||
# build the snippets string
|
# build the snippets string
|
||||||
relevant_snippets_str = ""
|
relevant_snippets_str = ""
|
||||||
for i, s in enumerate(relevant_snippets_full):
|
for i, s in enumerate(relevant_snippets_full):
|
||||||
|
Reference in New Issue
Block a user