From 5c7b65810c952728fea911d461840d938ee75605 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sat, 21 Sep 2024 19:11:46 +0300 Subject: [PATCH] Refactor S3 file handling and update Dockerfile to include local Chroma DB file --- docker/Dockerfile | 1 + pr_agent/tools/pr_help_message.py | 27 +++++++++++---------------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 601e16ec..07f74cb5 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,6 +1,7 @@ FROM python:3.12.3 AS base WORKDIR /app +ADD docs/chroma_db.zip /app/docs/chroma_db.zip ADD pyproject.toml . ADD requirements.txt . RUN pip install . && rm pyproject.toml requirements.txt diff --git a/pr_agent/tools/pr_help_message.py b/pr_agent/tools/pr_help_message.py index 6dce844c..7bad18cd 100644 --- a/pr_agent/tools/pr_help_message.py +++ b/pr_agent/tools/pr_help_message.py @@ -71,18 +71,19 @@ class PRHelpMessage: sim_results = [] try: from langchain_chroma import Chroma - import boto3 + from urllib import request with tempfile.TemporaryDirectory() as temp_dir: # Define the local file path within the temporary directory local_file_path = os.path.join(temp_dir, 'chroma_db.zip') - # Initialize the S3 client - s3 = boto3.client('s3') - - # Download the file from S3 to the temporary directory bucket = 'pr-agent' file_name = 'chroma_db.zip' - s3.download_file(bucket, file_name, local_file_path) + s3_url = f'https://{bucket}.s3.amazonaws.com/{file_name}' + request.urlretrieve(s3_url, local_file_path) + + # # Download the file from S3 to the temporary directory + # s3 = boto3.client('s3') + # s3.download_file(bucket, file_name, local_file_path) # Extract the contents of the zip file with zipfile.ZipFile(local_file_path, 'r') as zip_ref: @@ -102,8 +103,11 @@ class PRHelpMessage: try: from langchain_chroma import Chroma get_logger().info("Loading the Chroma index...") + db_path = "./docs/chroma_db.zip" + if not os.path.exists(db_path): + get_logger().error("Local db not found") + return sim_results with tempfile.TemporaryDirectory() as temp_dir: - db_path = "./docs/chroma_db.zip" # Extract the ZIP file with zipfile.ZipFile(db_path, 'r') as zip_ref: @@ -302,8 +306,6 @@ class PRHelpMessage: async def prepare_relevant_snippets(self, sim_results): # Get relevant snippets - relevant_pages = [] - relevant_snippets = [] relevant_snippets_full = [] relevant_pages_full = [] relevant_snippets_full_header = [] @@ -315,13 +317,6 @@ class PRHelpMessage: relevant_snippets_full.append(content) relevant_snippets_full_header.append(extract_header(content)) relevant_pages_full.append(page) - if not relevant_pages: - relevant_pages.append(page) - relevant_snippets.append(content) - elif score > th: - if page not in relevant_pages: - relevant_pages.append(page) - relevant_snippets.append(content) # build the snippets string relevant_snippets_str = "" for i, s in enumerate(relevant_snippets_full):