diff --git a/pr_agent/tools/pr_similar_issue.py b/pr_agent/tools/pr_similar_issue.py index 7cf7b31b..025c16fe 100644 --- a/pr_agent/tools/pr_similar_issue.py +++ b/pr_agent/tools/pr_similar_issue.py @@ -1,3 +1,4 @@ +import time from enum import Enum from typing import List @@ -44,6 +45,13 @@ class PRSimilarIssue: # check if index exists, and if repo is already indexed run_from_scratch = False + if run_from_scratch: + if not index_name in pinecone.list_indexes(): + get_logger().info('Removing index...') + pinecone.init(api_key=api_key, environment=environment) + pinecone.delete_index(index_name) + get_logger().info('Done') + upsert = True pinecone.init(api_key=api_key, environment=environment) if not index_name in pinecone.list_indexes(): @@ -116,7 +124,16 @@ class PRSimilarIssue: relevant_comment_number_list = [] score_list = [] for r in res['matches']: - issue_number = int(r["id"].split('.')[0].split('_')[-1]) + # skip example issue + if 'example_issue_' in r["id"]: + continue + + try: + issue_number = int(r["id"].split('.')[0].split('_')[-1]) + except: + get_logger().debug(f"Failed to parse issue number from {r['id']}") + continue + if original_issue_number == issue_number: continue if issue_number not in relevant_issues_number_list: @@ -237,6 +254,7 @@ class PRSimilarIssue: if not upsert: get_logger().info('Creating index from scratch...') ds.to_pinecone_index(self.index_name, api_key=api_key, environment=environment) + time.sleep(15) # wait for pinecone to finalize indexing before querying else: get_logger().info('Upserting index...') namespace = "" @@ -244,6 +262,7 @@ class PRSimilarIssue: concurrency: int = 10 pinecone.init(api_key=api_key, environment=environment) ds._upsert_to_index(self.index_name, namespace, batch_size, concurrency) + time.sleep(5) # wait for pinecone to finalize upserting before querying get_logger().info('Done')