commit 8962c9cf8a
parent bc95cf5b8e
Author: mrT23
Date:   2023-09-06 09:43:23 +03:00

2 changed files with 24 additions and 20 deletions

Changed file 1 of 2 (configuration settings, TOML):

@@ -100,7 +100,7 @@ polling_interval_seconds = 30
 [pr_similar_issue]
 skip_comments = false
 force_update_dataset = false
-max_issues_to_scan = 1000
+max_issues_to_scan = 500

 [pinecone]
 # fill and place in .secrets.toml
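
Note on this hunk: the default cap on how many issues the similar-issue tool scans is lowered from 1000 to 500. As a rough, hedged illustration of how such a cap is typically consumed (the function and parameter names below are assumptions for illustration, not taken from this commit), a scan loop might stop once the configured limit is reached:

# Minimal sketch (not from this commit): honoring the max_issues_to_scan
# setting shown above. All names here are illustrative only.
from typing import Iterable, List

def scan_issues(issues: Iterable[dict], max_issues_to_scan: int = 500) -> List[dict]:
    """Collect issues for indexing, stopping at the configured cap."""
    scanned: List[dict] = []
    for counter, issue in enumerate(issues):
        if counter >= max_issues_to_scan:
            break  # respect the cap, lowered from 1000 to 500 in this commit
        scanned.append(issue)
    return scanned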

Changed file 2 of 2 (PRSimilarIssue tool, Python):

@@ -183,7 +183,7 @@ class PRSimilarIssue:
             if num_words_comment < 10 or not isinstance(comment_body, str):
                 continue

-            if len(issue_str) < 8000 or \
+            if len(comment_body) < 8000 or \
                     self.token_handler.count_tokens(comment_body) < MAX_TOKENS[MODEL]:
                 comment_record = Record(
                     id=issue_key + ".comment_" + str(j + 1),
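
Note on this hunk: the length guard previously measured issue_str (the parent issue body), but the record being built here is a comment, so the cheap character-length check now looks at comment_body before falling back to a token count. A hedged sketch of that guard as a standalone helper (the helper name, MAX_CHARS constant, and parameters are assumptions for illustration):

# Illustrative sketch only: the "short enough to embed" check used above.
MAX_CHARS = 8000

def should_embed(text: str, count_tokens, max_tokens: int) -> bool:
    """Accept text that is short by character count, or otherwise within the
    model's token budget; the len() check avoids tokenizing most inputs."""
    return len(text) < MAX_CHARS or count_tokens(text) < max_tokens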
@@ -205,9 +205,13 @@ class PRSimilarIssue:
             embeds = [record['embedding'] for record in res['data']]
         except:
             embeds = []
+            logging.error('Failed to embed entire list, embedding one by one...')
             for i, text in enumerate(list_to_encode):
-                res = openai.Embedding.create(input=[text], engine=MODEL)
-                embeds.append(res['data'][0]['embedding'])
+                try:
+                    res = openai.Embedding.create(input=[text], engine=MODEL)
+                    embeds.append(res['data'][0]['embedding'])
+                except:
+                    embeds.append([0] * 1536)
         df["values"] = embeds
         meta = DatasetMetadata.empty()
         meta.dense_model.dimension = len(embeds[0])
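
Note on this hunk: if the batched openai.Embedding.create call fails, the code now logs the failure and embeds the texts one by one, substituting an all-zero vector of length 1536 (the text-embedding-ada-002 output dimension) for any item that still fails, so the list length and the dataset dimension stay consistent. A minimal standalone sketch of that pattern, using the same pre-1.0 openai API as the diff (the embed_texts wrapper, MODEL, and EMBED_DIM names are assumptions for illustration):

# Minimal sketch of the batch-then-per-item embedding fallback shown above.
# Assumes the pre-1.0 `openai` Python package; MODEL and EMBED_DIM are illustrative.
import logging
import openai

MODEL = "text-embedding-ada-002"
EMBED_DIM = 1536  # output dimension of text-embedding-ada-002

def embed_texts(list_to_encode):
    try:
        # Fast path: embed the whole list in a single API call.
        res = openai.Embedding.create(input=list_to_encode, engine=MODEL)
        return [record['embedding'] for record in res['data']]
    except Exception:
        logging.error('Failed to embed entire list, embedding one by one...')
        embeds = []
        for text in list_to_encode:
            try:
                res = openai.Embedding.create(input=[text], engine=MODEL)
                embeds.append(res['data'][0]['embedding'])
            except Exception:
                # Placeholder vector keeps list length and dimension consistent.
                embeds.append([0] * EMBED_DIM)
        return embeds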