mirror of
https://github.com/qodo-ai/pr-agent.git
synced 2025-07-05 13:20:39 +08:00
stable
This commit is contained in:
@ -100,7 +100,7 @@ polling_interval_seconds = 30
|
|||||||
[pr_similar_issue]
|
[pr_similar_issue]
|
||||||
skip_comments = false
|
skip_comments = false
|
||||||
force_update_dataset = false
|
force_update_dataset = false
|
||||||
max_issues_to_scan = 1000
|
max_issues_to_scan = 500
|
||||||
|
|
||||||
[pinecone]
|
[pinecone]
|
||||||
# fill and place in .secrets.toml
|
# fill and place in .secrets.toml
|
||||||
|
@ -176,24 +176,24 @@ class PRSimilarIssue:
|
|||||||
level=IssueLevel.ISSUE)
|
level=IssueLevel.ISSUE)
|
||||||
)
|
)
|
||||||
corpus.append(issue_record)
|
corpus.append(issue_record)
|
||||||
if comments:
|
if comments:
|
||||||
for j, comment in enumerate(comments):
|
for j, comment in enumerate(comments):
|
||||||
comment_body = comment.body
|
comment_body = comment.body
|
||||||
num_words_comment = len(comment_body.split())
|
num_words_comment = len(comment_body.split())
|
||||||
if num_words_comment < 10 or not isinstance(comment_body, str):
|
if num_words_comment < 10 or not isinstance(comment_body, str):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if len(issue_str) < 8000 or \
|
if len(comment_body) < 8000 or \
|
||||||
self.token_handler.count_tokens(comment_body) < MAX_TOKENS[MODEL]:
|
self.token_handler.count_tokens(comment_body) < MAX_TOKENS[MODEL]:
|
||||||
comment_record = Record(
|
comment_record = Record(
|
||||||
id=issue_key + ".comment_" + str(j + 1),
|
id=issue_key + ".comment_" + str(j + 1),
|
||||||
text=comment_body,
|
text=comment_body,
|
||||||
metadata=Metadata(repo=repo_name_for_index,
|
metadata=Metadata(repo=repo_name_for_index,
|
||||||
username=username, # use issue username for all comments
|
username=username, # use issue username for all comments
|
||||||
created_at=created_at,
|
created_at=created_at,
|
||||||
level=IssueLevel.COMMENT)
|
level=IssueLevel.COMMENT)
|
||||||
)
|
)
|
||||||
corpus.append(comment_record)
|
corpus.append(comment_record)
|
||||||
df = pd.DataFrame(corpus.dict()["documents"])
|
df = pd.DataFrame(corpus.dict()["documents"])
|
||||||
logging.info('Done')
|
logging.info('Done')
|
||||||
|
|
||||||
@ -205,9 +205,13 @@ class PRSimilarIssue:
|
|||||||
embeds = [record['embedding'] for record in res['data']]
|
embeds = [record['embedding'] for record in res['data']]
|
||||||
except:
|
except:
|
||||||
embeds = []
|
embeds = []
|
||||||
|
logging.error('Failed to embed entire list, embedding one by one...')
|
||||||
for i, text in enumerate(list_to_encode):
|
for i, text in enumerate(list_to_encode):
|
||||||
res = openai.Embedding.create(input=[text], engine=MODEL)
|
try:
|
||||||
embeds.append(res['data'][0]['embedding'])
|
res = openai.Embedding.create(input=[text], engine=MODEL)
|
||||||
|
embeds.append(res['data'][0]['embedding'])
|
||||||
|
except:
|
||||||
|
embeds.append([0] * 1536)
|
||||||
df["values"] = embeds
|
df["values"] = embeds
|
||||||
meta = DatasetMetadata.empty()
|
meta = DatasetMetadata.empty()
|
||||||
meta.dense_model.dimension = len(embeds[0])
|
meta.dense_model.dimension = len(embeds[0])
|
||||||
|
Reference in New Issue
Block a user