From 6693aa3cbc278d1879c9afa0b3fbc6ced6a71f52 Mon Sep 17 00:00:00 2001
From: mrT23
Date: Sun, 20 Aug 2023 15:01:06 +0300
Subject: [PATCH 01/16] semi stable

---
 pr_agent/agent/pr_agent.py                |  4 +-
 pr_agent/algo/utils.py                    |  8 ++-
 pr_agent/git_providers/github_provider.py | 29 +++++++++
 pr_agent/tools/pr_similar_issue.py        | 77 +++++++++++++++++++++++
 4 files changed, 115 insertions(+), 3 deletions(-)
 create mode 100644 pr_agent/tools/pr_similar_issue.py

diff --git a/pr_agent/agent/pr_agent.py b/pr_agent/agent/pr_agent.py
index 2ab13d69..c1ab4803 100644
--- a/pr_agent/agent/pr_agent.py
+++ b/pr_agent/agent/pr_agent.py
@@ -9,6 +9,7 @@ from pr_agent.git_providers import get_git_provider
 from pr_agent.tools.pr_code_suggestions import PRCodeSuggestions
 from pr_agent.tools.pr_description import PRDescription
 from pr_agent.tools.pr_information_from_user import PRInformationFromUser
+from pr_agent.tools.pr_similar_issue import PRSimilarIssue
 from pr_agent.tools.pr_questions import PRQuestions
 from pr_agent.tools.pr_reviewer import PRReviewer
 from pr_agent.tools.pr_update_changelog import PRUpdateChangelog
@@ -29,6 +30,7 @@ command2class = {
     "update_changelog": PRUpdateChangelog,
     "config": PRConfig,
     "settings": PRConfig,
+    "similar_issue": PRSimilarIssue,
 }
 commands = list(command2class.keys())
@@ -73,7 +75,7 @@ class PRAgent:
         elif action in command2class:
             if notify:
                 notify()
-            await command2class[action](pr_url, args=args).run()
+            await command2class[action](pr_url, *args).run()
         else:
             return False
         return True

diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py
index 725d75ec..14fdda59 100644
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@@ -253,8 +253,12 @@ def update_settings_from_args(args: List[str]) -> List[str]:
             key, value = vals
             key = key.strip().upper()
             value = value.strip()
-            get_settings().set(key, value)
-            logging.info(f'Updated setting {key} to: "{value}"')
+            if key in get_settings():
+                get_settings().set(key, value)
+                logging.info(f'Updated setting {key} to: "{value}"')
+            else:
+                logging.info(f'No argument: {key}')
+                other_args.append(arg)
         else:
             other_args.append(arg)
     return other_args

diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py
index be0fa645..c010158d 100644
--- a/pr_agent/git_providers/github_provider.py
+++ b/pr_agent/git_providers/github_provider.py
@@ -312,6 +312,35 @@ class GithubProvider(GitProvider):
 
         return repo_name, pr_number
 
+    @staticmethod
+    def _parse_issue_url(issue_url: str) -> Tuple[str, int]:
+        parsed_url = urlparse(issue_url)
+
+        if 'github.com' not in parsed_url.netloc:
+            raise ValueError("The provided URL is not a valid GitHub URL")
+
+        path_parts = parsed_url.path.strip('/').split('/')
+        if 'api.github.com' in parsed_url.netloc:
+            if len(path_parts) < 5 or path_parts[3] != 'issues':
+                raise ValueError("The provided URL does not appear to be a GitHub issue URL")
+            repo_name = '/'.join(path_parts[1:3])
+            try:
+                issue_number = int(path_parts[4])
+            except ValueError as e:
+                raise ValueError("Unable to convert issue number to integer") from e
+            return repo_name, issue_number
+
+        if len(path_parts) < 4 or path_parts[2] != 'issues':
+            raise ValueError("The provided URL does not appear to be a GitHub issue URL")
+
+        repo_name = '/'.join(path_parts[:2])
+        try:
+            issue_number = int(path_parts[3])
+        except ValueError as e:
+            raise ValueError("Unable to convert issue number to integer") from e
+
+        return repo_name, issue_number
+
     def _get_github_client(self):
         deployment_type = get_settings().get("GITHUB.DEPLOYMENT_TYPE", "user")
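The `_parse_issue_url` helper added above accepts both browser URLs (github.com/{owner}/{repo}/issues/{number}) and REST API URLs (api.github.com/repos/{owner}/{repo}/issues/{number}) and returns the same (repo_name, issue_number) pair for either shape. A quick sketch of the intended behavior — not part of the patch, and the issue URLs are hypothetical:

    from pr_agent.git_providers.github_provider import GithubProvider

    # Web form: path is /{owner}/{repo}/issues/{number}
    repo, num = GithubProvider._parse_issue_url("https://github.com/Codium-ai/pr-agent/issues/50")
    assert (repo, num) == ("Codium-ai/pr-agent", 50)

    # API form: path is /repos/{owner}/{repo}/issues/{number}
    repo, num = GithubProvider._parse_issue_url("https://api.github.com/repos/Codium-ai/pr-agent/issues/50")
    assert (repo, num) == ("Codium-ai/pr-agent", 50)

Anything that does not look like an issue URL raises ValueError, so callers fail fast before making any API traffic.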
get_settings().get("GITHUB.DEPLOYMENT_TYPE", "user") diff --git a/pr_agent/tools/pr_similar_issue.py b/pr_agent/tools/pr_similar_issue.py new file mode 100644 index 00000000..497f2f5d --- /dev/null +++ b/pr_agent/tools/pr_similar_issue.py @@ -0,0 +1,77 @@ +import copy +import json +import logging +from typing import List, Tuple + +from jinja2 import Environment, StrictUndefined + +from pr_agent.algo.ai_handler import AiHandler +from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models +from pr_agent.algo.token_handler import TokenHandler +from pr_agent.algo.utils import load_yaml +from pr_agent.config_loader import get_settings +from pr_agent.git_providers import get_git_provider +from pr_agent.git_providers.git_provider import get_main_pr_language + + +class PRSimilarIssue: + def __init__(self, pr_url: str, issue_url: str, args: list = None): + load_data_from_local = True + if not load_data_from_local: + self.git_provider = get_git_provider()() + repo_name, issue_number = self.git_provider._parse_issue_url(issue_url.split('=')[-1]) + self.git_provider.repo = repo_name + self.git_provider.repo_obj = self.git_provider.github_client.get_repo(repo_name) + repo_obj = self.git_provider.repo_obj + + def _process_issue(issue): + header = body = issue_str = comments_str = "" + if issue.pull_request: + return header, body, issue_str, comments_str + header = issue.title + body = issue.body + comments_obj = list(issue.get_comments()) + comments_str = "" + for i, comment in enumerate(comments_obj): + comments_str += f"comment {i}:\n{comment.body}\n\n\n" + issue_str = f"Issue Header: \"{header}\"\n\nIssue Body:\n{body}" + return header, body, issue_str, comments_str + + main_issue = repo_obj.get_issue(issue_number) + assert not main_issue.pull_request + _, _, main_issue_str, main_comments_str = _process_issue(main_issue) + + issues_str_list = [] + comments_str_list = [] + issues = list(repo_obj.get_issues(state='all')) # 'open', 'closed', 'all' + for i, issue in enumerate(issues): + if issue.url == main_issue.url: + continue + if issue.pull_request: + continue + _, _, issue_str, comments_str = _process_issue(issue) + issues_str_list.append(issue_str) + comments_str_list.append(comments_str) + + json_output = {} + json_output['main_issue'] = {} + json_output['main_issue']['issue'] = main_issue_str + json_output['main_issue']['comment'] = main_comments_str + json_output['issues'] = {} + for i in range(len(issues_str_list)): + json_output['issues'][f'issue_{i}'] = {} + json_output['issues'][f'issue_{i}']['issue'] = issues_str_list[i] + json_output['issues'][f'issue_{i}'][f'comments'] = comments_str_list[i] + + jsonFile = open("/Users/talrid/Desktop/issues_data.json", "w") + jsonFile.write(json.dumps(json_output)) + jsonFile.close() + else: + jsonFile = open("/Users/talrid/Desktop/issues_data.json", "r") + json_output=json.loads(jsonFile.read()) + + from langchain.document_loaders import TextLoader + from langchain.text_splitter import CharacterTextSplitter + text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) + + aaa=3 From 901c1dc3f0e170e0c9b88a59e1cde9ab3a3de789 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Tue, 5 Sep 2023 08:40:05 +0300 Subject: [PATCH 02/16] issue tool --- pr_agent/algo/__init__.py | 1 + pr_agent/algo/token_handler.py | 5 +- pr_agent/algo/utils.py | 43 ++-- pr_agent/cli.py | 10 +- pr_agent/git_providers/github_provider.py | 2 +- pr_agent/settings/configuration.toml | 12 +- pr_agent/tools/pr_similar_issue.py | 293 +++++++++++++++++----- 
requirements.txt | 4 +- 8 files changed, 287 insertions(+), 83 deletions(-) diff --git a/pr_agent/algo/__init__.py b/pr_agent/algo/__init__.py index 798fc6c5..82a2af40 100644 --- a/pr_agent/algo/__init__.py +++ b/pr_agent/algo/__init__.py @@ -1,4 +1,5 @@ MAX_TOKENS = { + 'text-embedding-ada-002': 8000, 'gpt-3.5-turbo': 4000, 'gpt-3.5-turbo-0613': 4000, 'gpt-3.5-turbo-0301': 4000, diff --git a/pr_agent/algo/token_handler.py b/pr_agent/algo/token_handler.py index f018a92b..d7eff9d7 100644 --- a/pr_agent/algo/token_handler.py +++ b/pr_agent/algo/token_handler.py @@ -21,7 +21,7 @@ class TokenHandler: method. """ - def __init__(self, pr, vars: dict, system, user): + def __init__(self, pr=None, vars: dict = {}, system="", user=""): """ Initializes the TokenHandler object. @@ -32,7 +32,8 @@ class TokenHandler: - user: The user string. """ self.encoder = get_token_encoder() - self.prompt_tokens = self._get_system_user_tokens(pr, self.encoder, vars, system, user) + if pr is not None: + self.prompt_tokens = self._get_system_user_tokens(pr, self.encoder, vars, system, user) def _get_system_user_tokens(self, pr, encoder, vars: dict, system, user): """ diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 0124c3d6..7ac4b468 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -32,33 +32,37 @@ def convert_to_markdown(output_data: dict) -> str: emojis = { "Main theme": "๐ŸŽฏ", + "PR summary": "๐Ÿ“", "Type of PR": "๐Ÿ“Œ", "Score": "๐Ÿ…", "Relevant tests added": "๐Ÿงช", "Unrelated changes": "โš ๏ธ", "Focused PR": "โœจ", "Security concerns": "๐Ÿ”’", - "General PR suggestions": "๐Ÿ’ก", + "General suggestions": "๐Ÿ’ก", "Insights from user's answers": "๐Ÿ“", "Code feedback": "๐Ÿค–", } for key, value in output_data.items(): - if not value: + if value is None or value == '' or value == {}: continue if isinstance(value, dict): markdown_text += f"## {key}\n\n" markdown_text += convert_to_markdown(value) elif isinstance(value, list): - if key.lower() == 'code feedback': - markdown_text += "\n" # just looks nicer with additional line breaks emoji = emojis.get(key, "") - markdown_text += f"- {emoji} **{key}:**\n\n" + if key.lower() == 'code feedback': + markdown_text += f"\n\n- **
{ emoji } Code feedback:**\n\n" + else: + markdown_text += f"- {emoji} **{key}:**\n\n" for item in value: if isinstance(item, dict) and key.lower() == 'code feedback': markdown_text += parse_code_suggestion(item) elif item: markdown_text += f" - {item}\n" + if key.lower() == 'code feedback': + markdown_text += "
\n\n" elif value != 'n/a': emoji = emojis.get(key, "") markdown_text += f"- {emoji} **{key}:** {value}\n" @@ -164,7 +168,7 @@ def fix_json_escape_char(json_message=None): Raises: None - """ + """ try: result = json.loads(json_message) except Exception as e: @@ -191,7 +195,7 @@ def convert_str_to_datetime(date_str): Example: >>> convert_str_to_datetime('Mon, 01 Jan 2022 12:00:00 UTC') datetime.datetime(2022, 1, 1, 12, 0, 0) - """ + """ datetime_format = '%a, %d %b %Y %H:%M:%S %Z' return datetime.strptime(date_str, datetime_format) @@ -245,27 +249,34 @@ def update_settings_from_args(args: List[str]) -> List[str]: arg = arg.strip() if arg.startswith('--'): arg = arg.strip('-').strip() - vals = arg.split('=') + vals = arg.split('=', 1) if len(vals) != 2: - logging.error(f'Invalid argument format: {arg}') + if len(vals) > 2: # --extended is a valid argument + logging.error(f'Invalid argument format: {arg}') other_args.append(arg) continue key, value = _fix_key_value(*vals) - if key in get_settings(): - get_settings().set(key, value) - logging.info(f'Updated setting {key} to: "{value}"') - else: - logging.info(f'No argument: {key}') - other_args.append(arg) + get_settings().set(key, value) + logging.info(f'Updated setting {key} to: "{value}"') else: other_args.append(arg) return other_args +def _fix_key_value(key: str, value: str): + key = key.strip().upper() + value = value.strip() + try: + value = yaml.safe_load(value) + except Exception as e: + logging.error(f"Failed to parse YAML for config override {key}={value}", exc_info=e) + return key, value + + def load_yaml(review_text: str) -> dict: review_text = review_text.removeprefix('```yaml').rstrip('`') try: - data = yaml.load(review_text, Loader=yaml.SafeLoader) + data = yaml.safe_load(review_text) except Exception as e: logging.error(f"Failed to parse AI prediction: {e}") data = try_fix_yaml(review_text) diff --git a/pr_agent/cli.py b/pr_agent/cli.py index 01c1a7ec..7c4508d9 100644 --- a/pr_agent/cli.py +++ b/pr_agent/cli.py @@ -5,6 +5,7 @@ import os from pr_agent.agent.pr_agent import PRAgent, commands from pr_agent.config_loader import get_settings +from pr_agent.tools.pr_similar_issue import PRSimilarIssue def run(inargs=None): @@ -37,14 +38,19 @@ Configuration: To edit any configuration parameter from 'configuration.toml', just add -config_path=. For example: 'python cli.py --pr_url=... 
review --pr_reviewer.extra_instructions="focus on the file: ..."' """) - parser.add_argument('--pr_url', type=str, help='The URL of the PR to review', required=True) + parser.add_argument('--pr_url', type=str, help='The URL of the PR to review') + parser.add_argument('--issue_url', type=str, help='The URL of the Issue to review', default=None) parser.add_argument('command', type=str, help='The', choices=commands, default='review') parser.add_argument('rest', nargs=argparse.REMAINDER, default=[]) args = parser.parse_args(inargs) logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO")) command = args.command.lower() get_settings().set("CONFIG.CLI_MODE", True) - result = asyncio.run(PRAgent().handle_request(args.pr_url, command + " " + " ".join(args.rest))) + if args.issue_url: + result = asyncio.run(PRAgent().handle_request(args.issue_url, command + " " + " ".join(args.rest))) + # result = asyncio.run(PRSimilarIssue(args.issue_url, cli_mode=True, args=command + " " + " ".join(args.rest)).run()) + else: + result = asyncio.run(PRAgent().handle_request(args.pr_url, command + " " + " ".join(args.rest))) if not result: parser.print_help() diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 7e93d18c..0521716b 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -32,7 +32,7 @@ class GithubProvider(GitProvider): self.diff_files = None self.git_files = None self.incremental = incremental - if pr_url: + if pr_url and 'pull' in pr_url: self.set_pr(pr_url) self.last_commit_id = list(self.pr.get_commits())[-1] diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index f8abd555..9bfdf3a3 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -84,4 +84,14 @@ polling_interval_seconds = 30 [local] # LocalGitProvider settings - uncomment to use paths other than default # description_path= "path/to/description.md" -# review_path= "path/to/review.md" \ No newline at end of file +# review_path= "path/to/review.md" + +[pr_similar_issue] +skip_comments = false +force_update_dataset = false +max_issues_to_scan = 1000 + +[pinecone] +# fill and place in .secrets.toml +#api_key = ... 
+# environment = "gcp-starter" \ No newline at end of file diff --git a/pr_agent/tools/pr_similar_issue.py b/pr_agent/tools/pr_similar_issue.py index 497f2f5d..94dc10d3 100644 --- a/pr_agent/tools/pr_similar_issue.py +++ b/pr_agent/tools/pr_similar_issue.py @@ -1,77 +1,250 @@ import copy import json import logging +from enum import Enum from typing import List, Tuple +import pinecone +import openai +import pandas as pd +from pydantic import BaseModel, Field -from jinja2 import Environment, StrictUndefined - -from pr_agent.algo.ai_handler import AiHandler -from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models +from pr_agent.algo import MAX_TOKENS from pr_agent.algo.token_handler import TokenHandler -from pr_agent.algo.utils import load_yaml from pr_agent.config_loader import get_settings from pr_agent.git_providers import get_git_provider -from pr_agent.git_providers.git_provider import get_main_pr_language +from pinecone_datasets import Dataset, DatasetMetadata + +MODEL = "text-embedding-ada-002" class PRSimilarIssue: - def __init__(self, pr_url: str, issue_url: str, args: list = None): - load_data_from_local = True - if not load_data_from_local: - self.git_provider = get_git_provider()() - repo_name, issue_number = self.git_provider._parse_issue_url(issue_url.split('=')[-1]) - self.git_provider.repo = repo_name - self.git_provider.repo_obj = self.git_provider.github_client.get_repo(repo_name) - repo_obj = self.git_provider.repo_obj + def __init__(self, issue_url: str, args: list = None): + if get_settings().config.git_provider != "github": + raise Exception("Only github is supported for similar issue tool") - def _process_issue(issue): - header = body = issue_str = comments_str = "" - if issue.pull_request: - return header, body, issue_str, comments_str - header = issue.title - body = issue.body - comments_obj = list(issue.get_comments()) - comments_str = "" - for i, comment in enumerate(comments_obj): - comments_str += f"comment {i}:\n{comment.body}\n\n\n" - issue_str = f"Issue Header: \"{header}\"\n\nIssue Body:\n{body}" - return header, body, issue_str, comments_str + self.cli_mode = get_settings().CONFIG.CLI_MODE + self.max_issues_to_scan = get_settings().pr_similar_issue.max_issues_to_scan + self.issue_url = issue_url + self.git_provider = get_git_provider()() + repo_name, issue_number = self.git_provider._parse_issue_url(issue_url.split('=')[-1]) + self.git_provider.repo = repo_name + self.git_provider.repo_obj = self.git_provider.github_client.get_repo(repo_name) + self.token_handler = TokenHandler() + repo_obj = self.git_provider.repo_obj + repo_name_for_index = self.repo_name_for_index = repo_obj.full_name.lower().replace('/', '-').replace('_/', '-') + index_name = self.index_name = "codium-ai-pr-agent-issues" - main_issue = repo_obj.get_issue(issue_number) - assert not main_issue.pull_request - _, _, main_issue_str, main_comments_str = _process_issue(main_issue) + # assuming pinecone api key and environment are set in secrets file + try: + api_key = get_settings().pinecone.api_key + environment = get_settings().pinecone.environment + except Exception: + if not self.cli_mode: + repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) + issue_main = self.git_provider.repo_obj.get_issue(original_issue_number) + issue_main.create_comment("Please set pinecone api key and environment in secrets file") + raise Exception("Please set pinecone api key and environment in secrets file") - issues_str_list = [] - 
comments_str_list = [] - issues = list(repo_obj.get_issues(state='all')) # 'open', 'closed', 'all' - for i, issue in enumerate(issues): - if issue.url == main_issue.url: - continue - if issue.pull_request: - continue - _, _, issue_str, comments_str = _process_issue(issue) - issues_str_list.append(issue_str) - comments_str_list.append(comments_str) - - json_output = {} - json_output['main_issue'] = {} - json_output['main_issue']['issue'] = main_issue_str - json_output['main_issue']['comment'] = main_comments_str - json_output['issues'] = {} - for i in range(len(issues_str_list)): - json_output['issues'][f'issue_{i}'] = {} - json_output['issues'][f'issue_{i}']['issue'] = issues_str_list[i] - json_output['issues'][f'issue_{i}'][f'comments'] = comments_str_list[i] - - jsonFile = open("/Users/talrid/Desktop/issues_data.json", "w") - jsonFile.write(json.dumps(json_output)) - jsonFile.close() + # check if index exists, and if repo is already indexed + run_from_scratch = False + upsert = True + pinecone.init(api_key=api_key, environment=environment) + if not index_name in pinecone.list_indexes(): + run_from_scratch = True + upsert = False else: - jsonFile = open("/Users/talrid/Desktop/issues_data.json", "r") - json_output=json.loads(jsonFile.read()) + if get_settings().pr_similar_issue.force_update_dataset: + upsert = True + else: + pinecone_index = pinecone.Index(index_name=index_name) + res = pinecone_index.fetch([f"example_issue_{repo_name_for_index}"]).to_dict() + if res["vectors"]: + upsert = False - from langchain.document_loaders import TextLoader - from langchain.text_splitter import CharacterTextSplitter - text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) + if run_from_scratch or upsert: # index the entire repo + logging.info('Indexing the entire repo...') - aaa=3 + logging.info('Getting issues...') + issues = list(repo_obj.get_issues(state='all')) + logging.info('Done') + self._update_index_with_issues(issues, repo_name_for_index, upsert=upsert) + else: # update index if needed + pinecone_index = pinecone.Index(index_name=index_name) + issues_to_update = [] + issues_paginated_list = repo_obj.get_issues(state='all') + counter = 1 + for issue in issues_paginated_list: + if issue.pull_request: + continue + issue_str, comments, number = self._process_issue(issue) + issue_key = f"issue_{number}" + id = issue_key + "." 
+ "issue" + res = pinecone_index.fetch([id]).to_dict() + is_new_issue = True + for vector in res["vectors"].values(): + if vector['metadata']['repo'] == repo_name_for_index: + is_new_issue = False + break + if is_new_issue: + counter += 1 + issues_to_update.append(issue) + else: + break + + if issues_to_update: + logging.info(f'Updating index with {counter} new issues...') + self._update_index_with_issues(issues_to_update, repo_name_for_index, upsert=True) + else: + logging.info('No new issues to update') + + async def run(self): + repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1]) + issue_main = self.git_provider.repo_obj.get_issue(original_issue_number) + issue_str, comments, number = self._process_issue(issue_main) + openai.api_key = get_settings().openai.key + + res = openai.Embedding.create(input=[issue_str], engine=MODEL) + embeds = [record['embedding'] for record in res['data']] + pinecone_index = pinecone.Index(index_name=self.index_name) + res = pinecone_index.query(embeds[0], + top_k=5, + filter={"repo": self.repo_name_for_index}, + include_metadata=True).to_dict() + relevant_issues_number_list = [] + for r in res['matches']: + issue_number = int(r["id"].split('.')[0].split('_')[-1]) + if original_issue_number == issue_number: + continue + if issue_number not in relevant_issues_number_list: + relevant_issues_number_list.append(issue_number) + + similar_issues_str = "Similar Issues:\n\n" + for i, issue_number_similar in enumerate(relevant_issues_number_list): + issue = self.git_provider.repo_obj.get_issue(issue_number_similar) + title = issue.title + url = issue.html_url + similar_issues_str += f"{i + 1}. [{title}]({url})\n\n" + if get_settings().config.publish_output: + response = issue_main.create_comment(similar_issues_str) + logging.info(similar_issues_str) + + def _process_issue(self, issue): + header = issue.title + body = issue.body + number = issue.number + if get_settings().pinecone.skip_comments: + comments = [] + else: + comments = list(issue.get_comments()) + issue_str = f"Issue Header: \"{header}\"\n\nIssue Body:\n{body}" + return issue_str, comments, number + + def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=False): + logging.info('Processing issues...') + corpus = Corpus() + example_issue_record = Record( + id=f"example_issue_{repo_name_for_index}", + text="example_issue", + metadata=Metadata(repo=repo_name_for_index) + ) + corpus.append(example_issue_record) + + counter = 0 + for issue in issues_list: + + if issue.pull_request: + continue + + counter += 1 + if counter >= self.max_issues_to_scan: + logging.info(f"Scanned {self.max_issues_to_scan} issues, stopping") + break + + issue_str, comments, number = self._process_issue(issue) + issue_key = f"issue_{number}" + username = issue.user.login + created_at = str(issue.created_at) + if len(issue_str) < 8000 or \ + self.token_handler.count_tokens(issue_str) < MAX_TOKENS[MODEL]: # fast reject first + issue_record = Record( + id=issue_key + "." 
+ "issue", + text=issue_str, + metadata=Metadata(repo=repo_name_for_index, + username=username, + created_at=created_at, + level=IssueLevel.ISSUE) + ) + corpus.append(issue_record) + if comments: + for j, comment in enumerate(comments): + comment_body = comment.body + num_words_comment = len(comment_body.split()) + if num_words_comment < 10: + continue + + if len(issue_str) < 8000 or \ + self.token_handler.count_tokens(comment_body) < MAX_TOKENS[MODEL]: + comment_record = Record( + id=issue_key + ".comment_" + str(j + 1), + text=comment_body, + metadata=Metadata(repo=repo_name_for_index, + username=username, # use issue username for all comments + created_at=created_at, + level=IssueLevel.COMMENT) + ) + corpus.append(comment_record) + df = pd.DataFrame(corpus.dict()["documents"]) + logging.info('Done') + + logging.info('Embedding...') + openai.api_key = get_settings().openai.key + list_to_encode = list(df["text"].values) + res = openai.Embedding.create(input=list_to_encode, engine=MODEL) + embeds = [record['embedding'] for record in res['data']] + df["values"] = embeds + meta = DatasetMetadata.empty() + meta.dense_model.dimension = len(embeds[0]) + ds = Dataset.from_pandas(df, meta) + logging.info('Done') + + api_key = get_settings().pinecone.api_key + environment = get_settings().pinecone.environment + if not upsert: + logging.info('Creating index...') + ds.to_pinecone_index(self.index_name, api_key=api_key, environment=environment) + else: + logging.info('Upserting index...') + namespace = "" + batch_size: int = 100 + concurrency: int = 10 + pinecone.init(api_key=api_key, environment=environment) + ds._upsert_to_index(self.index_name, namespace, batch_size, concurrency) + logging.info('Done') + + +class IssueLevel(str, Enum): + ISSUE = "issue" + COMMENT = "comment" + + +class Metadata(BaseModel): + repo: str + username: str = Field(default="@codium") + created_at: str = Field(default="01-01-1970 00:00:00.00000") + level: IssueLevel = Field(default=IssueLevel.ISSUE) + + class Config: + use_enum_values = True + + +class Record(BaseModel): + id: str + text: str + metadata: Metadata + + +class Corpus(BaseModel): + documents: List[Record] = Field(default=[]) + + def append(self, r: Record): + self.documents.append(r) diff --git a/requirements.txt b/requirements.txt index 99efa846..5d4caaa6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,4 +18,6 @@ boto3~=1.28.25 google-cloud-storage==2.10.0 ujson==5.8.0 azure-devops==7.1.0b3 -msrest==0.7.1 \ No newline at end of file +msrest==0.7.1 +pinecone-client==2.2.2 +pinecone_datasets==0.6.1 \ No newline at end of file From bc95cf5b8e6b0879644b35495a2824ccbe4431ce Mon Sep 17 00:00:00 2001 From: mrT23 Date: Wed, 6 Sep 2023 09:12:25 +0300 Subject: [PATCH 03/16] stable --- Usage.md | 12 ++++++------ pr_agent/cli.py | 9 ++++++--- pr_agent/tools/pr_similar_issue.py | 19 +++++++++++++------ 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/Usage.md b/Usage.md index 336de974..f8624d7e 100644 --- a/Usage.md +++ b/Usage.md @@ -50,12 +50,12 @@ When running from your local repo (CLI), your local configuration file will be u Examples for invoking the different tools via the CLI: -- **Review**: `python cli.py --pr_url= /review` -- **Describe**: `python cli.py --pr_url= /describe` -- **Improve**: `python cli.py --pr_url= /improve` -- **Ask**: `python cli.py --pr_url= /ask "Write me a poem about this PR"` -- **Reflect**: `python cli.py --pr_url= /reflect` -- **Update Changelog**: `python cli.py --pr_url= /update_changelog` +- **Review**: 
From bc95cf5b8e6b0879644b35495a2824ccbe4431ce Mon Sep 17 00:00:00 2001
From: mrT23
Date: Wed, 6 Sep 2023 09:12:25 +0300
Subject: [PATCH 03/16] stable

---
 Usage.md                           | 12 ++++++------
 pr_agent/cli.py                    |  9 ++++++---
 pr_agent/tools/pr_similar_issue.py | 19 +++++++++++++------
 3 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/Usage.md b/Usage.md
index 336de974..f8624d7e 100644
--- a/Usage.md
+++ b/Usage.md
@@ -50,12 +50,12 @@ When running from your local repo (CLI), your local configuration file will be used.
 
 Examples for invoking the different tools via the CLI:
 
-- **Review**: `python cli.py --pr_url=<pr_url> /review`
-- **Describe**: `python cli.py --pr_url=<pr_url> /describe`
-- **Improve**: `python cli.py --pr_url=<pr_url> /improve`
-- **Ask**: `python cli.py --pr_url=<pr_url> /ask "Write me a poem about this PR"`
-- **Reflect**: `python cli.py --pr_url=<pr_url> /reflect`
-- **Update Changelog**: `python cli.py --pr_url=<pr_url> /update_changelog`
+- **Review**: `python cli.py --pr_url=<pr_url> review`
+- **Describe**: `python cli.py --pr_url=<pr_url> describe`
+- **Improve**: `python cli.py --pr_url=<pr_url> improve`
+- **Ask**: `python cli.py --pr_url=<pr_url> ask "Write me a poem about this PR"`
+- **Reflect**: `python cli.py --pr_url=<pr_url> reflect`
+- **Update Changelog**: `python cli.py --pr_url=<pr_url> update_changelog`
 
 `<pr_url>` is the url of the relevant PR (for example: https://github.com/Codium-ai/pr-agent/pull/50).

diff --git a/pr_agent/cli.py b/pr_agent/cli.py
index 7c4508d9..07c37f5e 100644
--- a/pr_agent/cli.py
+++ b/pr_agent/cli.py
@@ -5,7 +5,6 @@ import os
 
 from pr_agent.agent.pr_agent import PRAgent, commands
 from pr_agent.config_loader import get_settings
-from pr_agent.tools.pr_similar_issue import PRSimilarIssue
 
 
 def run(inargs=None):
@@ -18,6 +17,7 @@ For example:
 - cli.py --pr_url=... improve
 - cli.py --pr_url=... ask "write me a poem about this PR"
 - cli.py --pr_url=... reflect
+- cli.py --issue_url=... similar_issue
 
 Supported commands:
 -review / review_pr - Add a review that includes a summary of the PR and specific suggestions for improvement.
@@ -38,17 +38,20 @@ Configuration:
 To edit any configuration parameter from 'configuration.toml', just add -config_path=.
 For example: 'python cli.py --pr_url=... review --pr_reviewer.extra_instructions="focus on the file: ..."'
 """)
-    parser.add_argument('--pr_url', type=str, help='The URL of the PR to review')
+    parser.add_argument('--pr_url', type=str, help='The URL of the PR to review', default=None)
     parser.add_argument('--issue_url', type=str, help='The URL of the Issue to review', default=None)
     parser.add_argument('command', type=str, help='The', choices=commands, default='review')
     parser.add_argument('rest', nargs=argparse.REMAINDER, default=[])
     args = parser.parse_args(inargs)
+    if not args.pr_url and not args.issue_url:
+        parser.print_help()
+        return
+
     logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))
     command = args.command.lower()
     get_settings().set("CONFIG.CLI_MODE", True)
     if args.issue_url:
         result = asyncio.run(PRAgent().handle_request(args.issue_url, command + " " + " ".join(args.rest)))
-        # result = asyncio.run(PRSimilarIssue(args.issue_url, cli_mode=True, args=command + " " + " ".join(args.rest)).run())
     else:
         result = asyncio.run(PRAgent().handle_request(args.pr_url, command + " " + " ".join(args.rest)))
     if not result:
         parser.print_help()

diff --git a/pr_agent/tools/pr_similar_issue.py b/pr_agent/tools/pr_similar_issue.py
index 94dc10d3..50ec4c6b 100644
--- a/pr_agent/tools/pr_similar_issue.py
+++ b/pr_agent/tools/pr_similar_issue.py
@@ -132,7 +132,7 @@ class PRSimilarIssue:
         header = issue.title
         body = issue.body
         number = issue.number
-        if get_settings().pinecone.skip_comments:
+        if get_settings().pr_similar_issue.skip_comments:
             comments = []
         else:
             comments = list(issue.get_comments())
@@ -151,11 +151,12 @@ class PRSimilarIssue:
 
         counter = 0
         for issue in issues_list:
-
             if issue.pull_request:
                 continue
 
             counter += 1
+            if counter % 100 == 0:
+                logging.info(f"Scanned {counter} issues")
             if counter >= self.max_issues_to_scan:
                 logging.info(f"Scanned {self.max_issues_to_scan} issues, stopping")
                 break
@@ -179,7 +180,7 @@ class PRSimilarIssue:
             for j, comment in enumerate(comments):
                 comment_body = comment.body
                 num_words_comment = len(comment_body.split())
-                if num_words_comment < 10:
+                if num_words_comment < 10 or not isinstance(comment_body, str):
                     continue
 
                 if len(issue_str) < 8000 or \
@@ -199,8 +200,14 @@ class PRSimilarIssue:
         logging.info('Embedding...')
         openai.api_key = get_settings().openai.key
         list_to_encode = list(df["text"].values)
-
res = openai.Embedding.create(input=list_to_encode, engine=MODEL) - embeds = [record['embedding'] for record in res['data']] + try: + res = openai.Embedding.create(input=list_to_encode, engine=MODEL) + embeds = [record['embedding'] for record in res['data']] + except: + embeds = [] + for i, text in enumerate(list_to_encode): + res = openai.Embedding.create(input=[text], engine=MODEL) + embeds.append(res['data'][0]['embedding']) df["values"] = embeds meta = DatasetMetadata.empty() meta.dense_model.dimension = len(embeds[0]) @@ -210,7 +217,7 @@ class PRSimilarIssue: api_key = get_settings().pinecone.api_key environment = get_settings().pinecone.environment if not upsert: - logging.info('Creating index...') + logging.info('Creating index from scratch...') ds.to_pinecone_index(self.index_name, api_key=api_key, environment=environment) else: logging.info('Upserting index...') From 8962c9cf8ada62f271b451c8d6bf6dcfc6ec6c98 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Wed, 6 Sep 2023 09:43:23 +0300 Subject: [PATCH 04/16] stable --- pr_agent/settings/configuration.toml | 2 +- pr_agent/tools/pr_similar_issue.py | 42 +++++++++++++++------------- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index be44ab03..2188e8cc 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -100,7 +100,7 @@ polling_interval_seconds = 30 [pr_similar_issue] skip_comments = false force_update_dataset = false -max_issues_to_scan = 1000 +max_issues_to_scan = 500 [pinecone] # fill and place in .secrets.toml diff --git a/pr_agent/tools/pr_similar_issue.py b/pr_agent/tools/pr_similar_issue.py index 50ec4c6b..98d6a1f6 100644 --- a/pr_agent/tools/pr_similar_issue.py +++ b/pr_agent/tools/pr_similar_issue.py @@ -176,24 +176,24 @@ class PRSimilarIssue: level=IssueLevel.ISSUE) ) corpus.append(issue_record) - if comments: - for j, comment in enumerate(comments): - comment_body = comment.body - num_words_comment = len(comment_body.split()) - if num_words_comment < 10 or not isinstance(comment_body, str): - continue + if comments: + for j, comment in enumerate(comments): + comment_body = comment.body + num_words_comment = len(comment_body.split()) + if num_words_comment < 10 or not isinstance(comment_body, str): + continue - if len(issue_str) < 8000 or \ - self.token_handler.count_tokens(comment_body) < MAX_TOKENS[MODEL]: - comment_record = Record( - id=issue_key + ".comment_" + str(j + 1), - text=comment_body, - metadata=Metadata(repo=repo_name_for_index, - username=username, # use issue username for all comments - created_at=created_at, - level=IssueLevel.COMMENT) - ) - corpus.append(comment_record) + if len(comment_body) < 8000 or \ + self.token_handler.count_tokens(comment_body) < MAX_TOKENS[MODEL]: + comment_record = Record( + id=issue_key + ".comment_" + str(j + 1), + text=comment_body, + metadata=Metadata(repo=repo_name_for_index, + username=username, # use issue username for all comments + created_at=created_at, + level=IssueLevel.COMMENT) + ) + corpus.append(comment_record) df = pd.DataFrame(corpus.dict()["documents"]) logging.info('Done') @@ -205,9 +205,13 @@ class PRSimilarIssue: embeds = [record['embedding'] for record in res['data']] except: embeds = [] + logging.error('Failed to embed entire list, embedding one by one...') for i, text in enumerate(list_to_encode): - res = openai.Embedding.create(input=[text], engine=MODEL) - embeds.append(res['data'][0]['embedding']) + try: + res = 
openai.Embedding.create(input=[text], engine=MODEL) + embeds.append(res['data'][0]['embedding']) + except: + embeds.append([0] * 1536) df["values"] = embeds meta = DatasetMetadata.empty() meta.dense_model.dimension = len(embeds[0]) From 8af8fd8e5dc6f129d5afd9a0b1e5427fc46ecc85 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Wed, 6 Sep 2023 17:43:43 +0300 Subject: [PATCH 05/16] github action --- .github/workflows/pr-agent-review.yaml | 7 +++++-- pr_agent/agent/pr_agent.py | 2 +- pr_agent/settings/.secrets_template.toml | 4 ++++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pr-agent-review.yaml b/.github/workflows/pr-agent-review.yaml index 9dcf59b8..eb811a38 100644 --- a/.github/workflows/pr-agent-review.yaml +++ b/.github/workflows/pr-agent-review.yaml @@ -21,7 +21,10 @@ jobs: id: pragent uses: Codium-ai/pr-agent@main env: - OPENAI_KEY: ${{ secrets.OPENAI_KEY }} - OPENAI_ORG: ${{ secrets.OPENAI_ORG }} # optional + OPENAI.KEY: ${{ secrets.OPENAI_KEY }} + OPENAI.ORG: ${{ secrets.OPENAI_ORG }} # optional GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PINECONE.API_KEY: ${{ secrets.PINECONE_API_KEY }} + PINECONE.ENVIRONMENT: ${{ secrets.PINECONE_ENVIRONMENT }} + diff --git a/pr_agent/agent/pr_agent.py b/pr_agent/agent/pr_agent.py index 9f0886d8..07c34c51 100644 --- a/pr_agent/agent/pr_agent.py +++ b/pr_agent/agent/pr_agent.py @@ -78,7 +78,7 @@ class PRAgent: elif action in command2class: if notify: notify() - await command2class[action](pr_url, *args).run() + await command2class[action](pr_url, args=args).run() else: return False return True diff --git a/pr_agent/settings/.secrets_template.toml b/pr_agent/settings/.secrets_template.toml index 0ac75519..16c121ff 100644 --- a/pr_agent/settings/.secrets_template.toml +++ b/pr_agent/settings/.secrets_template.toml @@ -16,6 +16,10 @@ key = "" # Acquire through https://platform.openai.com #deployment_id = "" # The deployment name you chose when you deployed the engine #fallback_deployments = [] # For each fallback model specified in configuration.toml in the [config] section, specify the appropriate deployment_id +[pinecone] +api_key = "..." +environment = "gcp-starter" + [anthropic] key = "" # Optional, uncomment if you want to use Anthropic. 
Acquire through https://www.anthropic.com/ From 8321792a8d4a13a933c8a7b3edaaa47235733839 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Wed, 6 Sep 2023 18:12:16 +0300 Subject: [PATCH 06/16] == --- requirements.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index 5d4caaa6..cc5254fd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,14 +7,14 @@ Jinja2==3.1.2 tiktoken==0.4.0 uvicorn==0.22.0 python-gitlab==3.15.0 -pytest~=7.4.0 -aiohttp~=3.8.4 +pytest==7.4.0 +aiohttp==3.8.4 atlassian-python-api==3.39.0 -GitPython~=3.1.32 +GitPython==3.1.32 PyYAML==6.0 starlette-context==0.3.6 -litellm~=0.1.504 -boto3~=1.28.25 +litellm==0.1.504 +boto3==1.28.25 google-cloud-storage==2.10.0 ujson==5.8.0 azure-devops==7.1.0b3 From 9889d26d3e6bed28a50ba83dbda716c3d5e4c565 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Thu, 7 Sep 2023 12:31:22 +0300 Subject: [PATCH 07/16] merged main --- requirements.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index a4c6756f..dc617838 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ Jinja2==3.1.2 tiktoken==0.4.0 uvicorn==0.22.0 python-gitlab==3.15.0 -pytest~==7.4.0 +pytest==7.4.0 aiohttp==3.8.4 atlassian-python-api==3.39.0 GitPython==3.1.32 @@ -18,4 +18,6 @@ boto3==1.28.25 google-cloud-storage==2.10.0 ujson==5.8.0 azure-devops==7.1.0b3 -msrest==0.7.1 \ No newline at end of file +msrest==0.7.1 +pinecone-client +pinecone_datasets==0.6.1 \ No newline at end of file From 6fb8a882af41e2c43ca45eacec29d6adbbe89c1a Mon Sep 17 00:00:00 2001 From: mrT23 Date: Thu, 7 Sep 2023 12:41:31 +0300 Subject: [PATCH 08/16] ordering requirements.txt --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index dc617838..0360181f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,8 @@ fastapi==0.99.0 PyGithub==1.59.* retry==0.9.2 openai==0.27.8 +pinecone-client +pinecone_datasets Jinja2==3.1.2 tiktoken==0.4.0 uvicorn==0.22.0 @@ -18,6 +20,4 @@ boto3==1.28.25 google-cloud-storage==2.10.0 ujson==5.8.0 azure-devops==7.1.0b3 -msrest==0.7.1 -pinecone-client -pinecone_datasets==0.6.1 \ No newline at end of file +msrest==0.7.1 \ No newline at end of file From 498ad3d19c6448d1dd9e7b0db178d258e6862efd Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 10 Sep 2023 07:36:25 +0300 Subject: [PATCH 09/16] upgrade pip --- docker/Dockerfile | 1 + requirements.txt | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 951f846c..7789e3d9 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -3,6 +3,7 @@ FROM python:3.10 as base WORKDIR /app ADD pyproject.toml . ADD requirements.txt . +RUN pip install --upgrade pip RUN pip install . 
&& rm pyproject.toml requirements.txt ENV PYTHONPATH=/app diff --git a/requirements.txt b/requirements.txt index 0360181f..1e968236 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,10 @@ +pinecone-client==2.2.2 +pinecone_datasets==0.6.1 dynaconf==3.1.12 fastapi==0.99.0 PyGithub==1.59.* retry==0.9.2 openai==0.27.8 -pinecone-client -pinecone_datasets Jinja2==3.1.2 tiktoken==0.4.0 uvicorn==0.22.0 From b42b2536b5526b58f20be3ebd23369de00354e3e Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 10 Sep 2023 07:39:01 +0300 Subject: [PATCH 10/16] upgrade pip --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 1e968236..c2065e89 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -pinecone-client==2.2.2 -pinecone_datasets==0.6.1 +pinecone_datasets +pinecone-client dynaconf==3.1.12 fastapi==0.99.0 PyGithub==1.59.* From fe500845b725d6566e7191bd06df648bb5c92cba Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 10 Sep 2023 07:46:51 +0300 Subject: [PATCH 11/16] upgrade pip --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c2065e89..43380845 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -pinecone_datasets pinecone-client +pinecone_datasets dynaconf==3.1.12 fastapi==0.99.0 PyGithub==1.59.* From 7f5f2d2d1a8d53ea3b061be7c5178ee397bc4b7f Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 10 Sep 2023 08:07:39 +0300 Subject: [PATCH 12/16] solved dependencies --- docker/Dockerfile | 1 - requirements.txt | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 7789e3d9..951f846c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -3,7 +3,6 @@ FROM python:3.10 as base WORKDIR /app ADD pyproject.toml . ADD requirements.txt . -RUN pip install --upgrade pip RUN pip install . 
&& rm pyproject.toml requirements.txt ENV PYTHONPATH=/app diff --git a/requirements.txt b/requirements.txt index 43380845..ce8145c7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ pinecone-client -pinecone_datasets +pinecone_datasets @ git+https://github.com/mrT23/pinecone-datasets.git@main dynaconf==3.1.12 fastapi==0.99.0 PyGithub==1.59.* From 245f29e58a8bdb8e317af9056512225085183aca Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 10 Sep 2023 08:22:42 +0300 Subject: [PATCH 13/16] solved dependencies --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index ce8145c7..9c74d66d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,3 @@ -pinecone-client -pinecone_datasets @ git+https://github.com/mrT23/pinecone-datasets.git@main dynaconf==3.1.12 fastapi==0.99.0 PyGithub==1.59.* @@ -20,4 +18,6 @@ boto3==1.28.25 google-cloud-storage==2.10.0 ujson==5.8.0 azure-devops==7.1.0b3 -msrest==0.7.1 \ No newline at end of file +msrest==0.7.1 +pinecone-client +pinecone_datasets @ git+https://github.com/mrT23/pinecone-datasets.git@main \ No newline at end of file From bd588b4509f1432cb96bb9bacabdeb340ac1595d Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 10 Sep 2023 07:36:25 +0300 Subject: [PATCH 14/16] solved dependencies --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0360181f..9c74d66d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,8 +3,6 @@ fastapi==0.99.0 PyGithub==1.59.* retry==0.9.2 openai==0.27.8 -pinecone-client -pinecone_datasets Jinja2==3.1.2 tiktoken==0.4.0 uvicorn==0.22.0 @@ -20,4 +18,6 @@ boto3==1.28.25 google-cloud-storage==2.10.0 ujson==5.8.0 azure-devops==7.1.0b3 -msrest==0.7.1 \ No newline at end of file +msrest==0.7.1 +pinecone-client +pinecone_datasets @ git+https://github.com/mrT23/pinecone-datasets.git@main \ No newline at end of file From bf6a235add7734e4808f710075f8c77bdd2f55e8 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 10 Sep 2023 13:16:05 +0300 Subject: [PATCH 15/16] pinecone-datasets==0.6.1 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9c74d66d..04015e84 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,4 +20,4 @@ ujson==5.8.0 azure-devops==7.1.0b3 msrest==0.7.1 pinecone-client -pinecone_datasets @ git+https://github.com/mrT23/pinecone-datasets.git@main \ No newline at end of file +pinecone-datasets==0.6.1 \ No newline at end of file From f9f0f220c27397ed3ceaa412b5c0d74049e61274 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 10 Sep 2023 13:31:36 +0300 Subject: [PATCH 16/16] pinecone-datasets --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 04015e84..f044e8d9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,4 +20,4 @@ ujson==5.8.0 azure-devops==7.1.0b3 msrest==0.7.1 pinecone-client -pinecone-datasets==0.6.1 \ No newline at end of file +pinecone-datasets @ git+https://github.com/mrT23/pinecone-datasets.git@main \ No newline at end of file