From bc95cf5b8e6b0879644b35495a2824ccbe4431ce Mon Sep 17 00:00:00 2001
From: mrT23 <tal.r@codium.ai>
Date: Wed, 6 Sep 2023 09:12:25 +0300
Subject: [PATCH] stable

---
 Usage.md                           | 12 ++++++------
 pr_agent/cli.py                    |  9 ++++++---
 pr_agent/tools/pr_similar_issue.py | 19 +++++++++++++------
 3 files changed, 25 insertions(+), 15 deletions(-)
diff --git a/Usage.md b/Usage.md
index 336de974..f8624d7e 100644
--- a/Usage.md
+++ b/Usage.md
@@ -50,12 +50,12 @@ When running from your local repo (CLI), your local configuration file will be u
 
 Examples for invoking the different tools via the CLI:
 
-- **Review**:       `python cli.py --pr_url=<pr_url>  /review`
-- **Describe**:     `python cli.py --pr_url=<pr_url>  /describe`
-- **Improve**:      `python cli.py --pr_url=<pr_url>  /improve`
-- **Ask**:          `python cli.py --pr_url=<pr_url>  /ask "Write me a poem about this PR"`
-- **Reflect**:      `python cli.py --pr_url=<pr_url>  /reflect`
-- **Update Changelog**:      `python cli.py --pr_url=<pr_url>  /update_changelog`
+- **Review**:       `python cli.py --pr_url=<pr_url>  review`
+- **Describe**:     `python cli.py --pr_url=<pr_url>  describe`
+- **Improve**:      `python cli.py --pr_url=<pr_url>  improve`
+- **Ask**:          `python cli.py --pr_url=<pr_url>  ask "Write me a poem about this PR"`
+- **Reflect**:      `python cli.py --pr_url=<pr_url>  reflect`
+- **Update Changelog**:      `python cli.py --pr_url=<pr_url>  update_changelog`
 
 `<pr_url>` is the url of the relevant PR (for example: https://github.com/Codium-ai/pr-agent/pull/50).
 
diff --git a/pr_agent/cli.py b/pr_agent/cli.py
index 7c4508d9..07c37f5e 100644
--- a/pr_agent/cli.py
+++ b/pr_agent/cli.py
@@ -5,7 +5,6 @@ import os
 
 from pr_agent.agent.pr_agent import PRAgent, commands
 from pr_agent.config_loader import get_settings
-from pr_agent.tools.pr_similar_issue import PRSimilarIssue
 
 
 def run(inargs=None):
@@ -18,6 +17,7 @@ For example:
 - cli.py --pr_url=... improve
 - cli.py --pr_url=... ask "write me a poem about this PR"
 - cli.py --pr_url=... reflect
+- cli.py --issue_url=... similar_issue
 
 Supported commands:
 -review / review_pr - Add a review that includes a summary of the PR and specific suggestions for improvement.
@@ -38,17 +38,20 @@ Configuration:
 To edit any configuration parameter from 'configuration.toml', just add -config_path=<value>.
 For example: 'python cli.py --pr_url=... review --pr_reviewer.extra_instructions="focus on the file: ..."'
 """)
-    parser.add_argument('--pr_url', type=str, help='The URL of the PR to review')
+    parser.add_argument('--pr_url', type=str, help='The URL of the PR to review', default=None)
     parser.add_argument('--issue_url', type=str, help='The URL of the Issue to review', default=None)
     parser.add_argument('command', type=str, help='The', choices=commands, default='review')
     parser.add_argument('rest', nargs=argparse.REMAINDER, default=[])
     args = parser.parse_args(inargs)
+    if not args.pr_url and not args.issue_url:
+        parser.print_help()
+        return
+
     logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))
     command = args.command.lower()
     get_settings().set("CONFIG.CLI_MODE", True)
     if args.issue_url:
         result = asyncio.run(PRAgent().handle_request(args.issue_url, command + " " + " ".join(args.rest)))
-        # result = asyncio.run(PRSimilarIssue(args.issue_url, cli_mode=True, args=command + " " + " ".join(args.rest)).run())
     else:
         result = asyncio.run(PRAgent().handle_request(args.pr_url, command + " " + " ".join(args.rest)))
     if not result:
diff --git a/pr_agent/tools/pr_similar_issue.py b/pr_agent/tools/pr_similar_issue.py
index 94dc10d3..50ec4c6b 100644
--- a/pr_agent/tools/pr_similar_issue.py
+++ b/pr_agent/tools/pr_similar_issue.py
@@ -132,7 +132,7 @@ class PRSimilarIssue:
         header = issue.title
         body = issue.body
         number = issue.number
-        if get_settings().pinecone.skip_comments:
+        if get_settings().pr_similar_issue.skip_comments:
             comments = []
         else:
             comments = list(issue.get_comments())
@@ -151,11 +151,12 @@ class PRSimilarIssue:
 
         counter = 0
         for issue in issues_list:
-
             if issue.pull_request:
                 continue
 
             counter += 1
+            if counter % 100 == 0:
+                logging.info(f"Scanned {counter} issues")
             if counter >= self.max_issues_to_scan:
                 logging.info(f"Scanned {self.max_issues_to_scan} issues, stopping")
                 break
@@ -179,7 +180,7 @@ class PRSimilarIssue:
                 for j, comment in enumerate(comments):
                     comment_body = comment.body
                     num_words_comment = len(comment_body.split())
-                    if num_words_comment < 10:
+                    if num_words_comment < 10 or not isinstance(comment_body, str):
                         continue
 
                     if len(issue_str) < 8000 or \
@@ -199,8 +200,14 @@ class PRSimilarIssue:
         logging.info('Embedding...')
         openai.api_key = get_settings().openai.key
         list_to_encode = list(df["text"].values)
-        res = openai.Embedding.create(input=list_to_encode, engine=MODEL)
-        embeds = [record['embedding'] for record in res['data']]
+        try:  # fast path: embed every text in a single batched request
+            res = openai.Embedding.create(input=list_to_encode, engine=MODEL)
+            embeds = [record['embedding'] for record in res['data']]
+        except Exception:  # not a bare except: KeyboardInterrupt/SystemExit must propagate
+            embeds = []  # batched call failed; fall back to one request per text
+            for text in list_to_encode:
+                res = openai.Embedding.create(input=[text], engine=MODEL)
+                embeds.append(res['data'][0]['embedding'])
         df["values"] = embeds
         meta = DatasetMetadata.empty()
         meta.dense_model.dimension = len(embeds[0])
@@ -210,7 +217,7 @@ class PRSimilarIssue:
         api_key = get_settings().pinecone.api_key
         environment = get_settings().pinecone.environment
         if not upsert:
-            logging.info('Creating index...')
+            logging.info('Creating index from scratch...')
             ds.to_pinecone_index(self.index_name, api_key=api_key, environment=environment)
         else:
             logging.info('Upserting index...')