From f4037e0dfa7d19b9c1e43ff17ac5976218442cec Mon Sep 17 00:00:00 2001
From: mrT23 <tal.r@codium.ai>
Date: Wed, 24 Jan 2024 19:40:58 +0200
Subject: [PATCH] feat: Add LanceDB support for similar_issue tool and refactor
 SOC2 compliance feature name

---
 README.md                          | 3 +++
 docs/REVIEW.md                     | 4 ++--
 pr_agent/servers/help.py           | 2 +-
 pr_agent/tools/pr_similar_issue.py | 2 +-
 4 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/README.md b/README.md
index c66ec2cd..84c72e5c 100644
--- a/README.md
+++ b/README.md
@@ -50,6 +50,9 @@ pr_commands = [
 Meaning that by default, for each PR the `describe`, `review`, and `improve` tools will be triggered automatically, and the `improve` tool will present the suggestions in a single comment.  
 You can of course overwrite these defaults by adding a `.pr_agent.toml` file to your repo. See [here](https://github.com/Codium-ai/pr-agent/blob/main/Usage.md#working-with-github-app).
 
+### Jan 10, 2024
+[LanceDB](https://lancedb.com/) is now supported as a locally hosted VectorDB for the `similar_issue` tool. See [here](./docs/SIMILAR_ISSUE.md) for more details.
+
 
 ## Overview
 <div style="text-align:left;">
diff --git a/docs/REVIEW.md b/docs/REVIEW.md
index 83616a61..4d997e2b 100644
--- a/docs/REVIEW.md
+++ b/docs/REVIEW.md
@@ -45,7 +45,7 @@ To edit [configurations](./../pr_agent/settings/configuration.toml#L19)  related
 - `require_estimate_effort_to_review`: if set to true, the tool will add a section that estimates thed effort needed to review the PR. Default is true.
 #### SOC2 ticket compliance 💎
 This sub-tool checks if the PR description properly contains a ticket to a project management system (e.g., Jira, Asana, Trello, etc.), as required by SOC2 compliance. If not, it will add a label to the PR: "Missing SOC2 ticket".
-- `require_soc2_review`: If set to true, the SOC2 ticket checker sub-tool will be enabled. Default is false.
+- `require_soc2_ticket`: If set to true, the SOC2 ticket checker sub-tool will be enabled. Default is false.
 - `soc2_ticket_prompt`: The prompt for the SOC2 ticket review. Default is: `Does the PR description include a link to ticket in a project management system (e.g., Jira, Asana, Trello, etc.) ?`. Edit this field if your compliance requirements are different.
 #### Adding PR labels
 - `enable_review_labels_security`: if set to true, the tool will publish a 'possible security issue' label if it detects a security issue. Default is true.
@@ -103,7 +103,7 @@ The `review` tool provides a collection of possible feedbacks about a PR.
 It is recommended to review the [Configuration options](#configuration-options) section, and choose the relevant options for your use case.
 
 Some of the feature that are disabled by default are quite useful, and should be considered for enabling. For example: 
-`require_score_review`, `require_soc2_review`, `enable_review_labels_effort`, and more.
+`require_score_review`, `require_soc2_ticket`, `enable_review_labels_effort`, and more.
 
 On the other hand, if you find one of the enabled features to be irrelevant for your use case, disable it. No default configuration can fit all use cases.
 
diff --git a/pr_agent/servers/help.py b/pr_agent/servers/help.py
index 505016c2..ce0da4e8 100644
--- a/pr_agent/servers/help.py
+++ b/pr_agent/servers/help.py
@@ -95,7 +95,7 @@ The `review` tool can auto-generate two specific types of labels for a PR:
 The `review` tool provides a collection of possible feedbacks about a PR.
 It is recommended to review the [possible options](https://github.com/Codium-ai/pr-agent/blob/main/docs/REVIEW.md#enabledisable-features), and choose the ones relevant for your use case.
 Some of the feature that are disabled by default are quite useful, and should be considered for enabling. For example: 
-`require_score_review`, `require_soc2_review`, `enable_review_labels_effort`, and more.
+`require_score_review`, `require_soc2_ticket`, `enable_review_labels_effort`, and more.
 """
         output += "\n\n</details></td></tr>\n\n"
 
diff --git a/pr_agent/tools/pr_similar_issue.py b/pr_agent/tools/pr_similar_issue.py
index a38cb13d..485331c0 100644
--- a/pr_agent/tools/pr_similar_issue.py
+++ b/pr_agent/tools/pr_similar_issue.py
@@ -5,7 +5,6 @@ from typing import List
 import openai
 import pandas as pd
 import pinecone
-import lancedb
 from pinecone_datasets import Dataset, DatasetMetadata
 from pydantic import BaseModel, Field
 
@@ -108,6 +107,7 @@ class PRSimilarIssue:
                     get_logger().info('No new issues to update')
         
         elif get_settings().pr_similar_issue.vectordb == "lancedb":
+            import lancedb  # imported lazily so lancedb is only required when this vectordb backend is selected
             self.db = lancedb.connect(get_settings().lancedb.uri)
             self.table = None