Merge commit 'deda4baa871d3dcd5b1692beea4d3c30db4f1955' into docs/pr_compression_doc

This commit is contained in:
Hussam.lawen
2023-07-06 17:46:58 +03:00
4 changed files with 26 additions and 23 deletions

View File

@ -159,10 +159,11 @@ Here is a quick overview of the different sub-tools of PR Reviewer:
- PR type classification - PR type classification
- Is the PR covered by relevant tests - Is the PR covered by relevant tests
- Is the PR minimal and focused - Is the PR minimal and focused
- Are there security concerns
- PR Feedback - PR Feedback
- General PR suggestions - General PR suggestions
- Code suggestions - Code suggestions
- Security concerns
This is how a typical output of the PR Reviewer looks like: This is how a typical output of the PR Reviewer looks like:
@ -174,21 +175,23 @@ This is how a typical output of the PR Reviewer looks like:
- 📌 **Type of PR:** Enhancement - 📌 **Type of PR:** Enhancement
- 🧪 **Relevant tests added:** No - 🧪 **Relevant tests added:** No
-**Minimal and focused:** Yes, the PR is focused on adding two new handlers for language extension and token counting. -**Minimal and focused:** Yes, the PR is focused on adding two new handlers for language extension and token counting.
- 🔒 **Security concerns:** No, the PR does not introduce possible security concerns or issues.
#### PR Feedback #### PR Feedback
- 💡 **General PR suggestions:** The PR is generally well-structured and the code is clean. However, it would be beneficial to add some tests to ensure the new handlers work as expected. Also, consider adding docstrings to the new functions and classes to improve code readability and maintainability. - 💡 **General PR suggestions:** The PR is generally well-structured and the code is clean. However, it would be beneficial to add some tests to ensure the new handlers work as expected. Also, consider adding docstrings to the new functions and classes to improve code readability and maintainability.
- 🤖 **Code suggestions:** - 🤖 **Code suggestions:**
- **suggestion 1:**
- **relevant file:** pr_agent/algo/language_handler.py
- **suggestion content:** Consider using a set instead of a list for 'bad_extensions' as checking membership in a set is faster than in a list. [medium]
- **suggestion 2:**
- **relevant file:** pr_agent/algo/language_handler.py - **relevant file:** pr_agent/algo/language_handler.py
- **suggestion content:** In the 'filter_bad_extensions' function, you are splitting the filename on '.' and taking the last element to get the extension. This might not work as expected if the filename contains multiple '.' characters. Consider using 'os.path.splitext' to get the file extension more reliably. [important]
- 🔒 **Security concerns:** No, the PR does not introduce possible security concerns or issues. **suggestion content:** Consider using a set instead of a list for 'bad_extensions' as checking membership in a set is faster than in a list. [medium]
- **relevant file:** pr_agent/algo/language_handler.py
**suggestion content:** In the 'filter_bad_extensions' function, you are splitting the filename on '.' and taking the last element to get the extension. This might not work as expected if the filename contains multiple '.' characters. Consider using 'os.path.splitext' to get the file extension more reliably. [important]
--- ---
@ -231,16 +234,12 @@ This is a comparison of the regular and extended code suggestions modes:
--- ---
Example for regular suggestion: Example for regular suggestion:
- **suggestion 1:**
- **relevant file:** sql.py - **relevant file:** sql.py
- **suggestion content:** Remove hardcoded sensitive information like username and password. Use environment variables or a secure method to store these values. [important] - **suggestion content:** Remove hardcoded sensitive information like username and password. Use environment variables or a secure method to store these values. [important]
--- ---
Example for extended suggestion: Example for extended suggestion:
- **suggestion 1:**
- **relevant file:** sql.py - **relevant file:** sql.py
- **suggestion content:** Remove hardcoded sensitive information (username and password) [important] - **suggestion content:** Remove hardcoded sensitive information (username and password) [important]
- **why:** Hardcoding sensitive information is a security risk. It's better to use environment variables or a secure way to store these values. - **why:** Hardcoding sensitive information is a security risk. It's better to use environment variables or a secure way to store these values.

View File

@ -78,7 +78,7 @@ def pr_generate_extended_diff(pr_languages: list, token_handler: TokenHandler) -
return patches_extended, total_tokens return patches_extended, total_tokens
def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler) -> Tuple(list, list, list): def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler) -> Tuple[list, list, list]:
# Apply Diff Minimization techniques to reduce the number of tokens: # Apply Diff Minimization techniques to reduce the number of tokens:
# 0. Start from the largest diff patch to smaller ones # 0. Start from the largest diff patch to smaller ones
# 1. Don't use extend context lines around diff # 1. Don't use extend context lines around diff
@ -87,8 +87,8 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler) ->
# 4. Minimize all remaining files when you reach token limit # 4. Minimize all remaining files when you reach token limit
patches = [] patches = []
modified_files = [] modified_files_list = []
deleted_files = [] deleted_files_list = []
# sort each one of the languages in top_langs by the number of tokens in the diff # sort each one of the languages in top_langs by the number of tokens in the diff
sorted_files = [] sorted_files = []
for lang in top_langs: for lang in top_langs:
@ -107,27 +107,31 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler) ->
patch = handle_patch_deletions(patch, original_file_content_str, patch = handle_patch_deletions(patch, original_file_content_str,
new_file_content_str, file.filename) new_file_content_str, file.filename)
if patch is None: if patch is None:
if not deleted_files: if not deleted_files_list:
total_tokens += token_handler.count_tokens(DELETED_FILES_) total_tokens += token_handler.count_tokens(DELETED_FILES_)
deleted_files.append(file.filename) deleted_files_list.append(file.filename)
total_tokens += token_handler.count_tokens(file.filename) + 1 total_tokens += token_handler.count_tokens(file.filename) + 1
continue continue
new_patch_tokens = token_handler.count_tokens(patch) new_patch_tokens = token_handler.count_tokens(patch)
# Hard Stop, no more tokens
if total_tokens > token_handler.limit - OUTPUT_BUFFER_TOKENS // 2: if total_tokens > token_handler.limit - OUTPUT_BUFFER_TOKENS // 2:
logging.warning(f"File was fully skipped, no more tokens: {file.filename}.") logging.warning(f"File was fully skipped, no more tokens: {file.filename}.")
continue # Hard Stop, no more tokens continue
# If the patch is too large, just show the file name
if total_tokens + new_patch_tokens > token_handler.limit - OUTPUT_BUFFER_TOKENS: if total_tokens + new_patch_tokens > token_handler.limit - OUTPUT_BUFFER_TOKENS:
# Current logic is to skip the patch if it's too large # Current logic is to skip the patch if it's too large
# TODO: Option for alternative logic to remove hunks from the patch to reduce the number of tokens # TODO: Option for alternative logic to remove hunks from the patch to reduce the number of tokens
# until we meet the requirements # until we meet the requirements
if settings.config.verbosity_level >= 2: if settings.config.verbosity_level >= 2:
logging.warning(f"Patch too large, minimizing it, {file.filename}") logging.warning(f"Patch too large, minimizing it, {file.filename}")
patch = None if not modified_files_list:
if not modified_files:
total_tokens += token_handler.count_tokens(MORE_MODIFIED_FILES_) total_tokens += token_handler.count_tokens(MORE_MODIFIED_FILES_)
modified_files.append(file.filename) modified_files_list.append(file.filename)
total_tokens += token_handler.count_tokens(file.filename) + 1 total_tokens += token_handler.count_tokens(file.filename) + 1
continue
if patch: if patch:
patch_final = f"## {file.filename}\n\n{patch}\n" patch_final = f"## {file.filename}\n\n{patch}\n"
patches.append(patch_final) patches.append(patch_final)
@ -135,7 +139,7 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler) ->
if settings.config.verbosity_level >= 2: if settings.config.verbosity_level >= 2:
logging.info(f"Tokens: {total_tokens}, last filename: {file.filename}") logging.info(f"Tokens: {total_tokens}, last filename: {file.filename}")
return patches, modified_files, deleted_files return patches, modified_files_list, deleted_files_list
def load_large_diff(file, new_file_content_str: str, original_file_content_str: str, patch: str) -> str: def load_large_diff(file, new_file_content_str: str, original_file_content_str: str, patch: str) -> str:

View File

@ -65,7 +65,7 @@ class PRQuestions:
def _prepare_pr_answer(self) -> str: def _prepare_pr_answer(self) -> str:
answer_str = f"Question: {self.question_str}\n\n" answer_str = f"Question: {self.question_str}\n\n"
answer_str += f"Answer: {self.prediction.strip()}\n\n" answer_str += f"Answer:\n{self.prediction.strip()}\n\n"
if settings.config.verbosity_level >= 2: if settings.config.verbosity_level >= 2:
logging.info(f"answer_str:\n{answer_str}") logging.info(f"answer_str:\n{answer_str}")
return answer_str return answer_str

View File

@ -26,7 +26,7 @@ class PRReviewer:
"title": self.git_provider.pr.title, "title": self.git_provider.pr.title,
"branch": self.git_provider.get_pr_branch(), "branch": self.git_provider.get_pr_branch(),
"description": self.git_provider.pr.body, "description": self.git_provider.pr.body,
"language": self.git_provider.get_main_pr_language(), "language": self.main_language,
"diff": "", # empty diff for initial calculation "diff": "", # empty diff for initial calculation
"require_tests": settings.pr_reviewer.require_tests_review, "require_tests": settings.pr_reviewer.require_tests_review,
"require_security": settings.pr_reviewer.require_security_review, "require_security": settings.pr_reviewer.require_security_review,