diff --git a/docs/docs/tools/describe.md b/docs/docs/tools/describe.md
index 0f2c45a7..a967a6c5 100644
--- a/docs/docs/tools/describe.md
+++ b/docs/docs/tools/describe.md
@@ -56,6 +56,33 @@ Everything below this marker is treated as previously auto-generated content and
 {width=512}
 
+### Sequence Diagram Support
+When the `enable_pr_diagram` option is enabled in your configuration, the `/describe` tool will include a `Mermaid` sequence diagram in the PR description.
+
+This diagram represents interactions between components/functions based on the diff content.
+
+### How to enable
+
+In your configuration:
+
+```toml
+[pr_description]
+enable_pr_diagram = true
+```
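+
+For illustration, the generated diagram might look like the following (hypothetical output; the actual diagram depends on the PR's diff content):
+
+```mermaid
+sequenceDiagram
+    participant User
+    participant API
+    participant Database
+    User->>API: POST /login
+    API->>Database: fetch user record
+    Database-->>API: user row
+    API-->>User: auth token
+```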
+
 ## Configuration options
 
 !!! example "Possible configurations"
 
@@ -109,6 +124,10 @@ Everything below this marker is treated as previously auto-generated content and
+    <tr>
+      <td><b>enable_pr_diagram</b></td>
+      <td>If set to true, the tool will generate a `Mermaid` sequence diagram (in code block format) describing component interactions based on the code changes. Default is false.</td>
+    </tr>
- return ''.join(parts)
+ return ''.join(parts)
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 5290b749..18f6e383 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
aiohttp==3.9.5
-anthropic>=0.48
+anthropic>=0.52.0
#anthropic[vertex]==0.47.1
atlassian-python-api==3.41.4
azure-devops==7.1.0b3
@@ -13,7 +13,7 @@ google-cloud-aiplatform==1.38.0
google-generativeai==0.8.3
google-cloud-storage==2.10.0
Jinja2==3.1.2
-litellm==1.69.3
+litellm==1.70.4
loguru==0.7.2
msrest==0.7.1
openai>=1.55.3
diff --git a/tests/unittest/test_clip_tokens.py b/tests/unittest/test_clip_tokens.py
index 79de6294..a42ef929 100644
--- a/tests/unittest/test_clip_tokens.py
+++ b/tests/unittest/test_clip_tokens.py
@@ -1,13 +1,302 @@
-
-# Generated by CodiumAI
-
import pytest
-
+from unittest.mock import patch, MagicMock
from pr_agent.algo.utils import clip_tokens
+from pr_agent.algo.token_handler import TokenEncoder
class TestClipTokens:
- def test_clip(self):
+ """Comprehensive test suite for the clip_tokens function."""
+
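+ # The tests below encode clip_tokens' assumed contract, inferred from the
+ # assertions in this file rather than from a published spec:
+ # - empty or None input is returned as-is; max_tokens <= 0 yields "";
+ # - text over the limit is cut to roughly int(0.9 * chars_per_token *
+ #   max_tokens) characters, i.e. a 10% safety margin;
+ # - "\n...(truncated)" is appended unless add_three_dots=False;
+ # - if the token encoder raises, a warning is logged and the original
+ #   text is returned unchanged.
+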
+ def test_empty_input_text(self):
+ """Test that empty input returns empty string."""
+ assert clip_tokens("", 10) == ""
+ assert clip_tokens(None, 10) is None
+
+ def test_text_under_token_limit(self):
+ """Test that text under the token limit is returned unchanged."""
+ text = "Short text"
+ max_tokens = 100
+ result = clip_tokens(text, max_tokens)
+ assert result == text
+
+ def test_text_exactly_at_token_limit(self):
+ """Test text that is exactly at the token limit."""
+ text = "This is exactly at the limit"
+ # Mock the token encoder to return exact limit
+ with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:
+ mock_tokenizer = MagicMock()
+ mock_tokenizer.encode.return_value = [1] * 10 # Exactly 10 tokens
+ mock_encoder.return_value = mock_tokenizer
+
+ result = clip_tokens(text, 10)
+ assert result == text
+
+ def test_text_over_token_limit_with_three_dots(self):
+ """Test text over token limit with three dots addition."""
+ text = "This is a longer text that should be clipped when it exceeds the token limit"
+ max_tokens = 5
+
+ with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:
+ mock_tokenizer = MagicMock()
+ mock_tokenizer.encode.return_value = [1] * 20 # 20 tokens
+ mock_encoder.return_value = mock_tokenizer
+
+ result = clip_tokens(text, max_tokens)
+ assert result.endswith("\n...(truncated)")
+ assert len(result) < len(text)
+
+ def test_text_over_token_limit_without_three_dots(self):
+ """Test text over token limit without three dots addition."""
+ text = "This is a longer text that should be clipped"
+ max_tokens = 5
+
+ with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:
+ mock_tokenizer = MagicMock()
+ mock_tokenizer.encode.return_value = [1] * 20 # 20 tokens
+ mock_encoder.return_value = mock_tokenizer
+
+ result = clip_tokens(text, max_tokens, add_three_dots=False)
+ assert not result.endswith("\n...(truncated)")
+ assert len(result) < len(text)
+
+ def test_negative_max_tokens(self):
+ """Test that negative max_tokens returns empty string."""
+ text = "Some text"
+ result = clip_tokens(text, -1)
+ assert result == ""
+
+ result = clip_tokens(text, -100)
+ assert result == ""
+
+ def test_zero_max_tokens(self):
+ """Test that zero max_tokens returns empty string."""
+ text = "Some text"
+ result = clip_tokens(text, 0)
+ assert result == ""
+
+ def test_delete_last_line_functionality(self):
+ """Test the delete_last_line parameter functionality."""
+ text = "Line 1\nLine 2\nLine 3\nLine 4"
+ max_tokens = 5
+
+ with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:
+ mock_tokenizer = MagicMock()
+ mock_tokenizer.encode.return_value = [1] * 20 # 20 tokens
+ mock_encoder.return_value = mock_tokenizer
+
+ # Without delete_last_line
+ result_normal = clip_tokens(text, max_tokens, delete_last_line=False)
+
+ # With delete_last_line
+ result_deleted = clip_tokens(text, max_tokens, delete_last_line=True)
+
+ # The result with delete_last_line should be shorter or equal
+ assert len(result_deleted) <= len(result_normal)
+
+ def test_pre_computed_num_input_tokens(self):
+ """Test using pre-computed num_input_tokens parameter."""
+ text = "This is a test text"
+ max_tokens = 10
+ num_input_tokens = 15
+
+ # Should not call the encoder when num_input_tokens is provided
+ with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:
+ mock_encoder.return_value = None # Should not be called
+
+ result = clip_tokens(text, max_tokens, num_input_tokens=num_input_tokens)
+ assert result.endswith("\n...(truncated)")
+ mock_encoder.assert_not_called()
+
+ def test_pre_computed_tokens_under_limit(self):
+ """Test pre-computed tokens under the limit."""
+ text = "Short text"
+ max_tokens = 20
+ num_input_tokens = 5
+
+ with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:
+ mock_encoder.return_value = None # Should not be called
+
+ result = clip_tokens(text, max_tokens, num_input_tokens=num_input_tokens)
+ assert result == text
+ mock_encoder.assert_not_called()
+
+ def test_special_characters_and_unicode(self):
+ """Test text with special characters and Unicode content."""
+ text = "Special chars: @#$%^&*()_+ áéÃóú ä¸ë¬¸ 🚀 emoji"
+ max_tokens = 5
+
+ with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:
+ mock_tokenizer = MagicMock()
+ mock_tokenizer.encode.return_value = [1] * 20 # 20 tokens
+ mock_encoder.return_value = mock_tokenizer
+
+ result = clip_tokens(text, max_tokens)
+ assert isinstance(result, str)
+ assert len(result) < len(text)
+
+ def test_multiline_text_handling(self):
+ """Test handling of multiline text."""
+ text = "Line 1\nLine 2\nLine 3\nLine 4\nLine 5"
+ max_tokens = 5
+
+ with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:
+ mock_tokenizer = MagicMock()
+ mock_tokenizer.encode.return_value = [1] * 20 # 20 tokens
+ mock_encoder.return_value = mock_tokenizer
+
+ result = clip_tokens(text, max_tokens)
+ assert isinstance(result, str)
+
+ def test_very_long_text(self):
+ """Test with very long text."""
+ text = "A" * 10000 # Very long text
+ max_tokens = 10
+
+ with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:
+ mock_tokenizer = MagicMock()
+ mock_tokenizer.encode.return_value = [1] * 5000 # Many tokens
+ mock_encoder.return_value = mock_tokenizer
+
+ result = clip_tokens(text, max_tokens)
+ assert len(result) < len(text)
+ assert result.endswith("\n...(truncated)")
+
+ def test_encoder_exception_handling(self):
+ """Test handling of encoder exceptions."""
+ text = "Test text"
+ max_tokens = 10
+
+ with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:
+ mock_encoder.side_effect = Exception("Encoder error")
+
+ # Should return original text when encoder fails
+ result = clip_tokens(text, max_tokens)
+ assert result == text
+
+ def test_zero_division_scenario(self):
+ """Test scenario that could lead to division by zero."""
+ text = "Test"
+ max_tokens = 10
+
+ with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:
+ mock_tokenizer = MagicMock()
+ mock_tokenizer.encode.return_value = [] # Empty tokens (could cause division by zero)
+ mock_encoder.return_value = mock_tokenizer
+
+ result = clip_tokens(text, max_tokens)
+ # Should handle gracefully and return original text
+ assert result == text
+
+ def test_various_edge_cases(self):
+ """Test various edge cases."""
+ # Single character
+ assert clip_tokens("A", 1000) == "A"
+
+ # Only whitespace
+ text = " \n \t "
+ with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:
+ mock_tokenizer = MagicMock()
+ mock_tokenizer.encode.return_value = [1] * 10
+ mock_encoder.return_value = mock_tokenizer
+
+ result = clip_tokens(text, 5)
+ assert isinstance(result, str)
+
+ # Text with only newlines
+ text = "\n\n\n\n"
+ with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:
+ mock_tokenizer = MagicMock()
+ mock_tokenizer.encode.return_value = [1] * 10
+ mock_encoder.return_value = mock_tokenizer
+
+ result = clip_tokens(text, 2, delete_last_line=True)
+ assert isinstance(result, str)
+
+ def test_parameter_combinations(self):
+ """Test different parameter combinations."""
+ text = "Multi\nline\ntext\nfor\ntesting"
+ max_tokens = 5
+
+ with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:
+ mock_tokenizer = MagicMock()
+ mock_tokenizer.encode.return_value = [1] * 20
+ mock_encoder.return_value = mock_tokenizer
+
+ # Test all combinations
+ combinations = [
+ (True, True), # add_three_dots=True, delete_last_line=True
+ (True, False), # add_three_dots=True, delete_last_line=False
+ (False, True), # add_three_dots=False, delete_last_line=True
+ (False, False), # add_three_dots=False, delete_last_line=False
+ ]
+
+ for add_dots, delete_line in combinations:
+ result = clip_tokens(text, max_tokens,
+ add_three_dots=add_dots,
+ delete_last_line=delete_line)
+ assert isinstance(result, str)
+ if add_dots and len(result) > 0:
+ assert result.endswith("\n...(truncated)") or result == text
+
+ def test_num_output_chars_zero_scenario(self):
+ """Test scenario where num_output_chars becomes zero or negative."""
+ text = "Short"
+ max_tokens = 1
+
+ with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:
+ mock_tokenizer = MagicMock()
+ mock_tokenizer.encode.return_value = [1] * 1000 # Many tokens for short text
+ mock_encoder.return_value = mock_tokenizer
+
+ result = clip_tokens(text, max_tokens)
+ # When num_output_chars is 0 or negative, should return empty string
+ assert result == ""
+
+ def test_logging_on_exception(self):
+ """Test that exceptions are properly logged."""
+ text = "Test text"
+ max_tokens = 10
+
+ # Patch the logger at the module level where it's imported
+ with patch('pr_agent.algo.utils.get_logger') as mock_logger:
+ mock_log_instance = MagicMock()
+ mock_logger.return_value = mock_log_instance
+
+ with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:
+ mock_encoder.side_effect = Exception("Test exception")
+
+ result = clip_tokens(text, max_tokens)
+
+ # Should log the warning
+ mock_log_instance.warning.assert_called_once()
+ # Should return original text
+ assert result == text
+
+ def test_factor_safety_calculation(self):
+ """Test that the 0.9 factor (10% reduction) works correctly."""
+ text = "Test text that should be reduced by 10 percent for safety"
+ max_tokens = 10
+
+ with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:
+ mock_tokenizer = MagicMock()
+ mock_tokenizer.encode.return_value = [1] * 20 # 20 tokens
+ mock_encoder.return_value = mock_tokenizer
+
+ result = clip_tokens(text, max_tokens)
+
+ # The result should be shorter due to the 0.9 factor
+ # Characters per token = len(text) / 20
+ # Expected chars = int(0.9 * (len(text) / 20) * 10)
+ expected_chars = int(0.9 * (len(text) / 20) * 10)
+
+ # Result should be around expected_chars length (plus truncation text)
+ if result.endswith("\n...(truncated)"):
+ actual_content = result[:-len("\n...(truncated)")]
+ assert len(actual_content) <= expected_chars + 5 # Some tolerance
+
+ # Test the original basic functionality to ensure backward compatibility
+ def test_clip_original_functionality(self):
+ """Test original functionality from the existing test."""
text = "line1\nline2\nline3\nline4\nline5\nline6"
max_tokens = 25
result = clip_tokens(text, max_tokens)
@@ -16,4 +305,4 @@ class TestClipTokens:
max_tokens = 10
result = clip_tokens(text, max_tokens)
expected_results = 'line1\nline2\nline3\n\n...(truncated)'
- assert result == expected_results
+ assert result == expected_results
\ No newline at end of file
diff --git a/tests/unittest/test_try_fix_yaml.py b/tests/unittest/test_try_fix_yaml.py
index 826d7312..98773c81 100644
--- a/tests/unittest/test_try_fix_yaml.py
+++ b/tests/unittest/test_try_fix_yaml.py
@@ -53,12 +53,12 @@ code_suggestions:
- relevant_file: |
src/index2.ts
label: |
- enhancment
+ enhancement
```
We can further improve the code by using the `const` keyword instead of `var` in the `src/index.ts` file.
'''
- expected_output = {'code_suggestions': [{'relevant_file': 'src/index.ts\n', 'label': 'best practice\n'}, {'relevant_file': 'src/index2.ts\n', 'label': 'enhancment'}]}
+ expected_output = {'code_suggestions': [{'relevant_file': 'src/index.ts\n', 'label': 'best practice\n'}, {'relevant_file': 'src/index2.ts\n', 'label': 'enhancement'}]}
assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='label') == expected_output
@@ -76,10 +76,178 @@ code_suggestions:
- relevant_file: |
src/index2.ts
label: |
- enhancment
+ enhancement
```
We can further improve the code by using the `const` keyword instead of `var` in the `src/index.ts` file.
'''
- expected_output = {'code_suggestions': [{'relevant_file': 'src/index.ts\n', 'label': 'best practice\n'}, {'relevant_file': 'src/index2.ts\n', 'label': 'enhancment'}]}
+ expected_output = {'code_suggestions': [{'relevant_file': 'src/index.ts\n', 'label': 'best practice\n'}, {'relevant_file': 'src/index2.ts\n', 'label': 'enhancement'}]}
assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='label') == expected_output
+
+
+ def test_with_brackets_yaml_content(self):
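+ """YAML wrapped in stray surrounding braces should still parse;
+ try_fix_yaml is expected to drop the '{' and '}' lines (inferred
+ from the expected output below)."""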
+ review_text = '''\
+{
+code_suggestions:
+- relevant_file: |
+ src/index.ts
+ label: |
+ best practice
+
+- relevant_file: |
+ src/index2.ts
+ label: |
+ enhancement
+}
+'''
+ expected_output = {'code_suggestions': [{'relevant_file': 'src/index.ts\n', 'label': 'best practice\n'}, {'relevant_file': 'src/index2.ts\n', 'label': 'enhancement'}]}
+ assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='label') == expected_output
+
+ def test_tab_indent_yaml(self):
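+ """A tab-indented line inside a block scalar should be recovered;
+ the expected output assumes the tab is normalized away so the label
+ parses as 'best practice'."""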
+ review_text = '''\
+code_suggestions:
+- relevant_file: |
+ src/index.ts
+ label: |
+\tbest practice
+
+- relevant_file: |
+ src/index2.ts
+ label: |
+ enhancement
+'''
+ expected_output = {'code_suggestions': [{'relevant_file': 'src/index.ts\n', 'label': 'best practice\n'}, {'relevant_file': 'src/index2.ts\n', 'label': 'enhancement\n'}]}
+ assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='label') == expected_output
+
+
+ def test_leading_plus_mark_code(self):
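+ """Diff-style leading '+' markers inside block scalars should be
+ stripped, per the expected existing_code/improved_code values below."""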
+ review_text = '''\
+code_suggestions:
+- relevant_file: |
+ src/index.ts
+ label: |
+ best practice
+ existing_code: |
++ var router = createBrowserRouter([
+ improved_code: |
++ const router = createBrowserRouter([
+'''
+ expected_output = {'code_suggestions': [{
+ 'relevant_file': 'src/index.ts\n',
+ 'label': 'best practice\n',
+ 'existing_code': 'var router = createBrowserRouter([\n',
+ 'improved_code': 'const router = createBrowserRouter([\n'
+ }]}
+ assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='improved_code') == expected_output
+
+
+ def test_inconsistent_indentation_in_block_scalar_yaml(self):
+ """
+ This test case represents a situation where the AI outputs the opening '{' with 5 spaces
+ (resulting in an inferred indent level of 5), while the closing '}' is output with only 4 spaces.
+ This inconsistency makes it impossible for the YAML parser to automatically determine the correct
+ indent level, causing a parsing failure.
+
+ The root cause may be the LLM miscounting spaces or misunderstanding the active block scalar context
+ while generating YAML output.
+ """
+
+ review_text = '''\
+code_suggestions:
+- relevant_file: |
+ tsconfig.json
+ existing_code: |
+ {
+ "key1": "value1",
+ "key2": {
+ "subkey": "value"
+ }
+ }
+'''
+ expected_json = '''\
+ {
+ "key1": "value1",
+ "key2": {
+ "subkey": "value"
+ }
+}
+'''
+ expected_output = {
+ 'code_suggestions': [{
+ 'relevant_file': 'tsconfig.json\n',
+ 'existing_code': expected_json
+ }]
+ }
+ assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='existing_code') == expected_output
+
+
+ def test_inconsistent_and_insufficient_indentation_in_block_scalar_yaml(self):
+ """
+ This test case reproduces a YAML parsing failure where the block scalar content
+ generated by the AI includes inconsistent and insufficient indentation levels.
+
+ The root cause may be the LLM miscounting spaces or misunderstanding the active block scalar context
+ while generating YAML output.
+ """
+
+ review_text = '''\
+code_suggestions:
+- relevant_file: |
+ tsconfig.json
+ existing_code: |
+ {
+ "key1": "value1",
+ "key2": {
+ "subkey": "value"
+ }
+ }
+'''
+ expected_json = '''\
+{
+ "key1": "value1",
+ "key2": {
+ "subkey": "value"
+ }
+}
+'''
+ expected_output = {
+ 'code_suggestions': [{
+ 'relevant_file': 'tsconfig.json\n',
+ 'existing_code': expected_json
+ }]
+ }
+ assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='existing_code') == expected_output
+
+
+ def test_wrong_indentation_code_block_scalar(self):
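+ """A block scalar indented less than the YAML parser expects should
+ be recovered with the code's own relative indentation preserved
+ (see expected_code_block below)."""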
+ review_text = '''\
+code_suggestions:
+- relevant_file: |
+ a.c
+ existing_code: |
+ int sum(int a, int b) {
+ return a + b;
+ }
+
+ int sub(int a, int b) {
+ return a - b;
+ }
+'''
+ expected_code_block = '''\
+int sum(int a, int b) {
+ return a + b;
+}
+
+int sub(int a, int b) {
+ return a - b;
+}
+'''
+ expected_output = {
+ "code_suggestions": [
+ {
+ "relevant_file": "a.c\n",
+ "existing_code": expected_code_block
+ }
+ ]
+ }
+ assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='existing_code') == expected_output