Compare commits

...

18 Commits

Author SHA1 Message Date
613ccb4c34 Add support for base_url in GitHub SDK 2023-11-21 16:48:36 +02:00
e95a6a8b07 Merge pull request #466 from Codium-ai/ok/gitlab_fix
Fix a bug in GitLab webhook
2023-11-21 16:36:40 +02:00
2add584fbc Fix a bug in GitLab webhook 2023-11-21 16:28:01 +02:00
54d7d59177 Update Usage.md 2023-11-20 20:06:07 +02:00
b3129c7dd9 Merge pull request #464 from Codium-ai/tr/more_protections
Refactor YAML parsing for improved error handling
2023-11-20 02:28:57 -08:00
3f76d95495 ScannerError 2023-11-20 10:35:35 +02:00
1b600cd85f Refactor YAML parsing for improved error handling 2023-11-20 10:30:59 +02:00
26cc26129c Merge pull request #463 from Codium-ai/tr/more_protections
minor fix
2023-11-19 07:45:39 -08:00
d1d7903e39 minor fix 2023-11-19 17:44:11 +02:00
dff4d1befc Merge pull request #462 from Codium-ai/tr/more_protections
Enhancements in YAML Parsing and Error Handling
2023-11-19 07:40:06 -08:00
3504a64269 protections 2023-11-19 17:35:40 +02:00
83247cadec protections 2023-11-19 17:30:57 +02:00
5ca1748b93 Merge pull request #460 from Codium-ai/tr/update_instructions
GFM mode for 'review' instructions
2023-11-19 01:20:36 -08:00
c7a681038d gfm instructions 2023-11-19 11:11:11 +02:00
eb977b4c24 gfm instructions 2023-11-19 11:02:11 +02:00
14a934b146 Update Usage.md 2023-11-17 10:41:52 +02:00
bc28d657b2 Merge pull request #438 from koid/fix/remove-unnecessary-setup
Removal of Redundant Logger Setup
2023-11-15 10:35:13 -08:00
f0991526b5 remove unnecessary setup_logger 2023-11-08 16:56:44 +09:00
12 changed files with 79 additions and 52 deletions

View File

@ -101,6 +101,7 @@ python3 -m pr_agent.cli --pr_url <pr_url> ask <your question>
python3 -m pr_agent.cli --pr_url <pr_url> describe
python3 -m pr_agent.cli --pr_url <pr_url> improve
python3 -m pr_agent.cli --pr_url <pr_url> add_docs
python3 -m pr_agent.cli --pr_url <pr_url> generate_labels
python3 -m pr_agent.cli --issue_url <issue_url> similar_issue
...
```

View File

@ -37,7 +37,9 @@ To ignore files or directories, edit the **[ignore.toml](/pr_agent/settings/igno
- `IGNORE.GLOB`
- `IGNORE.REGEX`
See [dynaconf envvars documentation](https://www.dynaconf.com/envvars/).
For example, to ignore python files in a PR, set:
`ignore.glob = ['*.py']`
#### git provider
The [git_provider](pr_agent/settings/configuration.toml#L4) field in the configuration file determines the GIT provider that will be used by PR-Agent. Currently, the following providers are supported:
@ -310,6 +312,7 @@ To use Google's Vertex AI platform and its associated models (chat-bison/codecha
```
[config] # in configuration.toml
model = "vertex_ai/codechat-bison"
fallback_models="vertex_ai/codechat-bison"
[vertexai] # in .secrets.toml
vertex_project = "my-google-cloud-project"

View File

@ -286,37 +286,39 @@ def _fix_key_value(key: str, value: str):
return key, value
def load_yaml(review_text: str) -> dict:
review_text = review_text.removeprefix('```yaml').rstrip('`')
def load_yaml(response_text: str) -> dict:
response_text = response_text.removeprefix('```yaml').rstrip('`')
try:
data = yaml.safe_load(review_text)
data = yaml.safe_load(response_text)
except Exception as e:
get_logger().error(f"Failed to parse AI prediction: {e}")
data = try_fix_yaml(review_text)
data = try_fix_yaml(response_text)
return data
def try_fix_yaml(review_text: str) -> dict:
review_text_lines = review_text.split('\n')
def try_fix_yaml(response_text: str) -> dict:
response_text_lines = response_text.split('\n')
keys = ['relevant line:', 'suggestion content:', 'relevant file:']
# first fallback - try to convert 'relevant line: ...' to relevant line: |-\n ...'
review_text_lines_copy = review_text_lines.copy()
for i in range(0, len(review_text_lines_copy)):
if 'relevant line:' in review_text_lines_copy[i] and not '|-' in review_text_lines_copy[i]:
review_text_lines_copy[i] = review_text_lines_copy[i].replace('relevant line: ',
'relevant line: |-\n ')
response_text_lines_copy = response_text_lines.copy()
for i in range(0, len(response_text_lines_copy)):
for key in keys:
if key in response_text_lines_copy[i] and not '|-' in response_text_lines_copy[i]:
response_text_lines_copy[i] = response_text_lines_copy[i].replace(f'{key}',
f'{key} |-\n ')
try:
data = yaml.load('\n'.join(review_text_lines_copy), Loader=yaml.SafeLoader)
get_logger().info(f"Successfully parsed AI prediction after adding |-\n to relevant line")
data = yaml.safe_load('\n'.join(response_text_lines_copy))
get_logger().info(f"Successfully parsed AI prediction after adding |-\n")
return data
except:
get_logger().debug(f"Failed to parse AI prediction after adding |-\n to relevant line")
get_logger().info(f"Failed to parse AI prediction after adding |-\n")
# second fallback - try to remove last lines
data = {}
for i in range(1, len(review_text_lines)):
review_text_lines_tmp = '\n'.join(review_text_lines[:-i])
for i in range(1, len(response_text_lines)):
response_text_lines_tmp = '\n'.join(response_text_lines[:-i])
try:
data = yaml.load(review_text_lines_tmp, Loader=yaml.SafeLoader)
data = yaml.safe_load(response_text_lines_tmp,)
get_logger().info(f"Successfully parsed AI prediction after removing {i} lines")
break
except:

View File

@ -405,7 +405,7 @@ class GithubProvider(GitProvider):
raise ValueError("GitHub app installation ID is required when using GitHub app deployment")
auth = AppAuthentication(app_id=app_id, private_key=private_key,
installation_id=self.installation_id)
return Github(app_auth=auth)
return Github(app_auth=auth, base_url=get_settings().github.base_url)
if deployment_type == 'user':
try:
@ -414,7 +414,7 @@ class GithubProvider(GitProvider):
raise ValueError(
"GitHub token is required when using user deployment. See: "
"https://github.com/Codium-ai/pr-agent#method-2-run-from-source") from e
return Github(auth=Auth.Token(token))
return Github(auth=Auth.Token(token), base_url=get_settings().github.base_url)
def _get_repo(self):
if hasattr(self, 'repo_obj') and \

View File

@ -38,7 +38,7 @@ async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request):
try:
secret_dict = json.loads(secret)
gitlab_token = secret_dict["gitlab_token"]
log_context["sender"] = secret_dict["id"]
log_context["sender"] = secret_dict.get("token_name", secret_dict.get("id", "unknown"))
context["settings"] = copy.deepcopy(global_settings)
context["settings"].gitlab.personal_access_token = gitlab_token
except Exception as e:

View File

@ -1,12 +1,14 @@
commands_text = "> **/review [-i]**: Request a review of your Pull Request. For an incremental review, which only " \
"considers changes since the last review, include the '-i' option.\n" \
"> **/describe**: Modify the PR title and description based on the contents of the PR.\n" \
"> **/improve [--extended]**: Suggest improvements to the code in the PR. Extended mode employs several calls, and provides a more thorough feedback. \n" \
"> **/ask \\<QUESTION\\>**: Pose a question about the PR.\n" \
"> **/update_changelog**: Update the changelog based on the PR's contents.\n\n" \
">To edit any configuration parameter from **configuration.toml**, add --config_path=new_value\n" \
commands_text = "> **/review**: Request a review of your Pull Request.\n" \
"> **/describe**: Update the PR title and description based on the contents of the PR.\n" \
"> **/improve [--extended]**: Suggest code improvements. Extended mode provides a higher quality feedback.\n" \
"> **/ask \\<QUESTION\\>**: Ask a question about the PR.\n" \
"> **/update_changelog**: Update the changelog based on the PR's contents.\n" \
"> **/add_docs**: Generate docstring for new components introduced in the PR.\n" \
"> **/generate_labels**: Generate labels for the PR based on the PR's contents.\n" \
"> see the [tools guide](https://github.com/Codium-ai/pr-agent/blob/main/docs/TOOLS_GUIDE.md) for more details.\n\n" \
">To edit any configuration parameter from the [configuration.toml](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml), add --config_path=new_value.\n" \
">For example: /review --pr_reviewer.extra_instructions=\"focus on the file: ...\" \n" \
">To list the possible configuration parameters, use the **/config** command.\n" \
">To list the possible configuration parameters, add a **/config** comment.\n" \
def bot_help_text(user: str):

View File

@ -3,10 +3,8 @@ from mangum import Mangum
from starlette.middleware import Middleware
from starlette_context.middleware import RawContextMiddleware
from pr_agent.log import setup_logger
from pr_agent.servers.github_app import router
setup_logger()
middleware = [Middleware(RawContextMiddleware)]
app = FastAPI(middleware=middleware)

View File

@ -79,6 +79,7 @@ extra_instructions = ""
# The type of deployment to create. Valid values are 'app' or 'user'.
deployment_type = "user"
ratelimit_retries = 5
base_url = "https://api.github.com"
[github_action]
# auto_review = true # set as env var in .github/workflows/pr-agent.yaml

View File

@ -90,16 +90,19 @@ Code suggestions:
Example output:
```yaml
Code suggestions:
- relevant file: |-
- relevant file: |-
src/file1.py
suggestion content: |-
Add a docstring to func1()
existing code: |-
def func1():
relevant lines start: 12
relevant lines end: 12
relevant lines start: |-
12
relevant lines end: |-
12
improved code: |-
...
...
```

View File

@ -249,11 +249,15 @@ class PRReviewer:
# Add help text if not in CLI mode
if not get_settings().get("CONFIG.CLI_MODE", False):
markdown_text += "\n### How to use\n"
if self.git_provider.is_supported("gfm_markdown"):
markdown_text += "\n**<details><summary> Instructions**</summary>\n"
bot_user = "[bot]" if get_settings().github_app.override_deployment_type else get_settings().github_app.bot_user
if user and bot_user not in user:
markdown_text += bot_help_text(user)
else:
markdown_text += actions_help_text
if self.git_provider.is_supported("gfm_markdown"):
markdown_text += "\n</details>\n"
# Add custom labels from the review prediction (effort, security)
self.set_review_labels(data)
@ -274,14 +278,7 @@ class PRReviewer:
if get_settings().pr_reviewer.num_code_suggestions == 0:
return
review_text = self.prediction.strip()
review_text = review_text.removeprefix('```yaml').rstrip('`')
try:
data = yaml.load(review_text, Loader=SafeLoader)
except Exception as e:
get_logger().error(f"Failed to parse AI prediction: {e}")
data = try_fix_yaml(review_text)
data = load_yaml(self.prediction.strip())
comments: List[str] = []
for suggestion in data.get('PR Feedback', {}).get('Code feedback', []):
relevant_file = suggestion.get('relevant file', '').strip()

View File

@ -8,6 +8,7 @@ import pinecone
from pinecone_datasets import Dataset, DatasetMetadata
from pydantic import BaseModel, Field
from pr_agent.algo import MAX_TOKENS
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import get_max_tokens
from pr_agent.config_loader import get_settings

View File

@ -2,6 +2,9 @@
# Generated by CodiumAI
import pytest
import yaml
from yaml.scanner import ScannerError
from pr_agent.algo.utils import load_yaml
@ -12,7 +15,7 @@ class TestLoadYaml:
expected_output = {'name': 'John Smith', 'age': 35}
assert load_yaml(yaml_str) == expected_output
def test_load_complicated_yaml(self):
def test_load_invalid_yaml1(self):
yaml_str = \
'''\
PR Analysis:
@ -26,7 +29,23 @@ PR Feedback:
Code feedback:
- relevant file: pr_agent/settings/pr_description_prompts.toml
suggestion: Consider using a more descriptive variable name than 'user' for the command prompt. A more descriptive name would make the code more readable and maintainable. [medium]
relevant line: 'user="""PR Info:'
relevant line: user="""PR Info: aaa
Security concerns: No'''
expected_output = {'PR Analysis': {'Main theme': 'Enhancing the `/describe` command prompt by adding title and description', 'Type of PR': 'Enhancement', 'Relevant tests added': False, 'Focused PR': 'Yes, the PR is focused on enhancing the `/describe` command prompt.'}, 'PR Feedback': {'General suggestions': 'The PR seems to be well-structured and focused on a specific enhancement. However, it would be beneficial to add tests to ensure the new feature works as expected.', 'Code feedback': [{'relevant file': 'pr_agent/settings/pr_description_prompts.toml', 'suggestion': "Consider using a more descriptive variable name than 'user' for the command prompt. A more descriptive name would make the code more readable and maintainable. [medium]", 'relevant line': 'user="""PR Info:'}], 'Security concerns': False}}
with pytest.raises(ScannerError):
yaml.safe_load(yaml_str)
expected_output = {'PR Analysis': {'Main theme': 'Enhancing the `/describe` command prompt by adding title and description', 'Type of PR': 'Enhancement', 'Relevant tests added': False, 'Focused PR': 'Yes, the PR is focused on enhancing the `/describe` command prompt.'}, 'PR Feedback': {'General suggestions': 'The PR seems to be well-structured and focused on a specific enhancement. However, it would be beneficial to add tests to ensure the new feature works as expected.', 'Code feedback': [{'relevant file': 'pr_agent/settings/pr_description_prompts.toml', 'suggestion': "Consider using a more descriptive variable name than 'user' for the command prompt. A more descriptive name would make the code more readable and maintainable. [medium]", 'relevant line': 'user="""PR Info: aaa'}], 'Security concerns': False}}
assert load_yaml(yaml_str) == expected_output
def test_load_invalid_yaml2(self):
yaml_str = '''\
- relevant file: src/app.py:
suggestion content: The print statement is outside inside the if __name__ ==: \
'''
with pytest.raises(ScannerError):
yaml.safe_load(yaml_str)
expected_output =[{'relevant file': 'src/app.py:',
'suggestion content': 'The print statement is outside inside the if __name__ ==: '}]
assert load_yaml(yaml_str) == expected_output