diff --git a/Usage.md b/Usage.md index 6176eaf0..e80fea7b 100644 --- a/Usage.md +++ b/Usage.md @@ -261,6 +261,30 @@ All PR-Agent tools have a parameter called `extra_instructions`, that enables to /update_changelog --pr_update_changelog.extra_instructions="Make sure to update also the version ..." ``` +#### Patch Extra Lines +By default, around any change in your PR, git patch provides 3 lines of context above and below the change. +``` +@@ -12,5 +12,5 @@ def func1(): + code line that already existed in the file... + code line that already existed in the file... + code line that already existed in the file... +-code line that was removed in the PR ++new code line added in the PR + code line that already existed in the file... + code line that already existed in the file... + code line that already existed in the file... +``` + +For the `review`, `describe`, `ask` and `add_docs` tools, if the token budget allows, PR-Agent tries to increase the number of lines of context, via the parameter: +``` +[config] +patch_extra_lines=3 +``` + +Increasing this number provides more context to the model, but will also increase the number of tokens consumed. +If the PR is too large (see [PR Compression strategy](./PR_COMPRESSION.md)), PR-Agent automatically sets this number to 0, using the original git patch. 
+ + #### Azure DevOps provider To use Azure DevOps provider use the following settings in configuration.toml: ``` diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 1c34e603..4d717202 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -21,7 +21,6 @@ MORE_MODIFIED_FILES_ = "More modified files:\n" OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD = 1000 OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600 -PATCH_EXTRA_LINES = 3 def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: str, add_line_numbers_to_hunks: bool = False, disable_extra_lines: bool = False) -> str: @@ -44,8 +43,9 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s """ if disable_extra_lines: - global PATCH_EXTRA_LINES PATCH_EXTRA_LINES = 0 + else: + PATCH_EXTRA_LINES = get_settings().config.patch_extra_lines try: diff_files = git_provider.get_diff_files() @@ -57,8 +57,8 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files) # generate a standard diff string, with patch extension - patches_extended, total_tokens, patches_extended_tokens = pr_generate_extended_diff(pr_languages, token_handler, - add_line_numbers_to_hunks) + patches_extended, total_tokens, patches_extended_tokens = pr_generate_extended_diff( + pr_languages, token_handler, add_line_numbers_to_hunks, patch_extra_lines=PATCH_EXTRA_LINES) # if we are under the limit, return the full diff if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < MAX_TOKENS[model]: @@ -80,7 +80,8 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s def pr_generate_extended_diff(pr_languages: list, token_handler: TokenHandler, - add_line_numbers_to_hunks: bool) -> Tuple[list, int, list]: + add_line_numbers_to_hunks: bool, + patch_extra_lines: int = 0) -> Tuple[list, int, list]: """ Generate a standard diff string with 
patch extension, while counting the number of tokens used and applying diff minimization techniques if needed. @@ -102,7 +103,7 @@ def pr_generate_extended_diff(pr_languages: list, continue # extend each patch with extra lines of context - extended_patch = extend_patch(original_file_content_str, patch, num_lines=PATCH_EXTRA_LINES) + extended_patch = extend_patch(original_file_content_str, patch, num_lines=patch_extra_lines) full_extended_patch = f"\n\n## {file.filename}\n\n{extended_patch}\n" if add_line_numbers_to_hunks: diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 187ded56..9a03055f 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -10,6 +10,7 @@ use_repo_settings_file=true ai_timeout=180 max_description_tokens = 500 max_commits_tokens = 500 +patch_extra_lines = 3 secret_provider="google_cloud_storage" cli_mode=false diff --git a/pr_agent/settings/pr_add_docs.toml b/pr_agent/settings/pr_add_docs.toml index b552ec86..31b7195c 100644 --- a/pr_agent/settings/pr_add_docs.toml +++ b/pr_agent/settings/pr_add_docs.toml @@ -42,7 +42,9 @@ Specific instructions: {%- if extra_instructions %} Extra instructions from the user: +' {{ extra_instructions }} +' {%- endif %} You must use the following YAML schema to format your answer: diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml index 68083945..a3eb93a1 100644 --- a/pr_agent/settings/pr_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_code_suggestions_prompts.toml @@ -1,6 +1,6 @@ [pr_code_suggestions_prompt] system="""You are a language model called PR-Code-Reviewer, that specializes in suggesting code improvements for Pull Request (PR). -Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR. 
+Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR (the '+' lines in the diff). Example for a PR Diff input: ' @@ -31,14 +31,13 @@ __old hunk__ ' Specific instructions: -- Provide up to {{ num_code_suggestions }} code suggestions. +- Provide up to {{ num_code_suggestions }} code suggestions. Try to provide diverse and insightful suggestions. - Prioritize suggestions that address major problems, issues and bugs in the code. As a second priority, suggestions should focus on best practices, code readability, maintainability, enhancments, performance, and other aspects. - Don't suggest to add docstring, type hints, or comments. - Try to provide diverse and insightful suggestions. +- Don't suggest to add docstring, type hints, or comments. - Suggestions should refer only to code from the '__new hunk__' sections, and focus on new lines of code (lines starting with '+'). - Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the '__new hunk__' code. - For each suggestion, make sure to take into consideration also the context, meaning the lines before and after the relevant code. +- Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the '__new hunk__' code. +- For each suggestion, make sure to take into consideration also the context, meaning the lines before and after the relevant code. - Provide the exact line numbers range (inclusive) for each issue. - Assume there is additional relevant code, that is not included in the diff. 
@@ -46,7 +45,9 @@ Specific instructions: {%- if extra_instructions %} Extra instructions from the user: +' {{ extra_instructions }} +' {%- endif %} You must use the following YAML schema to format your answer: diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index 43dd8e3b..c2c8e654 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -7,7 +7,9 @@ Your task is to provide full description of the PR content. {%- if extra_instructions %} Extra instructions from the user: +' {{ extra_instructions }} +' {% endif %} You must use the following YAML schema to format your answer: diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index c0599e50..657027af 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -35,7 +35,9 @@ The review should focus on new code added in the PR (lines starting with '+'), a {%- if extra_instructions %} Extra instructions from the user: +' {{ extra_instructions }} +' {% endif %} You must use the following YAML schema to format your answer: @@ -129,8 +131,7 @@ PR Feedback: Security concerns: type: string description: >- - yes\\no question: does this PR code introduce possible security concerns or - issues, like SQL injection, XSS, CSRF, and others ? If answered 'yes',explain your answer shortly + yes\\no question: does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? If answered 'yes', explain your answer briefly. {%- endif %} ``` @@ -196,7 +197,9 @@ Here are questions to better understand the PR. 
Use the answers to provide bette {{question_str|trim}} User answers: +' {{answer_str|trim}} +' ###### {%- endif %} diff --git a/pr_agent/settings/pr_update_changelog_prompts.toml b/pr_agent/settings/pr_update_changelog_prompts.toml index 78b6a0b5..e9133e34 100644 --- a/pr_agent/settings/pr_update_changelog_prompts.toml +++ b/pr_agent/settings/pr_update_changelog_prompts.toml @@ -8,7 +8,9 @@ Your task is to update the CHANGELOG.md file of the project, to shortly summariz {%- if extra_instructions %} Extra instructions from the user: +' {{ extra_instructions }} +' {%- endif %} """ diff --git a/pr_agent/tools/pr_add_docs.py b/pr_agent/tools/pr_add_docs.py index 4cc9102a..2769e9a9 100644 --- a/pr_agent/tools/pr_add_docs.py +++ b/pr_agent/tools/pr_add_docs.py @@ -68,7 +68,7 @@ class PRAddDocs: self.token_handler, model, add_line_numbers_to_hunks=True, - disable_extra_lines=True) + disable_extra_lines=False) logging.info('Getting AI prediction...') self.prediction = await self._get_prediction(model)