Patch Extra Lines

This commit is contained in:
mrT23
2023-10-05 08:38:43 +03:00
parent 428e6382bd
commit 44239f1a79
4 changed files with 20 additions and 7 deletions

View File

@ -261,6 +261,17 @@ All PR-Agent tools have a parameter called `extra_instructions`, that enables to
/update_changelog --pr_update_changelog.extra_instructions="Make sure to update also the version ..." /update_changelog --pr_update_changelog.extra_instructions="Make sure to update also the version ..."
``` ```
#### Patch Extra Lines
By default, around any change in your PR, git patch provides 3 lines of context above and below the change.
For the `review`, `describe`, `ask` and `add_docs` tools, if the token budget allows, PR-Agent tries to extend each patch with additional lines of context. The number of extra lines is controlled by the parameter:
```
[config]
patch_extra_lines=3
```
Increasing this number provides more context to the model, but also increases the number of tokens consumed.
If the PR is too large (see [PR Compression strategy](./PR_COMPRESSION.md)), PR-Agent automatically sets this number to 0.
#### Azure DevOps provider #### Azure DevOps provider
To use Azure DevOps provider use the following settings in configuration.toml: To use Azure DevOps provider use the following settings in configuration.toml:
``` ```

View File

@ -21,7 +21,6 @@ MORE_MODIFIED_FILES_ = "More modified files:\n"
OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD = 1000 OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD = 1000
OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600 OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600
PATCH_EXTRA_LINES = 3
def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: str, def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: str,
add_line_numbers_to_hunks: bool = False, disable_extra_lines: bool = False) -> str: add_line_numbers_to_hunks: bool = False, disable_extra_lines: bool = False) -> str:
@ -44,8 +43,9 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s
""" """
if disable_extra_lines: if disable_extra_lines:
global PATCH_EXTRA_LINES
PATCH_EXTRA_LINES = 0 PATCH_EXTRA_LINES = 0
else:
PATCH_EXTRA_LINES = get_settings().config.patch_extra_lines
try: try:
diff_files = git_provider.get_diff_files() diff_files = git_provider.get_diff_files()
@ -57,8 +57,8 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s
pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files) pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files)
# generate a standard diff string, with patch extension # generate a standard diff string, with patch extension
patches_extended, total_tokens, patches_extended_tokens = pr_generate_extended_diff(pr_languages, token_handler, patches_extended, total_tokens, patches_extended_tokens = pr_generate_extended_diff(
add_line_numbers_to_hunks) pr_languages, token_handler, add_line_numbers_to_hunks, patch_extra_lines=PATCH_EXTRA_LINES)
# if we are under the limit, return the full diff # if we are under the limit, return the full diff
if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < MAX_TOKENS[model]: if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < MAX_TOKENS[model]:
@ -80,7 +80,8 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s
def pr_generate_extended_diff(pr_languages: list, def pr_generate_extended_diff(pr_languages: list,
token_handler: TokenHandler, token_handler: TokenHandler,
add_line_numbers_to_hunks: bool) -> Tuple[list, int, list]: add_line_numbers_to_hunks: bool,
patch_extra_lines: int = 0) -> Tuple[list, int, list]:
""" """
Generate a standard diff string with patch extension, while counting the number of tokens used and applying diff Generate a standard diff string with patch extension, while counting the number of tokens used and applying diff
minimization techniques if needed. minimization techniques if needed.
@ -102,7 +103,7 @@ def pr_generate_extended_diff(pr_languages: list,
continue continue
# extend each patch with extra lines of context # extend each patch with extra lines of context
extended_patch = extend_patch(original_file_content_str, patch, num_lines=PATCH_EXTRA_LINES) extended_patch = extend_patch(original_file_content_str, patch, num_lines=patch_extra_lines)
full_extended_patch = f"\n\n## {file.filename}\n\n{extended_patch}\n" full_extended_patch = f"\n\n## {file.filename}\n\n{extended_patch}\n"
if add_line_numbers_to_hunks: if add_line_numbers_to_hunks:

View File

@ -10,6 +10,7 @@ use_repo_settings_file=true
ai_timeout=180 ai_timeout=180
max_description_tokens = 500 max_description_tokens = 500
max_commits_tokens = 500 max_commits_tokens = 500
patch_extra_lines = 3
secret_provider="google_cloud_storage" secret_provider="google_cloud_storage"
cli_mode=false cli_mode=false

View File

@ -68,7 +68,7 @@ class PRAddDocs:
self.token_handler, self.token_handler,
model, model,
add_line_numbers_to_hunks=True, add_line_numbers_to_hunks=True,
disable_extra_lines=True) disable_extra_lines=False)
logging.info('Getting AI prediction...') logging.info('Getting AI prediction...')
self.prediction = await self._get_prediction(model) self.prediction = await self._get_prediction(model)