From 34096059ff746cccddf3a42a58b9960a786dec11 Mon Sep 17 00:00:00 2001 From: idavidov Date: Tue, 25 Jul 2023 13:05:56 +0300 Subject: [PATCH 01/10] quick and dirty response for github API ratelimit, until some smart solution will be implemented --- pr_agent/algo/pr_processing.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 20933d51..bfe72387 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -10,6 +10,7 @@ from pr_agent.algo.token_handler import TokenHandler from pr_agent.algo.utils import load_large_diff from pr_agent.config_loader import settings from pr_agent.git_providers.git_provider import GitProvider +from github import GithubException DELETED_FILES_ = "Deleted files:\n" @@ -40,7 +41,10 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s global PATCH_EXTRA_LINES PATCH_EXTRA_LINES = 0 - diff_files = list(git_provider.get_diff_files()) + try: + diff_files = list(git_provider.get_diff_files()) + except GithubException.RateLimitExceededException as e: + logging.warning("Rate limit exceeded for GitHub API.") # get pr languages pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files) From 8f482cd41ab2c2b5f2a83f82e8440cff4f7b0748 Mon Sep 17 00:00:00 2001 From: Ilya Davidov Date: Tue, 25 Jul 2023 13:23:19 +0300 Subject: [PATCH 02/10] show how much time until rate limit reset Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- pr_agent/algo/pr_processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index bfe72387..c55ef138 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -44,7 +44,7 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s try: diff_files = list(git_provider.get_diff_files()) except GithubException.RateLimitExceededException as e: - logging.warning("Rate limit exceeded for GitHub API.") + logging.warning(f"Rate limit exceeded for GitHub API. The rate limit will be reset at {e.reset}.") # get pr languages pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files) From d1a8a610e942457a9cff22afb34af1c7ad4aadab Mon Sep 17 00:00:00 2001 From: idavidov Date: Tue, 25 Jul 2023 13:38:55 +0300 Subject: [PATCH 03/10] Revert "show how much time until rate limit reset" This reverts commit 8f482cd41ab2c2b5f2a83f82e8440cff4f7b0748. --- pr_agent/algo/pr_processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index c55ef138..bfe72387 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -44,7 +44,7 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s try: diff_files = list(git_provider.get_diff_files()) except GithubException.RateLimitExceededException as e: - logging.warning(f"Rate limit exceeded for GitHub API. The rate limit will be reset at {e.reset}.") + logging.warning("Rate limit exceeded for GitHub API.") # get pr languages pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files) From 404cc0a00e84d50dc8cc8b5162fe0003362c15fd Mon Sep 17 00:00:00 2001 From: idavidov Date: Tue, 25 Jul 2023 14:20:20 +0300 Subject: [PATCH 04/10] small change to show message and fail --- pr_agent/algo/pr_processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index bfe72387..491d3e92 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -44,7 +44,7 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s try: diff_files = list(git_provider.get_diff_files()) except GithubException.RateLimitExceededException as e: - logging.warning("Rate limit exceeded for GitHub API.") + logging.error(f"Rate limit exceeded for GitHub API. original message {e}") # get pr languages pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files) From 55637a5620af65d813eaeb7d3ffbd97fde0d7eb8 Mon Sep 17 00:00:00 2001 From: idavidov Date: Tue, 25 Jul 2023 14:42:54 +0300 Subject: [PATCH 05/10] added retry decorator similar to used in ai_handler following @okotek suggestion --- pr_agent/algo/pr_processing.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 491d3e92..17ea2356 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -11,6 +11,7 @@ from pr_agent.algo.utils import load_large_diff from pr_agent.config_loader import settings from pr_agent.git_providers.git_provider import GitProvider from github import GithubException +from retry import retry DELETED_FILES_ = "Deleted files:\n" @@ -20,7 +21,10 @@ OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD = 1000 OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600 PATCH_EXTRA_LINES = 3 +GITHUB_RETRIES=1 +@retry(exceptions=(APIError, Timeout, TryAgain, AttributeError, RateLimitError, GithubException.RateLimitExceededException), + tries=GITHUB_RETRIES, delay=2, backoff=2, jitter=(1, 3)) def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: str, add_line_numbers_to_hunks: bool = False, disable_extra_lines: bool = False) -> str: """ @@ -45,6 +49,7 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s diff_files = list(git_provider.get_diff_files()) except GithubException.RateLimitExceededException as e: logging.error(f"Rate limit exceeded for GitHub API. original message {e}") + raise # get pr languages pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files) From b6f6c903a042f2bf65d91af940d03017960ae6df Mon Sep 17 00:00:00 2001 From: idavidov Date: Tue, 25 Jul 2023 15:12:02 +0300 Subject: [PATCH 06/10] moved @retry to github_provider.py and fetch number of retries from settings --- pr_agent/algo/pr_processing.py | 7 +------ pr_agent/git_providers/github_provider.py | 6 ++++-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 17ea2356..1e96d2f1 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -10,8 +10,7 @@ from pr_agent.algo.token_handler import TokenHandler from pr_agent.algo.utils import load_large_diff from pr_agent.config_loader import settings from pr_agent.git_providers.git_provider import GitProvider -from github import GithubException -from retry import retry + DELETED_FILES_ = "Deleted files:\n" @@ -21,10 +20,6 @@ OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD = 1000 OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600 PATCH_EXTRA_LINES = 3 -GITHUB_RETRIES=1 - -@retry(exceptions=(APIError, Timeout, TryAgain, AttributeError, RateLimitError, GithubException.RateLimitExceededException), - tries=GITHUB_RETRIES, delay=2, backoff=2, jitter=(1, 3)) def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: str, add_line_numbers_to_hunks: bool = False, disable_extra_lines: bool = False) -> str: """ diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 10a50412..64e4ab46 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -3,14 +3,14 @@ from datetime import datetime from typing import Optional, Tuple from urllib.parse import urlparse -from github import AppAuthentication, Github, Auth +from github import AppAuthentication, Github, Auth, GithubException from pr_agent.config_loader import settings from .git_provider import FilePatchInfo, GitProvider, IncrementalPR from ..algo.language_handler import is_valid_file from ..algo.utils import load_large_diff - +from retry import retry class GithubProvider(GitProvider): def __init__(self, pr_url: Optional[str] = None, incremental=IncrementalPR(False)): @@ -78,6 +78,8 @@ class GithubProvider(GitProvider): return self.file_set.values() return self.pr.get_files() + @retry(exceptions=(GithubException.RateLimitExceededException), + tries=settings.github.ratelimit_retries, delay=2, backoff=2, jitter=(1, 3)) def get_diff_files(self) -> list[FilePatchInfo]: files = self.get_files() diff_files = [] From 3b334805eec8870df0eb7dbd1758fdee37f776be Mon Sep 17 00:00:00 2001 From: idavidov Date: Tue, 25 Jul 2023 15:14:56 +0300 Subject: [PATCH 07/10] still need GithubException.RateLimitExceededException in pr_processing.py for correct exception catch --- pr_agent/algo/pr_processing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 1e96d2f1..59b342a0 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -10,6 +10,7 @@ from pr_agent.algo.token_handler import TokenHandler from pr_agent.algo.utils import load_large_diff from pr_agent.config_loader import settings from pr_agent.git_providers.git_provider import GitProvider +from github import GithubException DELETED_FILES_ = "Deleted files:\n" From f6036e936ee0dbdad268314064af8586f8d6cdbc Mon Sep 17 00:00:00 2001 From: idavidov Date: Tue, 25 Jul 2023 15:23:40 +0300 Subject: [PATCH 08/10] + settings.github.ratelimit_retries setup in configuration.toml --- pr_agent/settings/configuration.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 3861df13..186b0855 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -27,6 +27,7 @@ num_code_suggestions=4 [github] # The type of deployment to create. Valid values are 'app' or 'user'. deployment_type = "user" +ratelimit_retries = 5 [gitlab] # URL to the gitlab service From 1229fba346664d5d347a15571a0066f8e55ea9e6 Mon Sep 17 00:00:00 2001 From: idavidov Date: Tue, 25 Jul 2023 16:37:13 +0300 Subject: [PATCH 09/10] + settings.github.ratelimit_retries setup in configuration.toml --- pr_agent/algo/pr_processing.py | 7 ++-- pr_agent/git_providers/git_provider.py | 5 +++ pr_agent/git_providers/github_provider.py | 44 +++++++++++++---------- 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 59b342a0..94cc1a00 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -10,8 +10,7 @@ from pr_agent.algo.token_handler import TokenHandler from pr_agent.algo.utils import load_large_diff from pr_agent.config_loader import settings from pr_agent.git_providers.git_provider import GitProvider -from github import GithubException - +from git_provider import RateLimitExceeded DELETED_FILES_ = "Deleted files:\n" @@ -43,8 +42,8 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s try: diff_files = list(git_provider.get_diff_files()) - except GithubException.RateLimitExceededException as e: - logging.error(f"Rate limit exceeded for GitHub API. original message {e}") + except RateLimitExceededException as e: + logging.error(f"Rate limit exceeded for git provider API. original message {e}") raise # get pr languages diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py index 3f7c1ef2..97dbe8ab 100644 --- a/pr_agent/git_providers/git_provider.py +++ b/pr_agent/git_providers/git_provider.py @@ -136,3 +136,8 @@ class IncrementalPR: self.commits_range = None self.first_new_commit_sha = None self.last_seen_commit_sha = None + + +class RateLimitExceeded(Exception): + """Raised when the git provider API rate limit has been exceeded.""" + pass \ No newline at end of file diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 64e4ab46..a9979a78 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -4,6 +4,7 @@ from typing import Optional, Tuple from urllib.parse import urlparse from github import AppAuthentication, Github, Auth, GithubException +from git_provider import RateLimitExceeded from pr_agent.config_loader import settings @@ -81,26 +82,31 @@ class GithubProvider(GitProvider): @retry(exceptions=(GithubException.RateLimitExceededException), tries=settings.github.ratelimit_retries, delay=2, backoff=2, jitter=(1, 3)) def get_diff_files(self) -> list[FilePatchInfo]: - files = self.get_files() - diff_files = [] - for file in files: - if is_valid_file(file.filename): - new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha) - patch = file.patch - if self.incremental.is_incremental and self.file_set: - original_file_content_str = self._get_pr_file_content(file, self.incremental.last_seen_commit_sha) - patch = load_large_diff(file, - new_file_content_str, - original_file_content_str, - None) - self.file_set[file.filename] = patch - else: - original_file_content_str = self._get_pr_file_content(file, self.pr.base.sha) + try: + files = self.get_files() + diff_files = [] + for file in files: + if is_valid_file(file.filename): + new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha) + patch = file.patch + if self.incremental.is_incremental and self.file_set: + original_file_content_str = self._get_pr_file_content(file, + self.incremental.last_seen_commit_sha) + patch = load_large_diff(file, + new_file_content_str, + original_file_content_str, + None) + self.file_set[file.filename] = patch + else: + original_file_content_str = self._get_pr_file_content(file, self.pr.base.sha) - diff_files.append( - FilePatchInfo(original_file_content_str, new_file_content_str, patch, file.filename)) - self.diff_files = diff_files - return diff_files + diff_files.append( + FilePatchInfo(original_file_content_str, new_file_content_str, patch, file.filename)) + self.diff_files = diff_files + return diff_files + except GithubException.RateLimitExceededException as e: + logging.error(f"Rate limit exceeded for GitHub API. Original message: {e}") + raise RateLimitExceeded("Rate limit exceeded for GitHub API.") from e def publish_description(self, pr_title: str, pr_body: str): self.pr.edit(title=pr_title, body=pr_body) From 8ae5faca530a62910525f1fa69071183af6d42e3 Mon Sep 17 00:00:00 2001 From: Ori Kotek Date: Tue, 25 Jul 2023 16:52:18 +0300 Subject: [PATCH 10/10] Fix cyclic dependency --- pr_agent/algo/pr_processing.py | 3 ++- pr_agent/git_providers/git_provider.py | 4 ---- pr_agent/git_providers/github_provider.py | 11 ++++++----- pr_agent/servers/utils.py | 4 ++++ 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 94cc1a00..45ef40b2 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -3,6 +3,8 @@ from __future__ import annotations import logging from typing import Tuple, Union, Callable, List +from github import RateLimitExceededException + from pr_agent.algo import MAX_TOKENS from pr_agent.algo.git_patch_processing import convert_to_hunks_with_lines_numbers, extend_patch, handle_patch_deletions from pr_agent.algo.language_handler import sort_files_by_main_languages @@ -10,7 +12,6 @@ from pr_agent.algo.token_handler import TokenHandler from pr_agent.algo.utils import load_large_diff from pr_agent.config_loader import settings from pr_agent.git_providers.git_provider import GitProvider -from git_provider import RateLimitExceeded DELETED_FILES_ = "Deleted files:\n" diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py index 97dbe8ab..677c2eb1 100644 --- a/pr_agent/git_providers/git_provider.py +++ b/pr_agent/git_providers/git_provider.py @@ -137,7 +137,3 @@ class IncrementalPR: self.first_new_commit_sha = None self.last_seen_commit_sha = None - -class RateLimitExceeded(Exception): - """Raised when the git provider API rate limit has been exceeded.""" - pass \ No newline at end of file diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index a9979a78..7f617937 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -3,15 +3,16 @@ from datetime import datetime from typing import Optional, Tuple from urllib.parse import urlparse -from github import AppAuthentication, Github, Auth, GithubException -from git_provider import RateLimitExceeded +from github import AppAuthentication, Auth, Github, GithubException +from retry import retry from pr_agent.config_loader import settings -from .git_provider import FilePatchInfo, GitProvider, IncrementalPR from ..algo.language_handler import is_valid_file from ..algo.utils import load_large_diff -from retry import retry +from .git_provider import FilePatchInfo, GitProvider, IncrementalPR +from ..servers.utils import RateLimitExceeded + class GithubProvider(GitProvider): def __init__(self, pr_url: Optional[str] = None, incremental=IncrementalPR(False)): @@ -79,7 +80,7 @@ class GithubProvider(GitProvider): return self.file_set.values() return self.pr.get_files() - @retry(exceptions=(GithubException.RateLimitExceededException), + @retry(exceptions=RateLimitExceeded, tries=settings.github.ratelimit_retries, delay=2, backoff=2, jitter=(1, 3)) def get_diff_files(self) -> list[FilePatchInfo]: try: diff --git a/pr_agent/servers/utils.py b/pr_agent/servers/utils.py index 942ac449..c24b880c 100644 --- a/pr_agent/servers/utils.py +++ b/pr_agent/servers/utils.py @@ -21,3 +21,7 @@ def verify_signature(payload_body, secret_token, signature_header): if not hmac.compare_digest(expected_signature, signature_header): raise HTTPException(status_code=403, detail="Request signatures didn't match!") + +class RateLimitExceeded(Exception): + """Raised when the git provider API rate limit has been exceeded.""" + pass