catch more rate limit errors
This commit is contained in:
parent
99bbcdbc2f
commit
429f854fb8
|
|
@ -11,6 +11,9 @@ from typing import Any, Dict, List, Literal, Optional, Sequence, ContextManager
|
|||
|
||||
import litellm
|
||||
from anthropic import APIError, APITimeoutError, InternalServerError, RateLimitError
|
||||
from openai import RateLimitError as OpenAIRateLimitError
|
||||
from litellm.exceptions import RateLimitError as LiteLLMRateLimitError
|
||||
from google.api_core.exceptions import ResourceExhausted
|
||||
from langchain_core.language_models import BaseChatModel
|
||||
from langchain_core.messages import (
|
||||
BaseMessage,
|
||||
|
|
@ -838,13 +841,30 @@ def _execute_test_command_wrapper(original_prompt, config, test_attempts, auto_t
|
|||
|
||||
|
||||
def _handle_api_error(e, attempt, max_retries, base_delay):
|
||||
# 1. Check if this is a ValueError with 429 code or rate limit phrases
|
||||
if isinstance(e, ValueError):
|
||||
error_str = str(e).lower()
|
||||
if "code" not in error_str or "429" not in error_str:
|
||||
rate_limit_phrases = ["429", "rate limit", "too many requests", "quota exceeded"]
|
||||
if "code" not in error_str and not any(phrase in error_str for phrase in rate_limit_phrases):
|
||||
raise e
|
||||
|
||||
# 2. Check for status_code or http_status attribute equal to 429
|
||||
if hasattr(e, 'status_code') and e.status_code == 429:
|
||||
pass # This is a rate limit error, continue with retry logic
|
||||
elif hasattr(e, 'http_status') and e.http_status == 429:
|
||||
pass # This is a rate limit error, continue with retry logic
|
||||
# 3. Check for rate limit phrases in error message
|
||||
elif isinstance(e, Exception) and not isinstance(e, ValueError):
|
||||
error_str = str(e).lower()
|
||||
if not any(phrase in error_str for phrase in ["rate limit", "too many requests", "quota exceeded", "429"]) and not ("rate" in error_str and "limit" in error_str):
|
||||
# This doesn't look like a rate limit error, but we'll still retry other API errors
|
||||
pass
|
||||
|
||||
# Apply common retry logic for all identified errors
|
||||
if attempt == max_retries - 1:
|
||||
logger.error("Max retries reached, failing: %s", str(e))
|
||||
raise RuntimeError(f"Max retries ({max_retries}) exceeded. Last error: {e}")
|
||||
|
||||
logger.warning("API error (attempt %d/%d): %s", attempt + 1, max_retries, str(e))
|
||||
delay = base_delay * (2**attempt)
|
||||
print_error(
|
||||
|
|
@ -979,6 +999,9 @@ def run_agent_with_retry(
|
|||
InternalServerError,
|
||||
APITimeoutError,
|
||||
RateLimitError,
|
||||
OpenAIRateLimitError,
|
||||
LiteLLMRateLimitError,
|
||||
ResourceExhausted,
|
||||
APIError,
|
||||
ValueError,
|
||||
) as e:
|
||||
|
|
|
|||
|
|
@ -376,9 +376,51 @@ def test_handle_api_error_valueerror():
|
|||
|
||||
from ra_aid.agent_utils import _handle_api_error
|
||||
|
||||
# ValueError not containing "code" or "429" should be re-raised
|
||||
# ValueError not containing "code" or rate limit phrases should be re-raised
|
||||
with pytest.raises(ValueError):
|
||||
_handle_api_error(ValueError("some error"), 0, 5, 1)
|
||||
_handle_api_error(ValueError("some unrelated error"), 0, 5, 1)
|
||||
|
||||
# ValueError with "429" should be handled without raising
|
||||
_handle_api_error(ValueError("error code 429"), 0, 5, 1)
|
||||
|
||||
# ValueError with "rate limit" phrase should be handled without raising
|
||||
_handle_api_error(ValueError("hit rate limit"), 0, 5, 1)
|
||||
|
||||
# ValueError with "too many requests" phrase should be handled without raising
|
||||
_handle_api_error(ValueError("too many requests, try later"), 0, 5, 1)
|
||||
|
||||
# ValueError with "quota exceeded" phrase should be handled without raising
|
||||
_handle_api_error(ValueError("quota exceeded for this month"), 0, 5, 1)
|
||||
|
||||
|
||||
def test_handle_api_error_status_code():
    """Check that errors tagged with a 429 status attribute are handled without raising.

    Two attribute spellings are exercised in turn: ``status_code`` and
    ``http_status``. Each error is passed as attempt 0 of 5 with a base
    delay of 1.
    """
    from ra_aid.agent_utils import _handle_api_error

    # (attribute carrying the HTTP status, exception message)
    tagged_errors = (
        ("status_code", "Rate limited"),
        ("http_status", "Too many requests"),
    )

    for attribute_name, message in tagged_errors:
        error = Exception(message)
        setattr(error, attribute_name, 429)
        # Must return normally; any exception here fails the test.
        _handle_api_error(error, 0, 5, 1)
|
||||
|
||||
|
||||
def test_handle_api_error_rate_limit_phrases():
    """Check that generic exceptions with rate-limit wording are handled without raising.

    Covers the phrases "rate limit", "too many requests" and "quota exceeded",
    plus a message where "rate" and "limit" appear as separate words.
    """
    from ra_aid.agent_utils import _handle_api_error

    rate_limit_messages = (
        "You have exceeded your rate limit",
        "Too many requests, please slow down",
        "API quota exceeded for this billing period",
        # "rate" and "limit" both present, but not adjacent
        "You hit the rate at which we limit requests",
    )

    for message in rate_limit_messages:
        # Must return normally; any exception here fails the test.
        _handle_api_error(Exception(message), 0, 5, 1)
|
||||
|
||||
|
||||
def test_handle_api_error_max_retries():
|
||||
|
|
@ -614,3 +656,11 @@ def test_run_agent_with_retry_handles_api_badrequest_error(monkeypatch):
|
|||
assert "Agent has crashed: Unretryable API error" in result
|
||||
# Verify the agent is marked as crashed
|
||||
assert is_crashed()
|
||||
|
||||
def test_handle_api_error_resource_exhausted():
    """Check that a google-api-core ``ResourceExhausted`` error is handled without raising."""
    from google.api_core.exceptions import ResourceExhausted
    from ra_aid.agent_utils import _handle_api_error

    # Attempt 0 of 5 with a base delay of 1; the call must return normally.
    _handle_api_error(
        ResourceExhausted("429 Resource has been exhausted (e.g. check quota)."),
        0,
        5,
        1,
    )
|
||||
Loading…
Reference in New Issue