catch more rate limit errors
This commit is contained in:
parent
99bbcdbc2f
commit
429f854fb8
|
|
@ -11,6 +11,9 @@ from typing import Any, Dict, List, Literal, Optional, Sequence, ContextManager
|
||||||
|
|
||||||
import litellm
|
import litellm
|
||||||
from anthropic import APIError, APITimeoutError, InternalServerError, RateLimitError
|
from anthropic import APIError, APITimeoutError, InternalServerError, RateLimitError
|
||||||
|
from openai import RateLimitError as OpenAIRateLimitError
|
||||||
|
from litellm.exceptions import RateLimitError as LiteLLMRateLimitError
|
||||||
|
from google.api_core.exceptions import ResourceExhausted
|
||||||
from langchain_core.language_models import BaseChatModel
|
from langchain_core.language_models import BaseChatModel
|
||||||
from langchain_core.messages import (
|
from langchain_core.messages import (
|
||||||
BaseMessage,
|
BaseMessage,
|
||||||
|
|
@ -838,13 +841,30 @@ def _execute_test_command_wrapper(original_prompt, config, test_attempts, auto_t
|
||||||
|
|
||||||
|
|
||||||
def _handle_api_error(e, attempt, max_retries, base_delay):
|
def _handle_api_error(e, attempt, max_retries, base_delay):
|
||||||
|
# 1. Check if this is a ValueError with 429 code or rate limit phrases
|
||||||
if isinstance(e, ValueError):
|
if isinstance(e, ValueError):
|
||||||
error_str = str(e).lower()
|
error_str = str(e).lower()
|
||||||
if "code" not in error_str or "429" not in error_str:
|
rate_limit_phrases = ["429", "rate limit", "too many requests", "quota exceeded"]
|
||||||
|
if "code" not in error_str and not any(phrase in error_str for phrase in rate_limit_phrases):
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
|
# 2. Check for status_code or http_status attribute equal to 429
|
||||||
|
if hasattr(e, 'status_code') and e.status_code == 429:
|
||||||
|
pass # This is a rate limit error, continue with retry logic
|
||||||
|
elif hasattr(e, 'http_status') and e.http_status == 429:
|
||||||
|
pass # This is a rate limit error, continue with retry logic
|
||||||
|
# 3. Check for rate limit phrases in error message
|
||||||
|
elif isinstance(e, Exception) and not isinstance(e, ValueError):
|
||||||
|
error_str = str(e).lower()
|
||||||
|
if not any(phrase in error_str for phrase in ["rate limit", "too many requests", "quota exceeded", "429"]) and not ("rate" in error_str and "limit" in error_str):
|
||||||
|
# This doesn't look like a rate limit error, but we'll still retry other API errors
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Apply common retry logic for all identified errors
|
||||||
if attempt == max_retries - 1:
|
if attempt == max_retries - 1:
|
||||||
logger.error("Max retries reached, failing: %s", str(e))
|
logger.error("Max retries reached, failing: %s", str(e))
|
||||||
raise RuntimeError(f"Max retries ({max_retries}) exceeded. Last error: {e}")
|
raise RuntimeError(f"Max retries ({max_retries}) exceeded. Last error: {e}")
|
||||||
|
|
||||||
logger.warning("API error (attempt %d/%d): %s", attempt + 1, max_retries, str(e))
|
logger.warning("API error (attempt %d/%d): %s", attempt + 1, max_retries, str(e))
|
||||||
delay = base_delay * (2**attempt)
|
delay = base_delay * (2**attempt)
|
||||||
print_error(
|
print_error(
|
||||||
|
|
@ -979,6 +999,9 @@ def run_agent_with_retry(
|
||||||
InternalServerError,
|
InternalServerError,
|
||||||
APITimeoutError,
|
APITimeoutError,
|
||||||
RateLimitError,
|
RateLimitError,
|
||||||
|
OpenAIRateLimitError,
|
||||||
|
LiteLLMRateLimitError,
|
||||||
|
ResourceExhausted,
|
||||||
APIError,
|
APIError,
|
||||||
ValueError,
|
ValueError,
|
||||||
) as e:
|
) as e:
|
||||||
|
|
|
||||||
|
|
@ -376,9 +376,51 @@ def test_handle_api_error_valueerror():
|
||||||
|
|
||||||
from ra_aid.agent_utils import _handle_api_error
|
from ra_aid.agent_utils import _handle_api_error
|
||||||
|
|
||||||
# ValueError not containing "code" or "429" should be re-raised
|
# ValueError not containing "code" or rate limit phrases should be re-raised
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
_handle_api_error(ValueError("some error"), 0, 5, 1)
|
_handle_api_error(ValueError("some unrelated error"), 0, 5, 1)
|
||||||
|
|
||||||
|
# ValueError with "429" should be handled without raising
|
||||||
|
_handle_api_error(ValueError("error code 429"), 0, 5, 1)
|
||||||
|
|
||||||
|
# ValueError with "rate limit" phrase should be handled without raising
|
||||||
|
_handle_api_error(ValueError("hit rate limit"), 0, 5, 1)
|
||||||
|
|
||||||
|
# ValueError with "too many requests" phrase should be handled without raising
|
||||||
|
_handle_api_error(ValueError("too many requests, try later"), 0, 5, 1)
|
||||||
|
|
||||||
|
# ValueError with "quota exceeded" phrase should be handled without raising
|
||||||
|
_handle_api_error(ValueError("quota exceeded for this month"), 0, 5, 1)
|
||||||
|
|
||||||
|
|
||||||
|
def test_handle_api_error_status_code():
    """Check that _handle_api_error does not raise for exceptions tagged with a 429 status.

    Two attribute spellings are exercised: ``status_code`` and ``http_status``.
    The test passes when both calls return without raising.
    """
    from ra_aid.agent_utils import _handle_api_error

    # 429 exposed through the ``status_code`` attribute.
    status_code_exc = Exception("Rate limited")
    setattr(status_code_exc, "status_code", 429)
    _handle_api_error(status_code_exc, 0, 5, 1)

    # 429 exposed through the ``http_status`` attribute instead.
    http_status_exc = Exception("Too many requests")
    setattr(http_status_exc, "http_status", 429)
    _handle_api_error(http_status_exc, 0, 5, 1)
|
||||||
|
|
||||||
|
|
||||||
|
def test_handle_api_error_rate_limit_phrases():
    """Check that _handle_api_error does not raise for generic exceptions that mention rate limiting.

    Each message below is wrapped in a plain ``Exception`` and passed to the
    handler on the first attempt (attempt 0 of 5, base delay 1).
    """
    from ra_aid.agent_utils import _handle_api_error

    rate_limit_messages = (
        # Contains the literal phrase "rate limit".
        "You have exceeded your rate limit",
        # Contains "too many requests" (the original message is capitalised).
        "Too many requests, please slow down",
        # Contains "quota exceeded".
        "API quota exceeded for this billing period",
        # "rate" and "limit" both appear, but not adjacent to each other.
        "You hit the rate at which we limit requests",
    )

    for message in rate_limit_messages:
        _handle_api_error(Exception(message), 0, 5, 1)
|
||||||
|
|
||||||
|
|
||||||
def test_handle_api_error_max_retries():
|
def test_handle_api_error_max_retries():
|
||||||
|
|
@ -614,3 +656,11 @@ def test_run_agent_with_retry_handles_api_badrequest_error(monkeypatch):
|
||||||
assert "Agent has crashed: Unretryable API error" in result
|
assert "Agent has crashed: Unretryable API error" in result
|
||||||
# Verify the agent is marked as crashed
|
# Verify the agent is marked as crashed
|
||||||
assert is_crashed()
|
assert is_crashed()
|
||||||
|
|
||||||
|
def test_handle_api_error_resource_exhausted():
    """Check that _handle_api_error does not raise for a google.api_core ResourceExhausted error."""
    from google.api_core.exceptions import ResourceExhausted
    from ra_aid.agent_utils import _handle_api_error

    # Build the exception with a quota-style message and hand it to the
    # handler on the first attempt; the test passes if no exception escapes.
    quota_error = ResourceExhausted(
        "429 Resource has been exhausted (e.g. check quota)."
    )
    _handle_api_error(quota_error, 0, 5, 1)
|
||||||
Loading…
Reference in New Issue