catch more rate limit errors

This commit is contained in:
AI Christianson 2025-02-27 22:42:06 -05:00
parent 99bbcdbc2f
commit 429f854fb8
2 changed files with 76 additions and 3 deletions

View File

@ -11,6 +11,9 @@ from typing import Any, Dict, List, Literal, Optional, Sequence, ContextManager
import litellm import litellm
from anthropic import APIError, APITimeoutError, InternalServerError, RateLimitError from anthropic import APIError, APITimeoutError, InternalServerError, RateLimitError
from openai import RateLimitError as OpenAIRateLimitError
from litellm.exceptions import RateLimitError as LiteLLMRateLimitError
from google.api_core.exceptions import ResourceExhausted
from langchain_core.language_models import BaseChatModel from langchain_core.language_models import BaseChatModel
from langchain_core.messages import ( from langchain_core.messages import (
BaseMessage, BaseMessage,
@ -838,13 +841,30 @@ def _execute_test_command_wrapper(original_prompt, config, test_attempts, auto_t
def _handle_api_error(e, attempt, max_retries, base_delay): def _handle_api_error(e, attempt, max_retries, base_delay):
# 1. Check if this is a ValueError with 429 code or rate limit phrases
if isinstance(e, ValueError): if isinstance(e, ValueError):
error_str = str(e).lower() error_str = str(e).lower()
if "code" not in error_str or "429" not in error_str: rate_limit_phrases = ["429", "rate limit", "too many requests", "quota exceeded"]
if "code" not in error_str and not any(phrase in error_str for phrase in rate_limit_phrases):
raise e raise e
# 2. Check for status_code or http_status attribute equal to 429
if hasattr(e, 'status_code') and e.status_code == 429:
pass # This is a rate limit error, continue with retry logic
elif hasattr(e, 'http_status') and e.http_status == 429:
pass # This is a rate limit error, continue with retry logic
# 3. Check for rate limit phrases in error message
elif isinstance(e, Exception) and not isinstance(e, ValueError):
error_str = str(e).lower()
if not any(phrase in error_str for phrase in ["rate limit", "too many requests", "quota exceeded", "429"]) and not ("rate" in error_str and "limit" in error_str):
# This doesn't look like a rate limit error, but we'll still retry other API errors
pass
# Apply common retry logic for all identified errors
if attempt == max_retries - 1: if attempt == max_retries - 1:
logger.error("Max retries reached, failing: %s", str(e)) logger.error("Max retries reached, failing: %s", str(e))
raise RuntimeError(f"Max retries ({max_retries}) exceeded. Last error: {e}") raise RuntimeError(f"Max retries ({max_retries}) exceeded. Last error: {e}")
logger.warning("API error (attempt %d/%d): %s", attempt + 1, max_retries, str(e)) logger.warning("API error (attempt %d/%d): %s", attempt + 1, max_retries, str(e))
delay = base_delay * (2**attempt) delay = base_delay * (2**attempt)
print_error( print_error(
@ -979,6 +999,9 @@ def run_agent_with_retry(
InternalServerError, InternalServerError,
APITimeoutError, APITimeoutError,
RateLimitError, RateLimitError,
OpenAIRateLimitError,
LiteLLMRateLimitError,
ResourceExhausted,
APIError, APIError,
ValueError, ValueError,
) as e: ) as e:

View File

@ -376,9 +376,51 @@ def test_handle_api_error_valueerror():
from ra_aid.agent_utils import _handle_api_error from ra_aid.agent_utils import _handle_api_error
# ValueError not containing "code" or "429" should be re-raised # ValueError not containing "code" or rate limit phrases should be re-raised
with pytest.raises(ValueError): with pytest.raises(ValueError):
_handle_api_error(ValueError("some error"), 0, 5, 1) _handle_api_error(ValueError("some unrelated error"), 0, 5, 1)
# ValueError with "429" should be handled without raising
_handle_api_error(ValueError("error code 429"), 0, 5, 1)
# ValueError with "rate limit" phrase should be handled without raising
_handle_api_error(ValueError("hit rate limit"), 0, 5, 1)
# ValueError with "too many requests" phrase should be handled without raising
_handle_api_error(ValueError("too many requests, try later"), 0, 5, 1)
# ValueError with "quota exceeded" phrase should be handled without raising
_handle_api_error(ValueError("quota exceeded for this month"), 0, 5, 1)
def test_handle_api_error_status_code():
    """Errors exposing a 429 status attribute should be handled without raising.

    Covers both attribute spellings the handler inspects: ``status_code``
    and ``http_status``.
    """
    from ra_aid.agent_utils import _handle_api_error

    # (attribute carrying the HTTP status, exception message)
    scenarios = (
        ("status_code", "Rate limited"),
        ("http_status", "Too many requests"),
    )
    for attr_name, message in scenarios:
        err = Exception(message)
        setattr(err, attr_name, 429)
        # First attempt of five with a base delay of 1: must not raise.
        _handle_api_error(err, 0, 5, 1)
def test_handle_api_error_rate_limit_phrases():
    """Generic exceptions whose message reads like a rate limit should be handled without raising."""
    from ra_aid.agent_utils import _handle_api_error

    rate_limit_messages = (
        # Contains the "rate limit" phrase.
        "You have exceeded your rate limit",
        # Contains the "too many requests" phrase.
        "Too many requests, please slow down",
        # Contains the "quota exceeded" phrase.
        "API quota exceeded for this billing period",
        # "rate" and "limit" both appear, but not adjacent to each other.
        "You hit the rate at which we limit requests",
    )
    for message in rate_limit_messages:
        _handle_api_error(Exception(message), 0, 5, 1)
def test_handle_api_error_max_retries(): def test_handle_api_error_max_retries():
@ -614,3 +656,11 @@ def test_run_agent_with_retry_handles_api_badrequest_error(monkeypatch):
assert "Agent has crashed: Unretryable API error" in result assert "Agent has crashed: Unretryable API error" in result
# Verify the agent is marked as crashed # Verify the agent is marked as crashed
assert is_crashed() assert is_crashed()
def test_handle_api_error_resource_exhausted():
    """A google.api_core ``ResourceExhausted`` error should be handled without raising."""
    from google.api_core.exceptions import ResourceExhausted
    from ra_aid.agent_utils import _handle_api_error

    # First attempt of five with a base delay of 1: must not raise.
    _handle_api_error(
        ResourceExhausted("429 Resource has been exhausted (e.g. check quota)."),
        0,
        5,
        1,
    )