catch more rate limit errors

2025-02-27 22:42:06 -05:00 · 2025-02-27 22:42:06 -05:00 · 429f854fb8
parent 99bbcdbc2f
commit 429f854fb8
2 changed files with 76 additions and 3 deletions
--- a/ra_aid/agent_utils.py
+++ b/ra_aid/agent_utils.py
@ -11,6 +11,9 @@ from typing import Any, Dict, List, Literal, Optional, Sequence, ContextManager
 import litellm
 from anthropic import APIError, APITimeoutError, InternalServerError, RateLimitError
 from openai import RateLimitError as OpenAIRateLimitError
 from litellm.exceptions import RateLimitError as LiteLLMRateLimitError
 from google.api_core.exceptions import ResourceExhausted
 from langchain_core.language_models import BaseChatModel
 from langchain_core.messages import (
    BaseMessage,
@ -838,13 +841,30 @@ def _execute_test_command_wrapper(original_prompt, config, test_attempts, auto_t
 def _handle_api_error(e, attempt, max_retries, base_delay):
    # 1. Check if this is a ValueError with 429 code or rate limit phrases
    if isinstance(e, ValueError):
        error_str = str(e).lower()
-        if "code" not in error_str or "429" not in error_str:
+        rate_limit_phrases = ["429", "rate limit", "too many requests", "quota exceeded"]
        if "code" not in error_str and not any(phrase in error_str for phrase in rate_limit_phrases):
            raise e
    # 2. Check for status_code or http_status attribute equal to 429
    if hasattr(e, 'status_code') and e.status_code == 429:
        pass  # This is a rate limit error, continue with retry logic
    elif hasattr(e, 'http_status') and e.http_status == 429:
        pass  # This is a rate limit error, continue with retry logic
    # 3. Check for rate limit phrases in error message
    elif isinstance(e, Exception) and not isinstance(e, ValueError):
        error_str = str(e).lower()
        if not any(phrase in error_str for phrase in ["rate limit", "too many requests", "quota exceeded", "429"]) and not ("rate" in error_str and "limit" in error_str):
            # This doesn't look like a rate limit error, but we'll still retry other API errors
            pass
    # Apply common retry logic for all identified errors
    if attempt == max_retries - 1:
        logger.error("Max retries reached, failing: %s", str(e))
        raise RuntimeError(f"Max retries ({max_retries}) exceeded. Last error: {e}")
    logger.warning("API error (attempt %d/%d): %s", attempt + 1, max_retries, str(e))
    delay = base_delay * (2**attempt)
    print_error(
@ -979,6 +999,9 @@ def run_agent_with_retry(
                    InternalServerError,
                    APITimeoutError,
                    RateLimitError,
                    OpenAIRateLimitError,
                    LiteLLMRateLimitError,
                    ResourceExhausted,
                    APIError,
                    ValueError,
                ) as e:
--- a/tests/ra_aid/test_agent_utils.py
+++ b/tests/ra_aid/test_agent_utils.py
@ -376,9 +376,51 @@ def test_handle_api_error_valueerror():
    from ra_aid.agent_utils import _handle_api_error
-    # ValueError not containing "code" or "429" should be re-raised
+    # ValueError not containing "code" or rate limit phrases should be re-raised
    with pytest.raises(ValueError):
-        _handle_api_error(ValueError("some error"), 0, 5, 1)
+        _handle_api_error(ValueError("some unrelated error"), 0, 5, 1)
    # ValueError with "429" should be handled without raising
    _handle_api_error(ValueError("error code 429"), 0, 5, 1)
    # ValueError with "rate limit" phrase should be handled without raising
    _handle_api_error(ValueError("hit rate limit"), 0, 5, 1)
    # ValueError with "too many requests" phrase should be handled without raising
    _handle_api_error(ValueError("too many requests, try later"), 0, 5, 1)
    # ValueError with "quota exceeded" phrase should be handled without raising
    _handle_api_error(ValueError("quota exceeded for this month"), 0, 5, 1)
 def test_handle_api_error_status_code():
    """Exceptions tagged with a 429 status attribute must not propagate.

    Both the ``status_code`` and the ``http_status`` attribute spellings are
    exercised. Each call passes attempt 0 with max_retries 5, so the test
    passes when the handler returns without raising.
    """
    from ra_aid.agent_utils import _handle_api_error

    # (exception message, name of the attribute that carries the 429 code)
    tagged_cases = (
        ("Rate limited", "status_code"),
        ("Too many requests", "http_status"),
    )
    for message, attr_name in tagged_cases:
        tagged_error = Exception(message)
        setattr(tagged_error, attr_name, 429)
        _handle_api_error(tagged_error, 0, 5, 1)
 def test_handle_api_error_rate_limit_phrases():
    """Generic exceptions whose text mentions rate limiting must not propagate.

    Every message below is wrapped in a plain ``Exception`` and handed to the
    handler as attempt 0 of 5; the test passes when none of the calls raises.
    """
    from ra_aid.agent_utils import _handle_api_error

    rate_limit_messages = (
        # contains the phrase "rate limit"
        "You have exceeded your rate limit",
        # contains the phrase "too many requests" (case differs)
        "Too many requests, please slow down",
        # contains the phrase "quota exceeded"
        "API quota exceeded for this billing period",
        # "rate" and "limit" both present, but not adjacent
        "You hit the rate at which we limit requests",
    )
    for message in rate_limit_messages:
        _handle_api_error(Exception(message), 0, 5, 1)
 def test_handle_api_error_max_retries():
@ -614,3 +656,11 @@ def test_run_agent_with_retry_handles_api_badrequest_error(monkeypatch):
        assert "Agent has crashed: Unretryable API error" in result
        # Verify the agent is marked as crashed
        assert is_crashed()
 def test_handle_api_error_resource_exhausted():
    """A google-api-core ``ResourceExhausted`` error must not propagate.

    The exception is passed to the handler as attempt 0 of 5; the test passes
    when the call returns without raising.
    """
    from google.api_core.exceptions import ResourceExhausted
    from ra_aid.agent_utils import _handle_api_error

    quota_error = ResourceExhausted("429 Resource has been exhausted (e.g. check quota).")
    _handle_api_error(quota_error, 0, 5, 1)