catch more rate limit errors

2025-02-27 22:42:06 -05:00 · 2025-02-27 22:42:06 -05:00 · 429f854fb8
parent 99bbcdbc2f
commit 429f854fb8
2 changed files with 76 additions and 3 deletions
--- a/ra_aid/agent_utils.py
+++ b/ra_aid/agent_utils.py
@@ -11,6 +11,9 @@ from typing import Any, Dict, List, Literal, Optional, Sequence, ContextManager

 import litellm
 from anthropic import APIError, APITimeoutError, InternalServerError, RateLimitError
+from openai import RateLimitError as OpenAIRateLimitError
+from litellm.exceptions import RateLimitError as LiteLLMRateLimitError
+from google.api_core.exceptions import ResourceExhausted
 from langchain_core.language_models import BaseChatModel
 from langchain_core.messages import (
    BaseMessage,
@@ -838,13 +841,30 @@ def _execute_test_command_wrapper(original_prompt, config, test_attempts, auto_t


 def _handle_api_error(e, attempt, max_retries, base_delay):
+    # 1. Check if this is a ValueError with 429 code or rate limit phrases
    if isinstance(e, ValueError):
        error_str = str(e).lower()
-        if "code" not in error_str or "429" not in error_str:
+        rate_limit_phrases = ["429", "rate limit", "too many requests", "quota exceeded"]
+        if "code" not in error_str and not any(phrase in error_str for phrase in rate_limit_phrases):
            raise e
+    
+    # 2. Check for status_code or http_status attribute equal to 429
+    if hasattr(e, 'status_code') and e.status_code == 429:
+        pass  # This is a rate limit error, continue with retry logic
+    elif hasattr(e, 'http_status') and e.http_status == 429:
+        pass  # This is a rate limit error, continue with retry logic
+    # 3. Check for rate limit phrases in error message
+    elif isinstance(e, Exception) and not isinstance(e, ValueError):
+        error_str = str(e).lower()
+        if not any(phrase in error_str for phrase in ["rate limit", "too many requests", "quota exceeded", "429"]) and not ("rate" in error_str and "limit" in error_str):
+            # This doesn't look like a rate limit error, but we'll still retry other API errors
+            pass
+    
+    # Apply common retry logic for all identified errors
    if attempt == max_retries - 1:
        logger.error("Max retries reached, failing: %s", str(e))
        raise RuntimeError(f"Max retries ({max_retries}) exceeded. Last error: {e}")
+    
    logger.warning("API error (attempt %d/%d): %s", attempt + 1, max_retries, str(e))
    delay = base_delay * (2**attempt)
    print_error(
@@ -979,6 +999,9 @@ def run_agent_with_retry(
                    InternalServerError,
                    APITimeoutError,
                    RateLimitError,
+                    OpenAIRateLimitError,
+                    LiteLLMRateLimitError,
+                    ResourceExhausted,
                    APIError,
                    ValueError,
                ) as e:
--- a/tests/ra_aid/test_agent_utils.py
+++ b/tests/ra_aid/test_agent_utils.py
@@ -376,9 +376,51 @@ def test_handle_api_error_valueerror():

    from ra_aid.agent_utils import _handle_api_error

-    # ValueError not containing "code" or "429" should be re-raised
+    # ValueError not containing "code" or rate limit phrases should be re-raised
    with pytest.raises(ValueError):
-        _handle_api_error(ValueError("some error"), 0, 5, 1)
+        _handle_api_error(ValueError("some unrelated error"), 0, 5, 1)
+        
+    # ValueError with "429" should be handled without raising
+    _handle_api_error(ValueError("error code 429"), 0, 5, 1)
+    
+    # ValueError with "rate limit" phrase should be handled without raising
+    _handle_api_error(ValueError("hit rate limit"), 0, 5, 1)
+    
+    # ValueError with "too many requests" phrase should be handled without raising
+    _handle_api_error(ValueError("too many requests, try later"), 0, 5, 1)
+    
+    # ValueError with "quota exceeded" phrase should be handled without raising
+    _handle_api_error(ValueError("quota exceeded for this month"), 0, 5, 1)
+
+
+def test_handle_api_error_status_code():
+    from ra_aid.agent_utils import _handle_api_error
+    
+    # Error with status_code=429 attribute should be handled without raising
+    error_with_status = Exception("Rate limited")
+    error_with_status.status_code = 429
+    _handle_api_error(error_with_status, 0, 5, 1)
+    
+    # Error with http_status=429 attribute should be handled without raising
+    error_with_http_status = Exception("Too many requests")
+    error_with_http_status.http_status = 429
+    _handle_api_error(error_with_http_status, 0, 5, 1)
+
+
+def test_handle_api_error_rate_limit_phrases():
+    from ra_aid.agent_utils import _handle_api_error
+    
+    # Generic exception with "rate limit" phrase should be handled without raising
+    _handle_api_error(Exception("You have exceeded your rate limit"), 0, 5, 1)
+    
+    # Generic exception with "too many requests" phrase should be handled without raising
+    _handle_api_error(Exception("Too many requests, please slow down"), 0, 5, 1)
+    
+    # Generic exception with "quota exceeded" phrase should be handled without raising
+    _handle_api_error(Exception("API quota exceeded for this billing period"), 0, 5, 1)
+    
+    # Generic exception with "rate" and "limit" separate but in message should be handled
+    _handle_api_error(Exception("You hit the rate at which we limit requests"), 0, 5, 1)


 def test_handle_api_error_max_retries():
@@ -614,3 +656,11 @@ def test_run_agent_with_retry_handles_api_badrequest_error(monkeypatch):
        assert "Agent has crashed: Unretryable API error" in result
        # Verify the agent is marked as crashed
        assert is_crashed()
+
+def test_handle_api_error_resource_exhausted():
+    from google.api_core.exceptions import ResourceExhausted
+    from ra_aid.agent_utils import _handle_api_error
+    
+    # ResourceExhausted exception should be handled without raising
+    resource_exhausted_error = ResourceExhausted("429 Resource has been exhausted (e.g. check quota).")
+    _handle_api_error(resource_exhausted_error, 0, 5, 1)