diff --git a/ra_aid/agent_utils.py b/ra_aid/agent_utils.py index 2447895..258040e 100644 --- a/ra_aid/agent_utils.py +++ b/ra_aid/agent_utils.py @@ -11,6 +11,9 @@ from typing import Any, Dict, List, Literal, Optional, Sequence, ContextManager import litellm from anthropic import APIError, APITimeoutError, InternalServerError, RateLimitError +from openai import RateLimitError as OpenAIRateLimitError +from litellm.exceptions import RateLimitError as LiteLLMRateLimitError +from google.api_core.exceptions import ResourceExhausted from langchain_core.language_models import BaseChatModel from langchain_core.messages import ( BaseMessage, @@ -838,13 +841,30 @@ def _execute_test_command_wrapper(original_prompt, config, test_attempts, auto_t def _handle_api_error(e, attempt, max_retries, base_delay): + # 1. Check if this is a ValueError with 429 code or rate limit phrases if isinstance(e, ValueError): error_str = str(e).lower() - if "code" not in error_str or "429" not in error_str: + rate_limit_phrases = ["429", "rate limit", "too many requests", "quota exceeded"] + if "code" not in error_str and not any(phrase in error_str for phrase in rate_limit_phrases): raise e + + # 2. Check for status_code or http_status attribute equal to 429 + if hasattr(e, 'status_code') and e.status_code == 429: + pass # This is a rate limit error, continue with retry logic + elif hasattr(e, 'http_status') and e.http_status == 429: + pass # This is a rate limit error, continue with retry logic + # 3. Check for rate limit phrases in error message + elif isinstance(e, Exception) and not isinstance(e, ValueError): + error_str = str(e).lower() + if not any(phrase in error_str for phrase in ["rate limit", "too many requests", "quota exceeded", "429"]) and not ("rate" in error_str and "limit" in error_str): + # This doesn't look like a rate limit error, but we'll still retry other API errors + pass + + # Apply common retry logic for all identified errors if attempt == max_retries - 1: logger.error("Max retries reached, failing: %s", str(e)) raise RuntimeError(f"Max retries ({max_retries}) exceeded. Last error: {e}") + logger.warning("API error (attempt %d/%d): %s", attempt + 1, max_retries, str(e)) delay = base_delay * (2**attempt) print_error( @@ -979,6 +999,9 @@ def run_agent_with_retry( InternalServerError, APITimeoutError, RateLimitError, + OpenAIRateLimitError, + LiteLLMRateLimitError, + ResourceExhausted, APIError, ValueError, ) as e: diff --git a/tests/ra_aid/test_agent_utils.py b/tests/ra_aid/test_agent_utils.py index 094f642..95fabc4 100644 --- a/tests/ra_aid/test_agent_utils.py +++ b/tests/ra_aid/test_agent_utils.py @@ -376,9 +376,51 @@ def test_handle_api_error_valueerror(): from ra_aid.agent_utils import _handle_api_error - # ValueError not containing "code" or "429" should be re-raised + # ValueError not containing "code" or rate limit phrases should be re-raised with pytest.raises(ValueError): - _handle_api_error(ValueError("some error"), 0, 5, 1) + _handle_api_error(ValueError("some unrelated error"), 0, 5, 1) + + # ValueError with "429" should be handled without raising + _handle_api_error(ValueError("error code 429"), 0, 5, 1) + + # ValueError with "rate limit" phrase should be handled without raising + _handle_api_error(ValueError("hit rate limit"), 0, 5, 1) + + # ValueError with "too many requests" phrase should be handled without raising + _handle_api_error(ValueError("too many requests, try later"), 0, 5, 1) + + # ValueError with "quota exceeded" phrase should be handled without raising + _handle_api_error(ValueError("quota exceeded for this month"), 0, 5, 1) + + +def test_handle_api_error_status_code(): + from ra_aid.agent_utils import _handle_api_error + + # Error with status_code=429 attribute should be handled without raising + error_with_status = Exception("Rate limited") + error_with_status.status_code = 429 + _handle_api_error(error_with_status, 0, 5, 1) + + # Error with http_status=429 attribute should be handled without raising + error_with_http_status = Exception("Too many requests") + error_with_http_status.http_status = 429 + _handle_api_error(error_with_http_status, 0, 5, 1) + + +def test_handle_api_error_rate_limit_phrases(): + from ra_aid.agent_utils import _handle_api_error + + # Generic exception with "rate limit" phrase should be handled without raising + _handle_api_error(Exception("You have exceeded your rate limit"), 0, 5, 1) + + # Generic exception with "too many requests" phrase should be handled without raising + _handle_api_error(Exception("Too many requests, please slow down"), 0, 5, 1) + + # Generic exception with "quota exceeded" phrase should be handled without raising + _handle_api_error(Exception("API quota exceeded for this billing period"), 0, 5, 1) + + # Generic exception with "rate" and "limit" separate but in message should be handled + _handle_api_error(Exception("You hit the rate at which we limit requests"), 0, 5, 1) def test_handle_api_error_max_retries(): @@ -614,3 +656,11 @@ def test_run_agent_with_retry_handles_api_badrequest_error(monkeypatch): assert "Agent has crashed: Unretryable API error" in result # Verify the agent is marked as crashed assert is_crashed() + +def test_handle_api_error_resource_exhausted(): + from google.api_core.exceptions import ResourceExhausted + from ra_aid.agent_utils import _handle_api_error + + # ResourceExhausted exception should be handled without raising + resource_exhausted_error = ResourceExhausted("429 Resource has been exhausted (e.g. check quota).") + _handle_api_error(resource_exhausted_error, 0, 5, 1) \ No newline at end of file