fix bug where we sent reasoning_effort to models that do not support it. fixes #91

This commit is contained in:
AI Christianson 2025-02-17 19:08:06 -05:00
parent 8a2f0efb99
commit ba92f49a91
3 changed files with 69 additions and 2 deletions

View File

@ -257,8 +257,9 @@ def create_llm_client(
"model": model_name,
**temp_kwargs,
}
if is_expert:
if is_expert and model_config.get("supports_reasoning_effort", False):
openai_kwargs["reasoning_effort"] = "high"
return ChatOpenAI(
**{
**openai_kwargs,

View File

@ -12,107 +12,125 @@ models_params = {
"token_limit": 16385,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"supports_reasoning_effort": False,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-3.5": {
"token_limit": 4096,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-3.5-turbo": {
"token_limit": 16385,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-3.5-turbo-1106": {
"token_limit": 16385,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-3.5-turbo-instruct": {
"token_limit": 4096,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-0125-preview": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-turbo-preview": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-turbo": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-turbo-2024-04-09": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-1106-preview": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-vision-preview": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4": {
"token_limit": 8192,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-0613": {
"token_limit": 8192,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-32k": {
"token_limit": 32768,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-32k-0613": {
"token_limit": 32768,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4o": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4o-2024-08-06": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4o-2024-05-13": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
@ -125,21 +143,25 @@ models_params = {
"o1-preview": {
"token_limit": 128000,
"supports_temperature": False,
"supports_reasoning_effort": True,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"o1-mini": {
"token_limit": 128000,
"supports_temperature": False,
"supports_reasoning_effort": True,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"o1": {
"token_limit": 200000,
"supports_temperature": False,
"supports_reasoning_effort": True,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"o3-mini": {
"token_limit": 200000,
"supports_temperature": False,
"supports_reasoning_effort": True,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
},
@ -147,6 +169,7 @@ models_params = {
"gpt-3.5-turbo-0125": {
"token_limit": 16385,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
@ -183,24 +206,28 @@ models_params = {
"gpt-4-turbo-preview": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-turbo": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-turbo-2024-04-09": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-1106-preview": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
@ -213,36 +240,42 @@ models_params = {
"gpt-4": {
"token_limit": 8192,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-0613": {
"token_limit": 8192,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-32k": {
"token_limit": 32768,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-32k-0613": {
"token_limit": 32768,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4o": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4o-mini": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},

View File

@ -75,7 +75,6 @@ def test_initialize_expert_openai_custom(clean_env, mock_openai, monkeypatch):
api_key="test-key",
model="gpt-4-preview",
temperature=0,
reasoning_effort="high",
timeout=180,
max_retries=5,
)
@ -566,6 +565,40 @@ def mock_deepseek_reasoner():
yield mock
def test_reasoning_effort_only_passed_to_supported_models(clean_env, mock_openai, monkeypatch):
    """Verify reasoning_effort is omitted for models that do not support it."""
    monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "test-key")

    # gpt-4 is configured with supports_reasoning_effort=False, so the
    # reasoning_effort kwarg must not appear in the client constructor call.
    _llm = initialize_expert_llm("openai", "gpt-4")

    expected_kwargs = {
        "api_key": "test-key",
        "model": "gpt-4",
        "temperature": 0,
        "timeout": 180,
        "max_retries": 5,
    }
    mock_openai.assert_called_with(**expected_kwargs)
def test_reasoning_effort_passed_to_supported_models(clean_env, mock_openai, monkeypatch):
    """Verify reasoning_effort is forwarded for models that support it."""
    monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "test-key")

    # o1 is configured with supports_reasoning_effort=True, so the expert
    # client must receive reasoning_effort="high" (and no temperature,
    # since o1 does not support it).
    _llm = initialize_expert_llm("openai", "o1")

    expected_kwargs = {
        "api_key": "test-key",
        "model": "o1",
        "reasoning_effort": "high",
        "timeout": 180,
        "max_retries": 5,
    }
    mock_openai.assert_called_with(**expected_kwargs)
def test_initialize_deepseek(
clean_env, mock_openai, mock_deepseek_reasoner, monkeypatch
):