From ba92f49a91a235abaa342b1b925e55b36d65fa93 Mon Sep 17 00:00:00 2001
From: AI Christianson <ai.christianson@christianson.ai>
Date: Mon, 17 Feb 2025 19:08:06 -0500
Subject: [PATCH] Fix bug where reasoning_effort was sent to models that do not
 support it. Fixes #91

---
 ra_aid/llm.py            |  3 ++-
 ra_aid/models_params.py  | 33 +++++++++++++++++++++++++++++++++
 tests/ra_aid/test_llm.py | 35 ++++++++++++++++++++++++++++++++++-
 3 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/ra_aid/llm.py b/ra_aid/llm.py
index 32d842d..2765010 100644
--- a/ra_aid/llm.py
+++ b/ra_aid/llm.py
@@ -257,8 +257,9 @@ def create_llm_client(
             "model": model_name,
             **temp_kwargs,
         }
-        if is_expert:
+        if is_expert and model_config.get("supports_reasoning_effort", False):
             openai_kwargs["reasoning_effort"] = "high"
+
         return ChatOpenAI(
             **{
                 **openai_kwargs,
diff --git a/ra_aid/models_params.py b/ra_aid/models_params.py
index 7d10c30..6b9465c 100644
--- a/ra_aid/models_params.py
+++ b/ra_aid/models_params.py
@@ -12,107 +12,125 @@ models_params = {
             "token_limit": 16385,
             "supports_temperature": True,
             "default_temperature": DEFAULT_TEMPERATURE,
+            "supports_reasoning_effort": False,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-3.5": {
             "token_limit": 4096,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-3.5-turbo": {
             "token_limit": 16385,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-3.5-turbo-1106": {
             "token_limit": 16385,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-3.5-turbo-instruct": {
             "token_limit": 4096,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-0125-preview": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-turbo-preview": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-turbo": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-turbo-2024-04-09": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-1106-preview": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-vision-preview": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4": {
             "token_limit": 8192,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-0613": {
             "token_limit": 8192,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-32k": {
             "token_limit": 32768,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-32k-0613": {
             "token_limit": 32768,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4o": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4o-2024-08-06": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4o-2024-05-13": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
@@ -125,21 +143,25 @@ models_params = {
         "o1-preview": {
             "token_limit": 128000,
             "supports_temperature": False,
+            "supports_reasoning_effort": False,  # OpenAI API rejects reasoning_effort for o1-preview
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "o1-mini": {
             "token_limit": 128000,
             "supports_temperature": False,
+            "supports_reasoning_effort": False,  # OpenAI API rejects reasoning_effort for o1-mini
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "o1": {
             "token_limit": 200000,
             "supports_temperature": False,
+            "supports_reasoning_effort": True,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "o3-mini": {
             "token_limit": 200000,
             "supports_temperature": False,
+            "supports_reasoning_effort": True,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
     },
@@ -147,6 +169,7 @@ models_params = {
         "gpt-3.5-turbo-0125": {
             "token_limit": 16385,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
@@ -183,24 +206,28 @@ models_params = {
         "gpt-4-turbo-preview": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-turbo": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-turbo-2024-04-09": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-1106-preview": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
@@ -213,36 +240,42 @@ models_params = {
         "gpt-4": {
             "token_limit": 8192,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-0613": {
             "token_limit": 8192,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-32k": {
             "token_limit": 32768,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-32k-0613": {
             "token_limit": 32768,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4o": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4o-mini": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
diff --git a/tests/ra_aid/test_llm.py b/tests/ra_aid/test_llm.py
index 2193d8b..5c7c822 100644
--- a/tests/ra_aid/test_llm.py
+++ b/tests/ra_aid/test_llm.py
@@ -75,7 +75,6 @@ def test_initialize_expert_openai_custom(clean_env, mock_openai, monkeypatch):
         api_key="test-key",
         model="gpt-4-preview",
         temperature=0,
-        reasoning_effort="high",
         timeout=180,
         max_retries=5,
     )
@@ -566,6 +565,40 @@ def mock_deepseek_reasoner():
         yield mock
 
 
+def test_reasoning_effort_only_passed_to_supported_models(clean_env, mock_openai, monkeypatch):
+    """Test that reasoning_effort is only passed to supported models."""
+    monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "test-key")
+    
+    # Initialize expert LLM with GPT-4 (which doesn't support reasoning_effort)
+    _llm = initialize_expert_llm("openai", "gpt-4")
+    
+    # Verify reasoning_effort was not included in kwargs
+    mock_openai.assert_called_with(
+        api_key="test-key",
+        model="gpt-4",
+        temperature=0,
+        timeout=180,
+        max_retries=5,
+    )
+
+
+def test_reasoning_effort_passed_to_supported_models(clean_env, mock_openai, monkeypatch):
+    """Test that reasoning_effort is passed to models that support it."""
+    monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "test-key")
+    
+    # Initialize expert LLM with o1 (which supports reasoning_effort)
+    _llm = initialize_expert_llm("openai", "o1")
+    
+    # Verify reasoning_effort was included in kwargs
+    mock_openai.assert_called_with(
+        api_key="test-key",
+        model="o1",
+        reasoning_effort="high",
+        timeout=180,
+        max_retries=5,
+    )
+
+
 def test_initialize_deepseek(
     clean_env, mock_openai, mock_deepseek_reasoner, monkeypatch
 ):