From ba92f49a91a235abaa342b1b925e55b36d65fa93 Mon Sep 17 00:00:00 2001 From: AI Christianson Date: Mon, 17 Feb 2025 19:08:06 -0500 Subject: [PATCH] fix bug where we sent reasoning_effort to models that do not support it. fixes #91 --- ra_aid/llm.py | 3 ++- ra_aid/models_params.py | 33 +++++++++++++++++++++++++++++++++ tests/ra_aid/test_llm.py | 35 ++++++++++++++++++++++++++++++++++- 3 files changed, 69 insertions(+), 2 deletions(-) diff --git a/ra_aid/llm.py b/ra_aid/llm.py index 32d842d..2765010 100644 --- a/ra_aid/llm.py +++ b/ra_aid/llm.py @@ -257,8 +257,9 @@ def create_llm_client( "model": model_name, **temp_kwargs, } - if is_expert: + if is_expert and model_config.get("supports_reasoning_effort", False): openai_kwargs["reasoning_effort"] = "high" + return ChatOpenAI( **{ **openai_kwargs, diff --git a/ra_aid/models_params.py b/ra_aid/models_params.py index 7d10c30..6b9465c 100644 --- a/ra_aid/models_params.py +++ b/ra_aid/models_params.py @@ -12,107 +12,125 @@ models_params = { "token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "supports_reasoning_effort": False, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-3.5": { "token_limit": 4096, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-3.5-turbo": { "token_limit": 16385, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-3.5-turbo-1106": { "token_limit": 16385, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-3.5-turbo-instruct": { "token_limit": 4096, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-0125-preview": { "token_limit": 128000, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-turbo-preview": { "token_limit": 128000, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-turbo": { "token_limit": 128000, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-turbo-2024-04-09": { "token_limit": 128000, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-1106-preview": { "token_limit": 128000, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-vision-preview": { "token_limit": 128000, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4": { "token_limit": 8192, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-0613": { "token_limit": 8192, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-32k": { "token_limit": 32768, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-32k-0613": { "token_limit": 32768, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4o": { "token_limit": 128000, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4o-2024-08-06": { "token_limit": 128000, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4o-2024-05-13": { "token_limit": 128000, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, @@ -125,21 +143,25 @@ models_params = { "o1-preview": { "token_limit": 128000, "supports_temperature": False, + "supports_reasoning_effort": True, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "o1-mini": { "token_limit": 128000, "supports_temperature": False, + "supports_reasoning_effort": True, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "o1": { "token_limit": 200000, "supports_temperature": False, + "supports_reasoning_effort": True, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "o3-mini": { "token_limit": 200000, "supports_temperature": False, + "supports_reasoning_effort": True, "latency_coefficient": DEFAULT_BASE_LATENCY, }, }, @@ -147,6 +169,7 @@ models_params = { "gpt-3.5-turbo-0125": { "token_limit": 16385, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, @@ -183,24 +206,28 @@ models_params = { "gpt-4-turbo-preview": { "token_limit": 128000, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-turbo": { "token_limit": 128000, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-turbo-2024-04-09": { "token_limit": 128000, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-1106-preview": { "token_limit": 128000, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, @@ -213,36 +240,42 @@ models_params = { "gpt-4": { "token_limit": 8192, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-0613": { "token_limit": 8192, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-32k": { "token_limit": 32768, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-32k-0613": { "token_limit": 32768, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4o": { "token_limit": 128000, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4o-mini": { "token_limit": 128000, "supports_temperature": True, + "supports_reasoning_effort": False, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY, }, diff --git a/tests/ra_aid/test_llm.py b/tests/ra_aid/test_llm.py index 2193d8b..5c7c822 100644 --- a/tests/ra_aid/test_llm.py +++ b/tests/ra_aid/test_llm.py @@ -75,7 +75,6 @@ def test_initialize_expert_openai_custom(clean_env, mock_openai, monkeypatch): api_key="test-key", model="gpt-4-preview", temperature=0, - reasoning_effort="high", timeout=180, max_retries=5, ) @@ -566,6 +565,40 @@ def mock_deepseek_reasoner(): yield mock +def test_reasoning_effort_only_passed_to_supported_models(clean_env, mock_openai, monkeypatch): + """Test that reasoning_effort is only passed to supported models.""" + monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "test-key") + + # Initialize expert LLM with GPT-4 (which doesn't support reasoning_effort) + _llm = initialize_expert_llm("openai", "gpt-4") + + # Verify reasoning_effort was not included in kwargs + mock_openai.assert_called_with( + api_key="test-key", + model="gpt-4", + temperature=0, + timeout=180, + max_retries=5, + ) + + +def test_reasoning_effort_passed_to_supported_models(clean_env, mock_openai, monkeypatch): + """Test that reasoning_effort is passed to models that support it.""" + monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "test-key") + + # Initialize expert LLM with o1 (which supports reasoning_effort) + _llm = initialize_expert_llm("openai", "o1") + + # Verify reasoning_effort was included in kwargs + mock_openai.assert_called_with( + api_key="test-key", + model="o1", + reasoning_effort="high", + timeout=180, + max_retries=5, + ) + + def test_initialize_deepseek( clean_env, mock_openai, mock_deepseek_reasoner, monkeypatch ):