fix bug where we sent reasoning_effort to models that do not support it. fixes #91

This commit is contained in:
AI Christianson 2025-02-17 19:08:06 -05:00
parent 8a2f0efb99
commit ba92f49a91
3 changed files with 69 additions and 2 deletions

View File

@ -257,8 +257,9 @@ def create_llm_client(
"model": model_name,
**temp_kwargs,
}
if is_expert:
if is_expert and model_config.get("supports_reasoning_effort", False):
openai_kwargs["reasoning_effort"] = "high"
return ChatOpenAI(
**{
**openai_kwargs,

View File

@ -12,107 +12,125 @@ models_params = {
"token_limit": 16385,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"supports_reasoning_effort": False,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-3.5": {
"token_limit": 4096,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-3.5-turbo": {
"token_limit": 16385,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-3.5-turbo-1106": {
"token_limit": 16385,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-3.5-turbo-instruct": {
"token_limit": 4096,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-0125-preview": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-turbo-preview": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-turbo": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-turbo-2024-04-09": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-1106-preview": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-vision-preview": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4": {
"token_limit": 8192,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-0613": {
"token_limit": 8192,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-32k": {
"token_limit": 32768,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-32k-0613": {
"token_limit": 32768,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4o": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4o-2024-08-06": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4o-2024-05-13": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
@ -125,21 +143,25 @@ models_params = {
"o1-preview": {
"token_limit": 128000,
"supports_temperature": False,
"supports_reasoning_effort": True,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"o1-mini": {
"token_limit": 128000,
"supports_temperature": False,
"supports_reasoning_effort": True,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"o1": {
"token_limit": 200000,
"supports_temperature": False,
"supports_reasoning_effort": True,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"o3-mini": {
"token_limit": 200000,
"supports_temperature": False,
"supports_reasoning_effort": True,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
},
@ -147,6 +169,7 @@ models_params = {
"gpt-3.5-turbo-0125": {
"token_limit": 16385,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
@ -183,24 +206,28 @@ models_params = {
"gpt-4-turbo-preview": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-turbo": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-turbo-2024-04-09": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-1106-preview": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
@ -213,36 +240,42 @@ models_params = {
"gpt-4": {
"token_limit": 8192,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-0613": {
"token_limit": 8192,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-32k": {
"token_limit": 32768,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4-32k-0613": {
"token_limit": 32768,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4o": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"gpt-4o-mini": {
"token_limit": 128000,
"supports_temperature": True,
"supports_reasoning_effort": False,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},

View File

@ -75,7 +75,6 @@ def test_initialize_expert_openai_custom(clean_env, mock_openai, monkeypatch):
api_key="test-key",
model="gpt-4-preview",
temperature=0,
reasoning_effort="high",
timeout=180,
max_retries=5,
)
@ -566,6 +565,40 @@ def mock_deepseek_reasoner():
yield mock
def test_reasoning_effort_only_passed_to_supported_models(clean_env, mock_openai, monkeypatch):
    """Verify reasoning_effort is omitted for models that do not support it."""
    monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "test-key")

    # gpt-4 is configured with supports_reasoning_effort=False, so the
    # reasoning_effort kwarg must not appear in the client constructor call.
    _llm = initialize_expert_llm("openai", "gpt-4")

    expected_kwargs = {
        "api_key": "test-key",
        "model": "gpt-4",
        "temperature": 0,
        "timeout": 180,
        "max_retries": 5,
    }
    mock_openai.assert_called_with(**expected_kwargs)
def test_reasoning_effort_passed_to_supported_models(clean_env, mock_openai, monkeypatch):
    """Verify reasoning_effort is forwarded for models that support it."""
    monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "test-key")

    # o1 is configured with supports_reasoning_effort=True, so the expert
    # client must receive reasoning_effort="high" (and no temperature,
    # since o1 does not support it).
    _llm = initialize_expert_llm("openai", "o1")

    expected_kwargs = {
        "api_key": "test-key",
        "model": "o1",
        "reasoning_effort": "high",
        "timeout": 180,
        "max_retries": 5,
    }
    mock_openai.assert_called_with(**expected_kwargs)
def test_initialize_deepseek(
clean_env, mock_openai, mock_deepseek_reasoner, monkeypatch
):