fix bug where we sent reasoning_effort to models that do not support it. fixes #91
commit ba92f49a91
parent 8a2f0efb99
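
The bug: create_llm_client attached reasoning_effort to every expert model, but only the o-series reasoning models accept that parameter. The fix gives each entry in models_params an explicit supports_reasoning_effort flag and gates the kwarg on it. Below is a minimal sketch of the resulting logic, assuming model_config is the per-model dict looked up from models_params; the standalone helper is illustrative, not the repo's actual function.

    # Illustrative reconstruction of the guarded kwarg assembly in
    # create_llm_client; the function boundary is an assumption, but the
    # guard itself mirrors the first hunk below.
    def build_openai_kwargs(model_name: str, model_config: dict, is_expert: bool) -> dict:
        openai_kwargs = {"model": model_name}
        # Gate on the capability flag; .get(..., False) means models whose
        # entry lacks the flag never receive reasoning_effort.
        if is_expert and model_config.get("supports_reasoning_effort", False):
            openai_kwargs["reasoning_effort"] = "high"
        return openai_kwargs
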
@@ -257,8 +257,9 @@ def create_llm_client(
         "model": model_name,
         **temp_kwargs,
     }
-    if is_expert:
+    if is_expert and model_config.get("supports_reasoning_effort", False):
         openai_kwargs["reasoning_effort"] = "high"

     return ChatOpenAI(
         **{
             **openai_kwargs,
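
Note the defensive default above: model_config.get("supports_reasoning_effort", False) treats any entry without the flag as unsupported, so a model added later without the key fails safe and simply never receives the parameter.
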
@@ -12,107 +12,125 @@ models_params = {
             "token_limit": 16385,
             "supports_temperature": True,
             "default_temperature": DEFAULT_TEMPERATURE,
+            "supports_reasoning_effort": False,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-3.5": {
             "token_limit": 4096,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-3.5-turbo": {
             "token_limit": 16385,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-3.5-turbo-1106": {
             "token_limit": 16385,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-3.5-turbo-instruct": {
             "token_limit": 4096,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-0125-preview": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-turbo-preview": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-turbo": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-turbo-2024-04-09": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-1106-preview": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-vision-preview": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4": {
             "token_limit": 8192,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-0613": {
             "token_limit": 8192,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-32k": {
             "token_limit": 32768,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-32k-0613": {
             "token_limit": 32768,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4o": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4o-2024-08-06": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4o-2024-05-13": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
@@ -125,21 +143,25 @@ models_params = {
         "o1-preview": {
             "token_limit": 128000,
             "supports_temperature": False,
+            "supports_reasoning_effort": True,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "o1-mini": {
             "token_limit": 128000,
             "supports_temperature": False,
+            "supports_reasoning_effort": True,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "o1": {
             "token_limit": 200000,
             "supports_temperature": False,
+            "supports_reasoning_effort": True,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "o3-mini": {
             "token_limit": 200000,
             "supports_temperature": False,
+            "supports_reasoning_effort": True,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
     },
@@ -147,6 +169,7 @@ models_params = {
         "gpt-3.5-turbo-0125": {
             "token_limit": 16385,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
@@ -183,24 +206,28 @@ models_params = {
         "gpt-4-turbo-preview": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-turbo": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-turbo-2024-04-09": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-1106-preview": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
@@ -213,36 +240,42 @@ models_params = {
         "gpt-4": {
             "token_limit": 8192,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-0613": {
             "token_limit": 8192,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-32k": {
             "token_limit": 32768,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-32k-0613": {
             "token_limit": 32768,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4o": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4o-mini": {
             "token_limit": 128000,
             "supports_temperature": True,
+            "supports_reasoning_effort": False,
             "default_temperature": DEFAULT_TEMPERATURE,
             "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
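
With every entry carrying an explicit flag, callers can query capability instead of pattern-matching model names. Here is a runnable sketch of that lookup; the provider-level nesting and the stub table are assumptions inferred from the repeated "models_params = {" sections above and the tests' ("openai", ...) arguments below.

    # Stand-in table with the assumed shape: provider -> model -> config.
    models_params = {
        "openai": {
            "o1": {"token_limit": 200000, "supports_reasoning_effort": True},
            "gpt-4": {"token_limit": 8192, "supports_reasoning_effort": False},
        },
    }

    def supports_reasoning_effort(provider: str, model_name: str) -> bool:
        # Missing providers, models, or flags all resolve to False (fail safe).
        config = models_params.get(provider, {}).get(model_name, {})
        return config.get("supports_reasoning_effort", False)

    assert supports_reasoning_effort("openai", "o1")
    assert not supports_reasoning_effort("openai", "gpt-4")
    assert not supports_reasoning_effort("openai", "no-such-model")
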
@@ -75,7 +75,6 @@ def test_initialize_expert_openai_custom(clean_env, mock_openai, monkeypatch):
         api_key="test-key",
         model="gpt-4-preview",
         temperature=0,
-        reasoning_effort="high",
         timeout=180,
         max_retries=5,
     )
@@ -566,6 +565,40 @@ def mock_deepseek_reasoner():
     yield mock


+def test_reasoning_effort_only_passed_to_supported_models(clean_env, mock_openai, monkeypatch):
+    """Test that reasoning_effort is only passed to supported models."""
+    monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "test-key")
+
+    # Initialize expert LLM with GPT-4 (which doesn't support reasoning_effort)
+    _llm = initialize_expert_llm("openai", "gpt-4")
+
+    # Verify reasoning_effort was not included in kwargs
+    mock_openai.assert_called_with(
+        api_key="test-key",
+        model="gpt-4",
+        temperature=0,
+        timeout=180,
+        max_retries=5,
+    )
+
+
+def test_reasoning_effort_passed_to_supported_models(clean_env, mock_openai, monkeypatch):
+    """Test that reasoning_effort is passed to models that support it."""
+    monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "test-key")
+
+    # Initialize expert LLM with o1 (which supports reasoning_effort)
+    _llm = initialize_expert_llm("openai", "o1")
+
+    # Verify reasoning_effort was included in kwargs
+    mock_openai.assert_called_with(
+        api_key="test-key",
+        model="o1",
+        reasoning_effort="high",
+        timeout=180,
+        max_retries=5,
+    )
+
+
 def test_initialize_deepseek(
     clean_env, mock_openai, mock_deepseek_reasoner, monkeypatch
 ):
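
The two new tests pin both sides of the fix: gpt-4 must be constructed without reasoning_effort, while o1 must receive reasoning_effort="high". Both can be selected together via pytest's keyword filter, e.g. pytest -k reasoning_effort.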