Fix bug where we sent reasoning_effort to models that do not support it. Fixes #91.
This commit is contained in:
parent
8a2f0efb99
commit
ba92f49a91
|
|
@ -257,8 +257,9 @@ def create_llm_client(
|
|||
"model": model_name,
|
||||
**temp_kwargs,
|
||||
}
|
||||
if is_expert:
|
||||
if is_expert and model_config.get("supports_reasoning_effort", False):
|
||||
openai_kwargs["reasoning_effort"] = "high"
|
||||
|
||||
return ChatOpenAI(
|
||||
**{
|
||||
**openai_kwargs,
|
||||
|
|
|
|||
|
|
@ -12,107 +12,125 @@ models_params = {
|
|||
"token_limit": 16385,
|
||||
"supports_temperature": True,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"supports_reasoning_effort": False,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-3.5": {
|
||||
"token_limit": 4096,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-3.5-turbo": {
|
||||
"token_limit": 16385,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-3.5-turbo-1106": {
|
||||
"token_limit": 16385,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-3.5-turbo-instruct": {
|
||||
"token_limit": 4096,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-4-0125-preview": {
|
||||
"token_limit": 128000,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-4-turbo-preview": {
|
||||
"token_limit": 128000,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-4-turbo": {
|
||||
"token_limit": 128000,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-4-turbo-2024-04-09": {
|
||||
"token_limit": 128000,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-4-1106-preview": {
|
||||
"token_limit": 128000,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-4-vision-preview": {
|
||||
"token_limit": 128000,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-4": {
|
||||
"token_limit": 8192,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-4-0613": {
|
||||
"token_limit": 8192,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-4-32k": {
|
||||
"token_limit": 32768,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-4-32k-0613": {
|
||||
"token_limit": 32768,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-4o": {
|
||||
"token_limit": 128000,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-4o-2024-08-06": {
|
||||
"token_limit": 128000,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-4o-2024-05-13": {
|
||||
"token_limit": 128000,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
|
|
@ -125,21 +143,25 @@ models_params = {
|
|||
"o1-preview": {
|
||||
"token_limit": 128000,
|
||||
"supports_temperature": False,
|
||||
"supports_reasoning_effort": True,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"o1-mini": {
|
||||
"token_limit": 128000,
|
||||
"supports_temperature": False,
|
||||
"supports_reasoning_effort": True,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"o1": {
|
||||
"token_limit": 200000,
|
||||
"supports_temperature": False,
|
||||
"supports_reasoning_effort": True,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"o3-mini": {
|
||||
"token_limit": 200000,
|
||||
"supports_temperature": False,
|
||||
"supports_reasoning_effort": True,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
},
|
||||
|
|
@ -147,6 +169,7 @@ models_params = {
|
|||
"gpt-3.5-turbo-0125": {
|
||||
"token_limit": 16385,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
|
|
@ -183,24 +206,28 @@ models_params = {
|
|||
"gpt-4-turbo-preview": {
|
||||
"token_limit": 128000,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-4-turbo": {
|
||||
"token_limit": 128000,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-4-turbo-2024-04-09": {
|
||||
"token_limit": 128000,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-4-1106-preview": {
|
||||
"token_limit": 128000,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
|
|
@ -213,36 +240,42 @@ models_params = {
|
|||
"gpt-4": {
|
||||
"token_limit": 8192,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-4-0613": {
|
||||
"token_limit": 8192,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-4-32k": {
|
||||
"token_limit": 32768,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-4-32k-0613": {
|
||||
"token_limit": 32768,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-4o": {
|
||||
"token_limit": 128000,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
"gpt-4o-mini": {
|
||||
"token_limit": 128000,
|
||||
"supports_temperature": True,
|
||||
"supports_reasoning_effort": False,
|
||||
"default_temperature": DEFAULT_TEMPERATURE,
|
||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||
},
|
||||
|
|
|
|||
|
|
@ -75,7 +75,6 @@ def test_initialize_expert_openai_custom(clean_env, mock_openai, monkeypatch):
|
|||
api_key="test-key",
|
||||
model="gpt-4-preview",
|
||||
temperature=0,
|
||||
reasoning_effort="high",
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
|
@ -566,6 +565,40 @@ def mock_deepseek_reasoner():
|
|||
yield mock
|
||||
|
||||
|
||||
def test_reasoning_effort_only_passed_to_supported_models(clean_env, mock_openai, monkeypatch):
    """Verify that reasoning_effort is omitted for models that do not support it."""
    monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "test-key")

    # gpt-4 is configured with supports_reasoning_effort=False, so the
    # client must be constructed without a reasoning_effort kwarg.
    initialize_expert_llm("openai", "gpt-4")

    expected_kwargs = {
        "api_key": "test-key",
        "model": "gpt-4",
        "temperature": 0,
        "timeout": 180,
        "max_retries": 5,
    }
    mock_openai.assert_called_with(**expected_kwargs)
|
||||
|
||||
|
||||
def test_reasoning_effort_passed_to_supported_models(clean_env, mock_openai, monkeypatch):
    """Verify that reasoning_effort is forwarded for models that support it."""
    monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "test-key")

    # o1 is configured with supports_reasoning_effort=True, so the
    # client must be constructed with reasoning_effort="high".
    initialize_expert_llm("openai", "o1")

    expected_kwargs = {
        "api_key": "test-key",
        "model": "o1",
        "reasoning_effort": "high",
        "timeout": 180,
        "max_retries": 5,
    }
    mock_openai.assert_called_with(**expected_kwargs)
|
||||
|
||||
|
||||
def test_initialize_deepseek(
|
||||
clean_env, mock_openai, mock_deepseek_reasoner, monkeypatch
|
||||
):
|
||||
|
|
|
|||
Loading…
Reference in New Issue