Add base latency model param (`latency_coefficient`, defaulting to `DEFAULT_BASE_LATENCY`)

This commit is contained in:
AI Christianson 2025-02-17 18:11:33 -05:00
parent 13aa77a389
commit e8e4dac038
1 changed file with 174 additions and 131 deletions

View File

@@ -4,278 +4,305 @@ List of model parameters
DEFAULT_TOKEN_LIMIT = 100000 DEFAULT_TOKEN_LIMIT = 100000
DEFAULT_TEMPERATURE = 0.7 DEFAULT_TEMPERATURE = 0.7
DEFAULT_BASE_LATENCY = 180
models_params = { models_params = {
"openai": { "openai": {
"gpt-3.5-turbo-0125": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-3.5-turbo-0125": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-3.5": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5-turbo": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-3.5-turbo": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5-turbo-1106": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-3.5-turbo-1106": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5-turbo-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-3.5-turbo-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-0125-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4-0125-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-turbo-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4-turbo-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-turbo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4-turbo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-turbo-2024-04-09": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4-turbo-2024-04-09": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-1106-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4-1106-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-vision-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4-vision-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-0613": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4-0613": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-32k": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4-32k": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-32k-0613": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4-32k-0613": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4o": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4o": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4o-2024-08-06": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4o-2024-08-06": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4o-2024-05-13": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4o-2024-05-13": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4o-mini": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4o-mini": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"o1-preview": {"token_limit": 128000, "supports_temperature": False}, "o1-preview": {"token_limit": 128000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY},
"o1-mini": {"token_limit": 128000, "supports_temperature": False}, "o1-mini": {"token_limit": 128000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY},
"o1-preview": {"token_limit": 128000, "supports_temperature": False}, "o1-preview": {"token_limit": 128000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY},
"o1": {"token_limit": 200000, "supports_temperature": False}, "o1": {"token_limit": 200000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY},
"o3-mini": {"token_limit": 200000, "supports_temperature": False}, "o3-mini": {"token_limit": 200000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY},
}, },
"azure_openai": { "azure_openai": {
"gpt-3.5-turbo-0125": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-3.5-turbo-0125": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-3.5": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5-turbo": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-3.5-turbo": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5-turbo-1106": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-3.5-turbo-1106": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5-turbo-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-3.5-turbo-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-0125-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4-0125-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-turbo-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4-turbo-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-turbo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4-turbo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-turbo-2024-04-09": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4-turbo-2024-04-09": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-1106-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4-1106-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-vision-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4-vision-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-0613": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4-0613": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-32k": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4-32k": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-32k-0613": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4-32k-0613": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4o": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4o": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4o-mini": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gpt-4o-mini": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"chatgpt-4o-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "chatgpt-4o-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"o1-preview": {"token_limit": 128000, "supports_temperature": False}, "o1-preview": {"token_limit": 128000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY},
"o1-mini": {"token_limit": 128000, "supports_temperature": False}, "o1-mini": {"token_limit": 128000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY},
}, },
"google_genai": { "google_genai": {
"gemini-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gemini-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gemini-1.5-flash-latest": { "gemini-1.5-flash-latest": {
"token_limit": 128000, "token_limit": 128000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
}, },
"gemini-1.5-pro-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gemini-1.5-pro-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"models/embedding-001": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "models/embedding-001": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
}, },
"google_vertexai": { "google_vertexai": {
"gemini-1.5-flash": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gemini-1.5-flash": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gemini-1.5-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gemini-1.5-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gemini-1.0-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gemini-1.0-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
}, },
"ollama": { "ollama": {
"command-r": {"token_limit": 12800, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "command-r": {"token_limit": 12800, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"codellama": {"token_limit": 16000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "codellama": {"token_limit": 16000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"dbrx": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "dbrx": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"deepseek-coder:33b": {"token_limit": 16000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "deepseek-coder:33b": {"token_limit": 16000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"falcon": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "falcon": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama2": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "llama2": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama2:7b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "llama2:7b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama2:13b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "llama2:13b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama2:70b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "llama2:70b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "llama3": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3:8b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "llama3:8b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3:70b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "llama3:70b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3.1": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "llama3.1": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3.1:8b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "llama3.1:8b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3.1:70b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "llama3.1:70b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"lama3.1:405b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "lama3.1:405b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3.2": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "llama3.2": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3.2:1b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "llama3.2:1b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3.2:3b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "llama3.2:3b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3.3:70b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "llama3.3:70b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"scrapegraph": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "scrapegraph": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mistral-small": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "mistral-small": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mistral-openorca": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "mistral-openorca": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mistral-large": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "mistral-large": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"grok-1": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "grok-1": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llava": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "llava": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mixtral:8x22b-instruct": {"token_limit": 65536, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "mixtral:8x22b-instruct": {"token_limit": 65536, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"nomic-embed-text": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "nomic-embed-text": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"nous-hermes2:34b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "nous-hermes2:34b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"orca-mini": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "orca-mini": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"phi3:3.8b": {"token_limit": 12800, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "phi3:3.8b": {"token_limit": 12800, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"phi3:14b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "phi3:14b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"qwen:0.5b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "qwen:0.5b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"qwen:1.8b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "qwen:1.8b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"qwen:4b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "qwen:4b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"qwen:14b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "qwen:14b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"qwen:32b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "qwen:32b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"qwen:72b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "qwen:72b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"qwen:110b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "qwen:110b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"stablelm-zephyr": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "stablelm-zephyr": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"wizardlm2:8x22b": {"token_limit": 65536, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "wizardlm2:8x22b": {"token_limit": 65536, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mistral": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "mistral": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gemma2": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gemma2": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gemma2:9b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gemma2:9b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gemma2:27b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gemma2:27b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
# embedding models # embedding models
"shaw/dmeta-embedding-zh-small-q4": { "shaw/dmeta-embedding-zh-small-q4": {
"token_limit": 8192, "token_limit": 8192,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"shaw/dmeta-embedding-zh-q4": { "shaw/dmeta-embedding-zh-q4": {
"token_limit": 8192, "token_limit": 8192,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"chevalblanc/acge_text_embedding": { "chevalblanc/acge_text_embedding": {
"token_limit": 8192, "token_limit": 8192,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"martcreation/dmeta-embedding-zh": { "martcreation/dmeta-embedding-zh": {
"token_limit": 8192, "token_limit": 8192,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"snowflake-arctic-embed": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "snowflake-arctic-embed": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mxbai-embed-large": {"token_limit": 512, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "mxbai-embed-large": {"token_limit": 512, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
}, },
"oneapi": {"qwen-turbo": {"token_limit": 6000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}}, "oneapi": {"qwen-turbo": {"token_limit": 6000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}},
"nvidia": { "nvidia": {
"meta/llama3-70b-instruct": {"token_limit": 419, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "meta/llama3-70b-instruct": {"token_limit": 419, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"meta/llama3-8b-instruct": {"token_limit": 419, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "meta/llama3-8b-instruct": {"token_limit": 419, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"nemotron-4-340b-instruct": {"token_limit": 1024, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "nemotron-4-340b-instruct": {"token_limit": 1024, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"databricks/dbrx-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "databricks/dbrx-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"google/codegemma-7b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "google/codegemma-7b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"google/gemma-2b": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "google/gemma-2b": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"google/gemma-7b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "google/gemma-7b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"google/recurrentgemma-2b": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "google/recurrentgemma-2b": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"meta/codellama-70b": {"token_limit": 16384, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "meta/codellama-70b": {"token_limit": 16384, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"meta/llama2-70b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "meta/llama2-70b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"microsoft/phi-3-mini-128k-instruct": { "microsoft/phi-3-mini-128k-instruct": {
"token_limit": 122880, "token_limit": 122880,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"mistralai/mistral-7b-instruct-v0.2": { "mistralai/mistral-7b-instruct-v0.2": {
"token_limit": 4096, "token_limit": 4096,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
}, },
"mistralai/mistral-large": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "mistralai/mistral-large": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mistralai/mixtral-8x22b-instruct-v0.1": { "mistralai/mixtral-8x22b-instruct-v0.1": {
"token_limit": 32768, "token_limit": 32768,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
}, },
"mistralai/mixtral-8x7b-instruct-v0.1": { "mistralai/mixtral-8x7b-instruct-v0.1": {
"token_limit": 8192, "token_limit": 8192,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
}, },
"snowflake/arctic": {"token_limit": 16384, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "snowflake/arctic": {"token_limit": 16384, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
}, },
"groq": { "groq": {
"llama3-8b-8192": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "llama3-8b-8192": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3-70b-8192": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "llama3-70b-8192": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mixtral-8x7b-32768": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "mixtral-8x7b-32768": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gemma-7b-it": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "gemma-7b-it": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"claude-3-haiku-20240307'": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "claude-3-haiku-20240307'": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
}, },
"toghetherai": { "toghetherai": {
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": { "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": {
"token_limit": 128000, "token_limit": 128000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
}, },
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": { "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": {
"token_limit": 128000, "token_limit": 128000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
}, },
"mistralai/Mixtral-8x22B-Instruct-v0.1": { "mistralai/Mixtral-8x22B-Instruct-v0.1": {
"token_limit": 128000, "token_limit": 128000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
}, },
"stabilityai/stable-diffusion-xl-base-1.0": { "stabilityai/stable-diffusion-xl-base-1.0": {
"token_limit": 2048, "token_limit": 2048,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
}, },
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": { "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": {
"token_limit": 128000, "token_limit": 128000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
}, },
"NousResearch/Hermes-3-Llama-3.1-405B-Turbo": { "NousResearch/Hermes-3-Llama-3.1-405B-Turbo": {
"token_limit": 128000, "token_limit": 128000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
}, },
"Gryphe/MythoMax-L2-13b-Lite": { "Gryphe/MythoMax-L2-13b-Lite": {
"token_limit": 8192, "token_limit": 8192,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
}, },
"Salesforce/Llama-Rank-V1": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "Salesforce/Llama-Rank-V1": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"meta-llama/Meta-Llama-Guard-3-8B": { "meta-llama/Meta-Llama-Guard-3-8B": {
"token_limit": 128000, "token_limit": 128000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
}, },
"meta-llama/Meta-Llama-3-70B-Instruct-Turbo": { "meta-llama/Meta-Llama-3-70B-Instruct-Turbo": {
"token_limit": 128000, "token_limit": 128000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
}, },
"meta-llama/Llama-3-8b-chat-hf": { "meta-llama/Llama-3-8b-chat-hf": {
"token_limit": 8192, "token_limit": 8192,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
}, },
"meta-llama/Llama-3-70b-chat-hf": { "meta-llama/Llama-3-70b-chat-hf": {
"token_limit": 8192, "token_limit": 8192,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
}, },
"Qwen/Qwen2-72B-Instruct": { "Qwen/Qwen2-72B-Instruct": {
"token_limit": 128000, "token_limit": 128000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
}, },
"google/gemma-2-27b-it": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "google/gemma-2-27b-it": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
}, },
"anthropic": { "anthropic": {
"claude_instant": {"token_limit": 100000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "claude_instant": {"token_limit": 100000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"claude2": {"token_limit": 9000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "claude2": {"token_limit": 9000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"claude2.1": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "claude2.1": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"claude3": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "claude3": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"claude3.5": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "claude3.5": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"claude-3-opus-20240229": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "claude-3-opus-20240229": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"claude-3-sonnet-20240229": { "claude-3-sonnet-20240229": {
"token_limit": 200000, "token_limit": 200000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"claude-3-haiku-20240307": { "claude-3-haiku-20240307": {
"token_limit": 200000, "token_limit": 200000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"claude-3-5-sonnet-20240620": { "claude-3-5-sonnet-20240620": {
"token_limit": 200000, "token_limit": 200000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"claude-3-5-sonnet-20241022": { "claude-3-5-sonnet-20241022": {
"token_limit": 200000, "token_limit": 200000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": 1.0, "default_temperature": 1.0,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"claude-3-5-haiku-latest": { "claude-3-5-haiku-latest": {
"token_limit": 200000, "token_limit": 200000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
}, },
"bedrock": { "bedrock": {
@ -283,93 +310,109 @@ models_params = {
"token_limit": 200000, "token_limit": 200000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"anthropic.claude-3-sonnet-20240229-v1:0": { "anthropic.claude-3-sonnet-20240229-v1:0": {
"token_limit": 200000, "token_limit": 200000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"anthropic.claude-3-opus-20240229-v1:0": { "anthropic.claude-3-opus-20240229-v1:0": {
"token_limit": 200000, "token_limit": 200000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"anthropic.claude-3-5-sonnet-20240620-v1:0": { "anthropic.claude-3-5-sonnet-20240620-v1:0": {
"token_limit": 200000, "token_limit": 200000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"claude-3-5-haiku-latest": { "claude-3-5-haiku-latest": {
"token_limit": 200000, "token_limit": 200000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"anthropic.claude-v2:1": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "anthropic.claude-v2:1": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"anthropic.claude-v2": {"token_limit": 100000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "anthropic.claude-v2": {"token_limit": 100000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"anthropic.claude-instant-v1": { "anthropic.claude-instant-v1": {
"token_limit": 100000, "token_limit": 100000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"meta.llama3-8b-instruct-v1:0": { "meta.llama3-8b-instruct-v1:0": {
"token_limit": 8192, "token_limit": 8192,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"meta.llama3-70b-instruct-v1:0": { "meta.llama3-70b-instruct-v1:0": {
"token_limit": 8192, "token_limit": 8192,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"meta.llama2-13b-chat-v1": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "meta.llama2-13b-chat-v1": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"meta.llama2-70b-chat-v1": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "meta.llama2-70b-chat-v1": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mistral.mistral-7b-instruct-v0:2": { "mistral.mistral-7b-instruct-v0:2": {
"token_limit": 32768, "token_limit": 32768,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"mistral.mixtral-8x7b-instruct-v0:1": { "mistral.mixtral-8x7b-instruct-v0:1": {
"token_limit": 32768, "token_limit": 32768,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"mistral.mistral-large-2402-v1:0": { "mistral.mistral-large-2402-v1:0": {
"token_limit": 32768, "token_limit": 32768,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"mistral.mistral-small-2402-v1:0": { "mistral.mistral-small-2402-v1:0": {
"token_limit": 32768, "token_limit": 32768,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"amazon.titan-embed-text-v1": { "amazon.titan-embed-text-v1": {
"token_limit": 8000, "token_limit": 8000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"amazon.titan-embed-text-v2:0": { "amazon.titan-embed-text-v2:0": {
"token_limit": 8000, "token_limit": 8000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
"cohere.embed-english-v3": {"token_limit": 512, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "cohere.embed-english-v3": {"token_limit": 512, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"cohere.embed-multilingual-v3": { "cohere.embed-multilingual-v3": {
"token_limit": 512, "token_limit": 512,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
}, },
}, },
"mistralai": { "mistralai": {
"mistral-large-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "mistral-large-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"open-mistral-nemo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "open-mistral-nemo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"codestral-latest": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, "codestral-latest": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
}, },
"togetherai": { "togetherai": {
"Meta-Llama-3.1-70B-Instruct-Turbo": { "Meta-Llama-3.1-70B-Instruct-Turbo": {
"token_limit": 128000, "token_limit": 128000,
"supports_temperature": True, "supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE, "default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
} }
}, },
} }