RA.Aid/ra_aid/models_params.py

"""
List of model parameters
"""

DEFAULT_TOKEN_LIMIT = 100000
DEFAULT_TEMPERATURE = 0.7
DEFAULT_BASE_LATENCY = 240

models_params = {
    "openai": {
        "gpt-3.5-turbo-0125": {
            "token_limit": 16385,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "supports_reasoning_effort": False,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-3.5": {
            "token_limit": 4096,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-3.5-turbo": {
            "token_limit": 16385,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-3.5-turbo-1106": {
            "token_limit": 16385,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-3.5-turbo-instruct": {
            "token_limit": 4096,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4-0125-preview": {
            "token_limit": 128000,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4-turbo-preview": {
            "token_limit": 128000,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4-turbo": {
            "token_limit": 128000,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4-turbo-2024-04-09": {
            "token_limit": 128000,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4-1106-preview": {
            "token_limit": 128000,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4-vision-preview": {
            "token_limit": 128000,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4": {
            "token_limit": 8192,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4-0613": {
            "token_limit": 8192,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4-32k": {
            "token_limit": 32768,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4-32k-0613": {
            "token_limit": 32768,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4o": {
            "token_limit": 128000,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4o-2024-08-06": {
            "token_limit": 128000,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4o-2024-05-13": {
            "token_limit": 128000,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4o-mini": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "o1-preview": {
            "token_limit": 128000,
            "supports_temperature": False,
            "supports_reasoning_effort": True,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "o1-mini": {
            "token_limit": 128000,
            "supports_temperature": False,
            "supports_reasoning_effort": True,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "o1": {
            "token_limit": 200000,
            "supports_temperature": False,
            "supports_reasoning_effort": True,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "o3-mini": {
            "token_limit": 200000,
            "supports_temperature": False,
            "supports_reasoning_effort": True,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
    },
    "openrouter": {
        "qwen/qwen-2.5-coder-32b-instruct": {
            "token_limit": 131072,
            "default_temperature": 0.4,
            "supports_temperature": True,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
            "max_tokens": 32000,
            "reasoning_assist_default": False,
        }
    },
    "openai-compatible": {
        "qwen-qwq-32b": {
            "token_limit": 131072,
            "supports_think_tag": True,
            "supports_temperature": True,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
            "max_tokens": 131072,
        }
    },
    "azure_openai": {
        "gpt-3.5-turbo-0125": {
            "token_limit": 16385,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-3.5": {
            "token_limit": 4096,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-3.5-turbo": {
            "token_limit": 16385,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-3.5-turbo-1106": {
            "token_limit": 16385,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-3.5-turbo-instruct": {
            "token_limit": 4096,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4-0125-preview": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4-turbo-preview": {
            "token_limit": 128000,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4-turbo": {
            "token_limit": 128000,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4-turbo-2024-04-09": {
            "token_limit": 128000,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4-1106-preview": {
            "token_limit": 128000,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4-vision-preview": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4": {
            "token_limit": 8192,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4-0613": {
            "token_limit": 8192,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4-32k": {
            "token_limit": 32768,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4-32k-0613": {
            "token_limit": 32768,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4o": {
            "token_limit": 128000,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gpt-4o-mini": {
            "token_limit": 128000,
            "supports_temperature": True,
            "supports_reasoning_effort": False,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "chatgpt-4o-latest": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "o1-preview": {
            "token_limit": 128000,
            "supports_temperature": False,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "o1-mini": {
            "token_limit": 128000,
            "supports_temperature": False,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
    },
    "google_genai": {
        "gemini-pro": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gemini-1.5-flash-latest": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gemini-1.5-pro-latest": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "models/embedding-001": {
            "token_limit": 2048,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
    },
    "google_vertexai": {
        "gemini-1.5-flash": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gemini-1.5-pro": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gemini-1.0-pro": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
    },
    "ollama": {
        "command-r": {
            "token_limit": 12800,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "codellama": {
            "token_limit": 16000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "dbrx": {
            "token_limit": 32768,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "deepseek-coder:33b": {
            "token_limit": 16000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "falcon": {
            "token_limit": 2048,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "llama2": {
            "token_limit": 4096,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "llama2:7b": {
            "token_limit": 4096,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "llama2:13b": {
            "token_limit": 4096,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "llama2:70b": {
            "token_limit": 4096,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "llama3": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "llama3:8b": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "llama3:70b": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "llama3.1": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "llama3.1:8b": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "llama3.1:70b": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "lama3.1:405b": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "llama3.2": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "llama3.2:1b": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "llama3.2:3b": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "llama3.3:70b": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "scrapegraph": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "mistral-small": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "mistral-openorca": {
            "token_limit": 32000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "mistral-large": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "grok-1": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "llava": {
            "token_limit": 4096,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "mixtral:8x22b-instruct": {
            "token_limit": 65536,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "nomic-embed-text": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "nous-hermes2:34b": {
            "token_limit": 4096,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "orca-mini": {
            "token_limit": 2048,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "phi3:3.8b": {
            "token_limit": 12800,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "phi3:14b": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "qwen:0.5b": {
            "token_limit": 32000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "qwen:1.8b": {
            "token_limit": 32000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "qwen:4b": {
            "token_limit": 32000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "qwen:14b": {
            "token_limit": 32000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "qwen:32b": {
            "token_limit": 32000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "qwen:72b": {
            "token_limit": 32000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "qwen:110b": {
            "token_limit": 32000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "stablelm-zephyr": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "wizardlm2:8x22b": {
            "token_limit": 65536,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "mistral": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gemma2": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gemma2:9b": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gemma2:27b": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        # embedding models
        "shaw/dmeta-embedding-zh-small-q4": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "shaw/dmeta-embedding-zh-q4": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "chevalblanc/acge_text_embedding": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "martcreation/dmeta-embedding-zh": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "snowflake-arctic-embed": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "mxbai-embed-large": {
            "token_limit": 512,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
    },
    "oneapi": {
        "qwen-turbo": {
            "token_limit": 6000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        }
    },
    "nvidia": {
        "meta/llama3-70b-instruct": {
            "token_limit": 419,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "meta/llama3-8b-instruct": {
            "token_limit": 419,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "nemotron-4-340b-instruct": {
            "token_limit": 1024,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "databricks/dbrx-instruct": {
            "token_limit": 4096,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "google/codegemma-7b": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "google/gemma-2b": {
            "token_limit": 2048,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "google/gemma-7b": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "google/recurrentgemma-2b": {
            "token_limit": 2048,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "meta/codellama-70b": {
            "token_limit": 16384,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "meta/llama2-70b": {
            "token_limit": 4096,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "microsoft/phi-3-mini-128k-instruct": {
            "token_limit": 122880,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "mistralai/mistral-7b-instruct-v0.2": {
            "token_limit": 4096,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "mistralai/mistral-large": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "mistralai/mixtral-8x22b-instruct-v0.1": {
            "token_limit": 32768,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "mistralai/mixtral-8x7b-instruct-v0.1": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "snowflake/arctic": {
            "token_limit": 16384,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
    },
    "groq": {
        "llama3-8b-8192": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "llama3-70b-8192": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "mixtral-8x7b-32768": {
            "token_limit": 32768,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "gemma-7b-it": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "claude-3-haiku-20240307'": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
    },
    "toghetherai": {
        "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "mistralai/Mixtral-8x22B-Instruct-v0.1": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "stabilityai/stable-diffusion-xl-base-1.0": {
            "token_limit": 2048,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "NousResearch/Hermes-3-Llama-3.1-405B-Turbo": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "Gryphe/MythoMax-L2-13b-Lite": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "Salesforce/Llama-Rank-V1": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "meta-llama/Meta-Llama-Guard-3-8B": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "meta-llama/Meta-Llama-3-70B-Instruct-Turbo": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "meta-llama/Llama-3-8b-chat-hf": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "meta-llama/Llama-3-70b-chat-hf": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "Qwen/Qwen2-72B-Instruct": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "google/gemma-2-27b-it": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
    },
    "anthropic": {
        "claude_instant": {
            "token_limit": 100000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "claude2": {
            "token_limit": 9000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "claude2.1": {
            "token_limit": 200000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "claude3": {
            "token_limit": 200000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "claude3.5": {
            "token_limit": 200000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "claude-3-opus-20240229": {
            "token_limit": 200000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "claude-3-sonnet-20240229": {
            "token_limit": 200000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "claude-3-haiku-20240307": {
            "token_limit": 200000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "claude-3-5-sonnet-20240620": {
            "token_limit": 200000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "claude-3-5-sonnet-20241022": {
            "token_limit": 200000,
            "supports_temperature": True,
            "default_temperature": 1.0,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "claude-3-7-sonnet-20250219": {
            "token_limit": 200000,
            "supports_temperature": True,
            "supports_thinking": True,
            "max_tokens": 64000,
            "default_temperature": 1.0,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "claude-3-5-haiku-latest": {
            "token_limit": 200000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
    },
    "bedrock": {
        "anthropic.claude-3-haiku-20240307-v1:0": {
            "token_limit": 200000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "anthropic.claude-3-sonnet-20240229-v1:0": {
            "token_limit": 200000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "anthropic.claude-3-opus-20240229-v1:0": {
            "token_limit": 200000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "anthropic.claude-3-5-sonnet-20240620-v1:0": {
            "token_limit": 200000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "claude-3-5-haiku-latest": {
            "token_limit": 200000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "anthropic.claude-v2:1": {
            "token_limit": 200000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "anthropic.claude-v2": {
            "token_limit": 100000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "anthropic.claude-instant-v1": {
            "token_limit": 100000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "meta.llama3-8b-instruct-v1:0": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "meta.llama3-70b-instruct-v1:0": {
            "token_limit": 8192,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "meta.llama2-13b-chat-v1": {
            "token_limit": 4096,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "meta.llama2-70b-chat-v1": {
            "token_limit": 4096,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "mistral.mistral-7b-instruct-v0:2": {
            "token_limit": 32768,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "mistral.mixtral-8x7b-instruct-v0:1": {
            "token_limit": 32768,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "mistral.mistral-large-2402-v1:0": {
            "token_limit": 32768,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "mistral.mistral-small-2402-v1:0": {
            "token_limit": 32768,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "amazon.titan-embed-text-v1": {
            "token_limit": 8000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "amazon.titan-embed-text-v2:0": {
            "token_limit": 8000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "cohere.embed-english-v3": {
            "token_limit": 512,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "cohere.embed-multilingual-v3": {
            "token_limit": 512,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
    },
    "mistralai": {
        "mistral-large-latest": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "open-mistral-nemo": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
        "codestral-latest": {
            "token_limit": 32000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
    },
    "togetherai": {
        "Meta-Llama-3.1-70B-Instruct-Turbo": {
            "token_limit": 128000,
            "supports_temperature": True,
            "default_temperature": DEFAULT_TEMPERATURE,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        }
    },
}