From 13aa77a389cc6de955627adcb764ac5f80c6585b Mon Sep 17 00:00:00 2001 From: AI Christianson Date: Mon, 17 Feb 2025 14:22:44 -0500 Subject: [PATCH 1/2] add vercel analytics to docs --- docs/docusaurus.config.ts | 4 +++ docs/package-lock.json | 61 +++++++++++++++++++++++++++++++++++++++ docs/package.json | 1 + 3 files changed, 66 insertions(+) diff --git a/docs/docusaurus.config.ts b/docs/docusaurus.config.ts index f13b89f..4b3de14 100644 --- a/docs/docusaurus.config.ts +++ b/docs/docusaurus.config.ts @@ -17,6 +17,10 @@ const config: Config = { locales: ['en'], }, + plugins: [ + '@docusaurus/plugin-vercel-analytics' + ], + presets: [ [ 'classic', diff --git a/docs/package-lock.json b/docs/package-lock.json index d3a5f04..248e2d8 100644 --- a/docs/package-lock.json +++ b/docs/package-lock.json @@ -9,6 +9,7 @@ "version": "0.0.0", "dependencies": { "@docusaurus/core": "3.7.0", + "@docusaurus/plugin-vercel-analytics": "^3.7.0", "@docusaurus/preset-classic": "3.7.0", "@mdx-js/react": "^3.0.0", "clsx": "^2.0.0", @@ -3529,6 +3530,66 @@ "react-dom": "^18.0.0 || ^19.0.0" } }, + "node_modules/@docusaurus/plugin-vercel-analytics": { + "version": "3.7.0", + "resolved": "https://registry.npmjs.org/@docusaurus/plugin-vercel-analytics/-/plugin-vercel-analytics-3.7.0.tgz", + "integrity": "sha512-zEOsqNI3oj4WRO9Dbzsar9fctwAl60PZJqhu14X5W3z5zT/E1TFKrHW/oJHU/a1r5o9K2cFsSNdDn2tyuaFJoQ==", + "license": "MIT", + "dependencies": { + "@docusaurus/core": "3.7.0", + "@docusaurus/logger": "3.7.0", + "@docusaurus/types": "3.7.0", + "@docusaurus/utils": "3.7.0", + "@docusaurus/utils-validation": "3.7.0", + "@vercel/analytics": "^1.1.1", + "tslib": "^2.6.0" + }, + "engines": { + "node": ">=18.0" + }, + "peerDependencies": { + "react": "^18.0.0 || ^19.0.0", + "react-dom": "^18.0.0 || ^19.0.0" + } + }, + "node_modules/@docusaurus/plugin-vercel-analytics/node_modules/@vercel/analytics": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/@vercel/analytics/-/analytics-1.5.0.tgz", + "integrity": "sha512-MYsBzfPki4gthY5HnYN7jgInhAZ7Ac1cYDoRWFomwGHWEX7odTEzbtg9kf/QSo7XEsEAqlQugA6gJ2WS2DEa3g==", + "license": "MPL-2.0", + "peerDependencies": { + "@remix-run/react": "^2", + "@sveltejs/kit": "^1 || ^2", + "next": ">= 13", + "react": "^18 || ^19 || ^19.0.0-rc", + "svelte": ">= 4", + "vue": "^3", + "vue-router": "^4" + }, + "peerDependenciesMeta": { + "@remix-run/react": { + "optional": true + }, + "@sveltejs/kit": { + "optional": true + }, + "next": { + "optional": true + }, + "react": { + "optional": true + }, + "svelte": { + "optional": true + }, + "vue": { + "optional": true + }, + "vue-router": { + "optional": true + } + } + }, "node_modules/@docusaurus/preset-classic": { "version": "3.7.0", "resolved": "https://registry.npmjs.org/@docusaurus/preset-classic/-/preset-classic-3.7.0.tgz", diff --git a/docs/package.json b/docs/package.json index 32ddd41..8bf5596 100644 --- a/docs/package.json +++ b/docs/package.json @@ -16,6 +16,7 @@ }, "dependencies": { "@docusaurus/core": "3.7.0", + "@docusaurus/plugin-vercel-analytics": "^3.7.0", "@docusaurus/preset-classic": "3.7.0", "@mdx-js/react": "^3.0.0", "clsx": "^2.0.0", From e8e4dac038c40815a7f0873c34a414617c76b11a Mon Sep 17 00:00:00 2001 From: AI Christianson Date: Mon, 17 Feb 2025 18:11:33 -0500 Subject: [PATCH 2/2] add base latency model param --- ra_aid/models_params.py | 305 +++++++++++++++++++++++----------------- 1 file changed, 174 insertions(+), 131 deletions(-) diff --git a/ra_aid/models_params.py b/ra_aid/models_params.py index 21e04ae..5c80e0c 100644 --- a/ra_aid/models_params.py +++ b/ra_aid/models_params.py @@ -4,278 +4,305 @@ List of model parameters DEFAULT_TOKEN_LIMIT = 100000 DEFAULT_TEMPERATURE = 0.7 +DEFAULT_BASE_LATENCY = 180 models_params = { "openai": { - "gpt-3.5-turbo-0125": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-3.5": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-3.5-turbo": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-3.5-turbo-1106": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-3.5-turbo-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4-0125-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4-turbo-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4-turbo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4-turbo-2024-04-09": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4-1106-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4-vision-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4-0613": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4-32k": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4-32k-0613": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4o": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4o-2024-08-06": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4o-2024-05-13": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4o-mini": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "o1-preview": {"token_limit": 128000, "supports_temperature": False}, - "o1-mini": {"token_limit": 128000, "supports_temperature": False}, - "o1-preview": {"token_limit": 128000, "supports_temperature": False}, - "o1": {"token_limit": 200000, "supports_temperature": False}, - "o3-mini": {"token_limit": 200000, "supports_temperature": False}, + "gpt-3.5-turbo-0125": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-3.5": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-3.5-turbo": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-3.5-turbo-1106": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-3.5-turbo-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4-0125-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4-turbo-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4-turbo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4-turbo-2024-04-09": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4-1106-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4-vision-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4-0613": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4-32k": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4-32k-0613": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4o": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4o-2024-08-06": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4o-2024-05-13": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4o-mini": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "o1-preview": {"token_limit": 128000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "o1-mini": {"token_limit": 128000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "o1-preview": {"token_limit": 128000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "o1": {"token_limit": 200000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "o3-mini": {"token_limit": 200000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY}, }, "azure_openai": { - "gpt-3.5-turbo-0125": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-3.5": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-3.5-turbo": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-3.5-turbo-1106": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-3.5-turbo-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4-0125-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4-turbo-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4-turbo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4-turbo-2024-04-09": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4-1106-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4-vision-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4-0613": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4-32k": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4-32k-0613": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4o": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gpt-4o-mini": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "chatgpt-4o-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "o1-preview": {"token_limit": 128000, "supports_temperature": False}, - "o1-mini": {"token_limit": 128000, "supports_temperature": False}, + "gpt-3.5-turbo-0125": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-3.5": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-3.5-turbo": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-3.5-turbo-1106": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-3.5-turbo-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4-0125-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4-turbo-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4-turbo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4-turbo-2024-04-09": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4-1106-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4-vision-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4-0613": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4-32k": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4-32k-0613": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4o": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gpt-4o-mini": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "chatgpt-4o-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "o1-preview": {"token_limit": 128000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "o1-mini": {"token_limit": 128000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY}, }, "google_genai": { - "gemini-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, + "gemini-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, "gemini-1.5-flash-latest": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY }, - "gemini-1.5-pro-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "models/embedding-001": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, + "gemini-1.5-pro-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "models/embedding-001": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, }, "google_vertexai": { - "gemini-1.5-flash": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gemini-1.5-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gemini-1.0-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, + "gemini-1.5-flash": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gemini-1.5-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gemini-1.0-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, }, "ollama": { - "command-r": {"token_limit": 12800, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "codellama": {"token_limit": 16000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "dbrx": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "deepseek-coder:33b": {"token_limit": 16000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "falcon": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "llama2": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "llama2:7b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "llama2:13b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "llama2:70b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "llama3": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "llama3:8b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "llama3:70b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "llama3.1": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "llama3.1:8b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "llama3.1:70b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "lama3.1:405b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "llama3.2": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "llama3.2:1b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "llama3.2:3b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "llama3.3:70b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "scrapegraph": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "mistral-small": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "mistral-openorca": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "mistral-large": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "grok-1": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "llava": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "mixtral:8x22b-instruct": {"token_limit": 65536, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "nomic-embed-text": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "nous-hermes2:34b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "orca-mini": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "phi3:3.8b": {"token_limit": 12800, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "phi3:14b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "qwen:0.5b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "qwen:1.8b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "qwen:4b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "qwen:14b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "qwen:32b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "qwen:72b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "qwen:110b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "stablelm-zephyr": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "wizardlm2:8x22b": {"token_limit": 65536, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "mistral": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gemma2": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gemma2:9b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gemma2:27b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, + "command-r": {"token_limit": 12800, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "codellama": {"token_limit": 16000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "dbrx": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "deepseek-coder:33b": {"token_limit": 16000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "falcon": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "llama2": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "llama2:7b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "llama2:13b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "llama2:70b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "llama3": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "llama3:8b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "llama3:70b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "llama3.1": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "llama3.1:8b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "llama3.1:70b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "lama3.1:405b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "llama3.2": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "llama3.2:1b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "llama3.2:3b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "llama3.3:70b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "scrapegraph": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "mistral-small": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "mistral-openorca": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "mistral-large": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "grok-1": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "llava": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "mixtral:8x22b-instruct": {"token_limit": 65536, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "nomic-embed-text": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "nous-hermes2:34b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "orca-mini": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "phi3:3.8b": {"token_limit": 12800, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "phi3:14b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "qwen:0.5b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "qwen:1.8b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "qwen:4b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "qwen:14b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "qwen:32b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "qwen:72b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "qwen:110b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "stablelm-zephyr": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "wizardlm2:8x22b": {"token_limit": 65536, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "mistral": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gemma2": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gemma2:9b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gemma2:27b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, # embedding models "shaw/dmeta-embedding-zh-small-q4": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "shaw/dmeta-embedding-zh-q4": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "chevalblanc/acge_text_embedding": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "martcreation/dmeta-embedding-zh": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, - "snowflake-arctic-embed": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "mxbai-embed-large": {"token_limit": 512, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, + "snowflake-arctic-embed": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "mxbai-embed-large": {"token_limit": 512, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, }, - "oneapi": {"qwen-turbo": {"token_limit": 6000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}}, + "oneapi": {"qwen-turbo": {"token_limit": 6000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}}, "nvidia": { - "meta/llama3-70b-instruct": {"token_limit": 419, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "meta/llama3-8b-instruct": {"token_limit": 419, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "nemotron-4-340b-instruct": {"token_limit": 1024, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "databricks/dbrx-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "google/codegemma-7b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "google/gemma-2b": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "google/gemma-7b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "google/recurrentgemma-2b": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "meta/codellama-70b": {"token_limit": 16384, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "meta/llama2-70b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, + "meta/llama3-70b-instruct": {"token_limit": 419, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "meta/llama3-8b-instruct": {"token_limit": 419, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "nemotron-4-340b-instruct": {"token_limit": 1024, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "databricks/dbrx-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "google/codegemma-7b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "google/gemma-2b": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "google/gemma-7b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "google/recurrentgemma-2b": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "meta/codellama-70b": {"token_limit": 16384, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "meta/llama2-70b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, "microsoft/phi-3-mini-128k-instruct": { "token_limit": 122880, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "mistralai/mistral-7b-instruct-v0.2": { "token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY }, - "mistralai/mistral-large": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, + "mistralai/mistral-large": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, "mistralai/mixtral-8x22b-instruct-v0.1": { "token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY }, "mistralai/mixtral-8x7b-instruct-v0.1": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY }, - "snowflake/arctic": {"token_limit": 16384, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, + "snowflake/arctic": {"token_limit": 16384, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, }, "groq": { - "llama3-8b-8192": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "llama3-70b-8192": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "mixtral-8x7b-32768": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "gemma-7b-it": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "claude-3-haiku-20240307'": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, + "llama3-8b-8192": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "llama3-70b-8192": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "mixtral-8x7b-32768": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "gemma-7b-it": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "claude-3-haiku-20240307'": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, }, "toghetherai": { "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY }, "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY }, "mistralai/Mixtral-8x22B-Instruct-v0.1": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY }, "stabilityai/stable-diffusion-xl-base-1.0": { "token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY }, "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY }, "NousResearch/Hermes-3-Llama-3.1-405B-Turbo": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY }, "Gryphe/MythoMax-L2-13b-Lite": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY }, - "Salesforce/Llama-Rank-V1": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, + "Salesforce/Llama-Rank-V1": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, "meta-llama/Meta-Llama-Guard-3-8B": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY }, "meta-llama/Meta-Llama-3-70B-Instruct-Turbo": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY }, "meta-llama/Llama-3-8b-chat-hf": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY }, "meta-llama/Llama-3-70b-chat-hf": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY }, "Qwen/Qwen2-72B-Instruct": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY }, - "google/gemma-2-27b-it": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, + "google/gemma-2-27b-it": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, }, "anthropic": { - "claude_instant": {"token_limit": 100000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "claude2": {"token_limit": 9000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "claude2.1": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "claude3": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "claude3.5": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "claude-3-opus-20240229": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, + "claude_instant": {"token_limit": 100000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "claude2": {"token_limit": 9000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "claude2.1": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "claude3": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "claude3.5": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "claude-3-opus-20240229": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, "claude-3-sonnet-20240229": { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "claude-3-haiku-20240307": { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "claude-3-5-sonnet-20240620": { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "claude-3-5-sonnet-20241022": { "token_limit": 200000, "supports_temperature": True, "default_temperature": 1.0, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "claude-3-5-haiku-latest": { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, }, "bedrock": { @@ -283,93 +310,109 @@ models_params = { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "anthropic.claude-3-sonnet-20240229-v1:0": { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "anthropic.claude-3-opus-20240229-v1:0": { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "anthropic.claude-3-5-sonnet-20240620-v1:0": { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "claude-3-5-haiku-latest": { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, - "anthropic.claude-v2:1": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "anthropic.claude-v2": {"token_limit": 100000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, + "anthropic.claude-v2:1": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "anthropic.claude-v2": {"token_limit": 100000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, "anthropic.claude-instant-v1": { "token_limit": 100000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "meta.llama3-8b-instruct-v1:0": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "meta.llama3-70b-instruct-v1:0": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, - "meta.llama2-13b-chat-v1": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "meta.llama2-70b-chat-v1": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, + "meta.llama2-13b-chat-v1": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "meta.llama2-70b-chat-v1": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, "mistral.mistral-7b-instruct-v0:2": { "token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "mistral.mixtral-8x7b-instruct-v0:1": { "token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "mistral.mistral-large-2402-v1:0": { "token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "mistral.mistral-small-2402-v1:0": { "token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "amazon.titan-embed-text-v1": { "token_limit": 8000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "amazon.titan-embed-text-v2:0": { "token_limit": 8000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, - "cohere.embed-english-v3": {"token_limit": 512, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, + "cohere.embed-english-v3": {"token_limit": 512, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, "cohere.embed-multilingual-v3": { "token_limit": 512, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, }, "mistralai": { - "mistral-large-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "open-mistral-nemo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, - "codestral-latest": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}, + "mistral-large-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "open-mistral-nemo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, + "codestral-latest": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}, }, "togetherai": { "Meta-Llama-3.1-70B-Instruct-Turbo": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY } }, }