diff --git a/docs/docusaurus.config.ts b/docs/docusaurus.config.ts
index f13b89f..4b3de14 100644
--- a/docs/docusaurus.config.ts
+++ b/docs/docusaurus.config.ts
@@ -17,6 +17,10 @@ const config: Config = {
     locales: ['en'],
   },
 
+  plugins: [
+    '@docusaurus/plugin-vercel-analytics'
+  ],
+
   presets: [
     [
       'classic',
diff --git a/docs/package-lock.json b/docs/package-lock.json
index d3a5f04..248e2d8 100644
--- a/docs/package-lock.json
+++ b/docs/package-lock.json
@@ -9,6 +9,7 @@
       "version": "0.0.0",
       "dependencies": {
         "@docusaurus/core": "3.7.0",
+        "@docusaurus/plugin-vercel-analytics": "^3.7.0",
         "@docusaurus/preset-classic": "3.7.0",
         "@mdx-js/react": "^3.0.0",
         "clsx": "^2.0.0",
@@ -3529,6 +3530,66 @@
         "react-dom": "^18.0.0 || ^19.0.0"
       }
     },
+    "node_modules/@docusaurus/plugin-vercel-analytics": {
+      "version": "3.7.0",
+      "resolved": "https://registry.npmjs.org/@docusaurus/plugin-vercel-analytics/-/plugin-vercel-analytics-3.7.0.tgz",
+      "integrity": "sha512-zEOsqNI3oj4WRO9Dbzsar9fctwAl60PZJqhu14X5W3z5zT/E1TFKrHW/oJHU/a1r5o9K2cFsSNdDn2tyuaFJoQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@docusaurus/core": "3.7.0",
+        "@docusaurus/logger": "3.7.0",
+        "@docusaurus/types": "3.7.0",
+        "@docusaurus/utils": "3.7.0",
+        "@docusaurus/utils-validation": "3.7.0",
+        "@vercel/analytics": "^1.1.1",
+        "tslib": "^2.6.0"
+      },
+      "engines": {
+        "node": ">=18.0"
+      },
+      "peerDependencies": {
+        "react": "^18.0.0 || ^19.0.0",
+        "react-dom": "^18.0.0 || ^19.0.0"
+      }
+    },
+    "node_modules/@docusaurus/plugin-vercel-analytics/node_modules/@vercel/analytics": {
+      "version": "1.5.0",
+      "resolved": "https://registry.npmjs.org/@vercel/analytics/-/analytics-1.5.0.tgz",
+      "integrity": "sha512-MYsBzfPki4gthY5HnYN7jgInhAZ7Ac1cYDoRWFomwGHWEX7odTEzbtg9kf/QSo7XEsEAqlQugA6gJ2WS2DEa3g==",
+      "license": "MPL-2.0",
+      "peerDependencies": {
+        "@remix-run/react": "^2",
+        "@sveltejs/kit": "^1 || ^2",
+        "next": ">= 13",
+        "react": "^18 || ^19 || ^19.0.0-rc",
+        "svelte": ">= 4",
+        "vue": "^3",
+        "vue-router": "^4"
+      },
+      "peerDependenciesMeta": {
+        "@remix-run/react": {
+          "optional": true
+        },
+        "@sveltejs/kit": {
+          "optional": true
+        },
+        "next": {
+          "optional": true
+        },
+        "react": {
+          "optional": true
+        },
+        "svelte": {
+          "optional": true
+        },
+        "vue": {
+          "optional": true
+        },
+        "vue-router": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/@docusaurus/preset-classic": {
       "version": "3.7.0",
       "resolved": "https://registry.npmjs.org/@docusaurus/preset-classic/-/preset-classic-3.7.0.tgz",
diff --git a/docs/package.json b/docs/package.json
index 32ddd41..8bf5596 100644
--- a/docs/package.json
+++ b/docs/package.json
@@ -16,6 +16,7 @@
   },
   "dependencies": {
     "@docusaurus/core": "3.7.0",
+    "@docusaurus/plugin-vercel-analytics": "^3.7.0",
     "@docusaurus/preset-classic": "3.7.0",
     "@mdx-js/react": "^3.0.0",
     "clsx": "^2.0.0",
diff --git a/ra_aid/models_params.py b/ra_aid/models_params.py
index 0d374ca..2124258 100644
--- a/ra_aid/models_params.py
+++ b/ra_aid/models_params.py
@@ -4,6 +4,7 @@ List of model parameters
 
 DEFAULT_TOKEN_LIMIT = 100000
 DEFAULT_TEMPERATURE = 0.7
+DEFAULT_BASE_LATENCY = 180
 
 models_params = {
     "openai": {
@@ -11,216 +12,281 @@ models_params = {
             "token_limit": 16385,
             "supports_temperature": True,
             "default_temperature": DEFAULT_TEMPERATURE,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-3.5": {
             "token_limit": 4096,
             "supports_temperature": True,
             "default_temperature": DEFAULT_TEMPERATURE,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-3.5-turbo": {
             "token_limit": 16385,
             "supports_temperature": True,
             "default_temperature": DEFAULT_TEMPERATURE,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-3.5-turbo-1106": {
             "token_limit": 16385,
             "supports_temperature": True,
             "default_temperature": DEFAULT_TEMPERATURE,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-3.5-turbo-instruct": {
             "token_limit": 4096,
             "supports_temperature": True,
             "default_temperature": DEFAULT_TEMPERATURE,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-0125-preview": {
             "token_limit": 128000,
             "supports_temperature": True,
             "default_temperature": DEFAULT_TEMPERATURE,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-turbo-preview": {
             "token_limit": 128000,
             "supports_temperature": True,
             "default_temperature": DEFAULT_TEMPERATURE,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-turbo": {
             "token_limit": 128000,
             "supports_temperature": True,
             "default_temperature": DEFAULT_TEMPERATURE,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-turbo-2024-04-09": {
             "token_limit": 128000,
             "supports_temperature": True,
             "default_temperature": DEFAULT_TEMPERATURE,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-1106-preview": {
             "token_limit": 128000,
             "supports_temperature": True,
             "default_temperature": DEFAULT_TEMPERATURE,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-vision-preview": {
             "token_limit": 128000,
             "supports_temperature": True,
             "default_temperature": DEFAULT_TEMPERATURE,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4": {
             "token_limit": 8192,
             "supports_temperature": True,
             "default_temperature": DEFAULT_TEMPERATURE,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-0613": {
             "token_limit": 8192,
             "supports_temperature": True,
             "default_temperature": DEFAULT_TEMPERATURE,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-32k": {
             "token_limit": 32768,
             "supports_temperature": True,
             "default_temperature": DEFAULT_TEMPERATURE,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4-32k-0613": {
             "token_limit": 32768,
             "supports_temperature": True,
             "default_temperature": DEFAULT_TEMPERATURE,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4o": {
             "token_limit": 128000,
             "supports_temperature": True,
             "default_temperature": DEFAULT_TEMPERATURE,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4o-2024-08-06": {
             "token_limit": 128000,
             "supports_temperature": True,
             "default_temperature": DEFAULT_TEMPERATURE,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4o-2024-05-13": {
             "token_limit": 128000,
             "supports_temperature": True,
             "default_temperature": DEFAULT_TEMPERATURE,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
         "gpt-4o-mini": {
             "token_limit": 128000,
             "supports_temperature": True,
             "default_temperature": DEFAULT_TEMPERATURE,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
+        },
+        "o1-preview": {
+            "token_limit": 128000,
+            "supports_temperature": False,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
+        },
+        "o1-mini": {
+            "token_limit": 128000,
+            "supports_temperature": False,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
+        },
+        "o1": {
+            "token_limit": 200000,
+            "supports_temperature": False,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
+        },
+        "o3-mini": {
+            "token_limit": 200000,
+            "supports_temperature": False,
+            "latency_coefficient": DEFAULT_BASE_LATENCY,
         },
-        "o1-preview": {"token_limit": 128000, "supports_temperature": False},
-        "o1-mini": {"token_limit": 128000, "supports_temperature": False},
-        "o1": {"token_limit": 200000, "supports_temperature": False},
-        "o3-mini": {"token_limit": 200000, "supports_temperature": False},
     },
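
Note that the o1* and o3-mini entries above are expanded from one-liners into the multi-line form so they can carry the new latency_coefficient key; they still set supports_temperature to False and omit default_temperature. A minimal sketch of how a caller might honor that flag — build_request_kwargs is a hypothetical helper, not part of this diff:

    from ra_aid.models_params import DEFAULT_TEMPERATURE, models_params

    def build_request_kwargs(provider: str, model: str) -> dict:
        """Hypothetical: only pass temperature where the model accepts it."""
        params = models_params.get(provider, {}).get(model, {})
        kwargs = {}
        if params.get("supports_temperature", True):
            # o1/o3-style entries take this branch's complement: no temperature.
            kwargs["temperature"] = params.get("default_temperature", DEFAULT_TEMPERATURE)
        return kwargs
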
"gpt-3.5-turbo-0125": { "token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-3.5": { "token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-3.5-turbo": { "token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-3.5-turbo-1106": { "token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-3.5-turbo-instruct": { "token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-0125-preview": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-turbo-preview": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-turbo": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-turbo-2024-04-09": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-1106-preview": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-vision-preview": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-0613": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-32k": { "token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4-32k-0613": { "token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4o": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gpt-4o-mini": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "chatgpt-4o-latest": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, + }, + "o1-preview": { + "token_limit": 128000, + "supports_temperature": False, + "latency_coefficient": DEFAULT_BASE_LATENCY, + }, + "o1-mini": { + "token_limit": 128000, + "supports_temperature": False, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, - "o1-preview": {"token_limit": 128000, "supports_temperature": False}, - "o1-mini": {"token_limit": 128000, "supports_temperature": False}, }, "google_genai": { "gemini-pro": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": 
DEFAULT_BASE_LATENCY, }, "gemini-1.5-flash-latest": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gemini-1.5-pro-latest": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "models/embedding-001": { "token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, }, "google_vertexai": { @@ -228,16 +294,19 @@ models_params = { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gemini-1.5-pro": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gemini-1.0-pro": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, }, "ollama": { @@ -245,257 +314,308 @@ models_params = { "token_limit": 12800, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "codellama": { "token_limit": 16000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "dbrx": { "token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "deepseek-coder:33b": { "token_limit": 16000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "falcon": { "token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "llama2": { "token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "llama2:7b": { "token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "llama2:13b": { "token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "llama2:70b": { "token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "llama3": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "llama3:8b": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "llama3:70b": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "llama3.1": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "llama3.1:8b": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "llama3.1:70b": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, 
"lama3.1:405b": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "llama3.2": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "llama3.2:1b": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "llama3.2:3b": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "llama3.3:70b": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "scrapegraph": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "mistral-small": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "mistral-openorca": { "token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "mistral-large": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "grok-1": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "llava": { "token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "mixtral:8x22b-instruct": { "token_limit": 65536, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "nomic-embed-text": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "nous-hermes2:34b": { "token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "orca-mini": { "token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "phi3:3.8b": { "token_limit": 12800, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "phi3:14b": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "qwen:0.5b": { "token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "qwen:1.8b": { "token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "qwen:4b": { "token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "qwen:14b": { "token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "qwen:32b": { "token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + 
"latency_coefficient": DEFAULT_BASE_LATENCY, }, "qwen:72b": { "token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "qwen:110b": { "token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "stablelm-zephyr": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "wizardlm2:8x22b": { "token_limit": 65536, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "mistral": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gemma2": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gemma2:9b": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gemma2:27b": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, # embedding models "shaw/dmeta-embedding-zh-small-q4": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "shaw/dmeta-embedding-zh-q4": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "chevalblanc/acge_text_embedding": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "martcreation/dmeta-embedding-zh": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "snowflake-arctic-embed": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "mxbai-embed-large": { "token_limit": 512, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, }, "oneapi": { @@ -503,6 +623,7 @@ models_params = { "token_limit": 6000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, } }, "nvidia": { @@ -510,81 +631,97 @@ models_params = { "token_limit": 419, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "meta/llama3-8b-instruct": { "token_limit": 419, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "nemotron-4-340b-instruct": { "token_limit": 1024, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "databricks/dbrx-instruct": { "token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "google/codegemma-7b": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "google/gemma-2b": { "token_limit": 
2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "google/gemma-7b": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "google/recurrentgemma-2b": { "token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "meta/codellama-70b": { "token_limit": 16384, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "meta/llama2-70b": { "token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "microsoft/phi-3-mini-128k-instruct": { "token_limit": 122880, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "mistralai/mistral-7b-instruct-v0.2": { "token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "mistralai/mistral-large": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "mistralai/mixtral-8x22b-instruct-v0.1": { "token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "mistralai/mixtral-8x7b-instruct-v0.1": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "snowflake/arctic": { "token_limit": 16384, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, }, "groq": { @@ -592,26 +729,31 @@ models_params = { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "llama3-70b-8192": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "mixtral-8x7b-32768": { "token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "gemma-7b-it": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "claude-3-haiku-20240307'": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, }, "toghetherai": { @@ -619,71 +761,85 @@ models_params = { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "mistralai/Mixtral-8x22B-Instruct-v0.1": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "stabilityai/stable-diffusion-xl-base-1.0": { "token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, 
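
Since the same latency_coefficient line is added to every entry by hand, a regression test can guard against future entries missing it. A hypothetical pytest-style sketch, not part of this diff:

    from ra_aid.models_params import models_params

    def test_every_model_has_latency_coefficient():
        # Walk provider -> model -> params and collect any entry lacking the key.
        missing = [
            (provider, model)
            for provider, models in models_params.items()
            for model, params in models.items()
            if "latency_coefficient" not in params
        ]
        assert not missing, f"entries missing latency_coefficient: {missing}"
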
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "NousResearch/Hermes-3-Llama-3.1-405B-Turbo": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "Gryphe/MythoMax-L2-13b-Lite": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "Salesforce/Llama-Rank-V1": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "meta-llama/Meta-Llama-Guard-3-8B": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "meta-llama/Meta-Llama-3-70B-Instruct-Turbo": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "meta-llama/Llama-3-8b-chat-hf": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "meta-llama/Llama-3-70b-chat-hf": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "Qwen/Qwen2-72B-Instruct": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "google/gemma-2-27b-it": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, }, "anthropic": { @@ -691,56 +847,67 @@ models_params = { "token_limit": 100000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "claude2": { "token_limit": 9000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "claude2.1": { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "claude3": { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "claude3.5": { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "claude-3-opus-20240229": { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "claude-3-sonnet-20240229": { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "claude-3-haiku-20240307": { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "claude-3-5-sonnet-20240620": { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "claude-3-5-sonnet-20241022": { "token_limit": 200000, "supports_temperature": True, "default_temperature": 1.0, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, 
"claude-3-5-haiku-latest": { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, }, "bedrock": { @@ -748,101 +915,121 @@ models_params = { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "anthropic.claude-3-sonnet-20240229-v1:0": { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "anthropic.claude-3-opus-20240229-v1:0": { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "anthropic.claude-3-5-sonnet-20240620-v1:0": { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "claude-3-5-haiku-latest": { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "anthropic.claude-v2:1": { "token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "anthropic.claude-v2": { "token_limit": 100000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "anthropic.claude-instant-v1": { "token_limit": 100000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "meta.llama3-8b-instruct-v1:0": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "meta.llama3-70b-instruct-v1:0": { "token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "meta.llama2-13b-chat-v1": { "token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "meta.llama2-70b-chat-v1": { "token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "mistral.mistral-7b-instruct-v0:2": { "token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "mistral.mixtral-8x7b-instruct-v0:1": { "token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "mistral.mistral-large-2402-v1:0": { "token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "mistral.mistral-small-2402-v1:0": { "token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "amazon.titan-embed-text-v1": { "token_limit": 8000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "amazon.titan-embed-text-v2:0": { "token_limit": 8000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "cohere.embed-english-v3": { "token_limit": 512, "supports_temperature": True, 
"default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "cohere.embed-multilingual-v3": { "token_limit": 512, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, }, "mistralai": { @@ -850,16 +1037,19 @@ models_params = { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "open-mistral-nemo": { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, "codestral-latest": { "token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, }, }, "togetherai": { @@ -867,6 +1057,7 @@ models_params = { "token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, + "latency_coefficient": DEFAULT_BASE_LATENCY, } }, }