Compare commits

...

12 Commits

Author SHA1 Message Date
AI Christianson 387597240c prompts 2025-02-23 07:51:38 -05:00
AI Christianson f4fefd7cf3 prompts 2025-02-22 15:44:25 -05:00
AI Christianson e4e09c00e0 prompts 2025-02-22 14:01:40 -05:00
AI Christianson b0e947d9b3 eval improvements 2025-02-21 18:38:00 -05:00
AI Christianson 5102e1fabb eval optimization 2025-02-21 17:26:31 -05:00
AI Christianson a022bb3586 Revert "add app headers for openrouter" (reverts commit 9dee031a2a) 2025-02-21 12:59:11 -05:00
AI Christianson 129e7a11ab prompts 2025-02-21 12:35:13 -05:00
AI Christianson 2846b04c92 increase expert calling 2025-02-20 14:34:19 -05:00
AI Christianson 9dee031a2a add app headers for openrouter 2025-02-19 19:21:16 -05:00
AI Christianson 6d095eab46 use base latency in programmer tool 2025-02-19 16:19:45 -05:00
AI Christianson c789edd8bd add base latency model param 2025-02-19 16:19:31 -05:00
AI Christianson 2741b54357 use create_react_agent for sonnet via openrouter 2025-02-19 16:07:24 -05:00
6 changed files with 342 additions and 157 deletions

View File

@@ -225,11 +225,11 @@ def is_anthropic_claude(config: Dict[str, Any]) -> bool:
"""
provider = config.get("provider", "")
model_name = config.get("model", "")
return (
provider.lower() == "anthropic"
and model_name
and "claude" in model_name.lower()
result = (
(provider.lower() == "anthropic" and model_name and "claude" in model_name.lower())
or (provider.lower() == "openrouter" and model_name.lower().startswith("anthropic/claude-"))
)
return result
def create_agent(
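
The broadened check now matches Claude whether it is reached directly through Anthropic or routed through OpenRouter's anthropic/ namespace. A minimal sketch of the new behavior (the configs are illustrative, not taken from this diff):

from ra_aid.agent_utils import is_anthropic_claude

# Direct Anthropic access still matches on provider plus "claude" in the model name.
assert is_anthropic_claude({"provider": "anthropic", "model": "claude-3-5-sonnet-20241022"})
# OpenRouter access now matches on the "anthropic/claude-" model prefix.
assert is_anthropic_claude({"provider": "openrouter", "model": "anthropic/claude-3.5-sonnet"})
# Other OpenRouter models are unaffected.
assert not is_anthropic_claude({"provider": "openrouter", "model": "openai/gpt-4o"})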

View File

@@ -4,278 +4,305 @@ List of model parameters
DEFAULT_TOKEN_LIMIT = 100000
DEFAULT_TEMPERATURE = 0.7
DEFAULT_BASE_LATENCY = 240
models_params = {
"openai": {
"gpt-3.5-turbo-0125": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-3.5": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-3.5-turbo": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-3.5-turbo-1106": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-3.5-turbo-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-0125-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-turbo-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-turbo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-turbo-2024-04-09": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-1106-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-vision-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-0613": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-32k": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-32k-0613": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4o": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4o-2024-08-06": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4o-2024-05-13": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4o-mini": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"o1-preview": {"token_limit": 128000, "supports_temperature": False},
"o1-mini": {"token_limit": 128000, "supports_temperature": False},
"o1-preview": {"token_limit": 128000, "supports_temperature": False},
"o1": {"token_limit": 200000, "supports_temperature": False},
"o3-mini": {"token_limit": 200000, "supports_temperature": False},
"gpt-3.5-turbo-0125": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5-turbo": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5-turbo-1106": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5-turbo-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-0125-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-turbo-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-turbo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-turbo-2024-04-09": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-1106-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-vision-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-0613": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-32k": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-32k-0613": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4o": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4o-2024-08-06": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4o-2024-05-13": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4o-mini": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"o1-preview": {"token_limit": 128000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY},
"o1-mini": {"token_limit": 128000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY},
"o1-preview": {"token_limit": 128000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY},
"o1": {"token_limit": 200000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY},
"o3-mini": {"token_limit": 200000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY},
},
"azure_openai": {
"gpt-3.5-turbo-0125": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-3.5": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-3.5-turbo": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-3.5-turbo-1106": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-3.5-turbo-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-0125-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-turbo-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-turbo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-turbo-2024-04-09": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-1106-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-vision-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-0613": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-32k": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-32k-0613": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4o": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4o-mini": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"chatgpt-4o-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"o1-preview": {"token_limit": 128000, "supports_temperature": False},
"o1-mini": {"token_limit": 128000, "supports_temperature": False},
"gpt-3.5-turbo-0125": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5-turbo": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5-turbo-1106": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5-turbo-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-0125-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-turbo-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-turbo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-turbo-2024-04-09": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-1106-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-vision-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-0613": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-32k": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-32k-0613": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4o": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4o-mini": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"chatgpt-4o-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"o1-preview": {"token_limit": 128000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY},
"o1-mini": {"token_limit": 128000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY},
},
"google_genai": {
"gemini-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gemini-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gemini-1.5-flash-latest": {
"token_limit": 128000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"gemini-1.5-pro-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"models/embedding-001": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gemini-1.5-pro-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"models/embedding-001": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
},
"google_vertexai": {
"gemini-1.5-flash": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gemini-1.5-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gemini-1.0-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gemini-1.5-flash": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gemini-1.5-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gemini-1.0-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
},
"ollama": {
"command-r": {"token_limit": 12800, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"codellama": {"token_limit": 16000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"dbrx": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"deepseek-coder:33b": {"token_limit": 16000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"falcon": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama2": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama2:7b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama2:13b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama2:70b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3:8b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3:70b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3.1": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3.1:8b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3.1:70b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"lama3.1:405b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3.2": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3.2:1b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3.2:3b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3.3:70b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"scrapegraph": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"mistral-small": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"mistral-openorca": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"mistral-large": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"grok-1": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llava": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"mixtral:8x22b-instruct": {"token_limit": 65536, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"nomic-embed-text": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"nous-hermes2:34b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"orca-mini": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"phi3:3.8b": {"token_limit": 12800, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"phi3:14b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"qwen:0.5b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"qwen:1.8b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"qwen:4b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"qwen:14b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"qwen:32b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"qwen:72b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"qwen:110b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"stablelm-zephyr": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"wizardlm2:8x22b": {"token_limit": 65536, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"mistral": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gemma2": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gemma2:9b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gemma2:27b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"command-r": {"token_limit": 12800, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"codellama": {"token_limit": 16000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"dbrx": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"deepseek-coder:33b": {"token_limit": 16000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"falcon": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama2": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama2:7b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama2:13b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama2:70b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3:8b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3:70b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3.1": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3.1:8b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3.1:70b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"lama3.1:405b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3.2": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3.2:1b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3.2:3b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3.3:70b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"scrapegraph": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mistral-small": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mistral-openorca": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mistral-large": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"grok-1": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llava": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mixtral:8x22b-instruct": {"token_limit": 65536, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"nomic-embed-text": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"nous-hermes2:34b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"orca-mini": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"phi3:3.8b": {"token_limit": 12800, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"phi3:14b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"qwen:0.5b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"qwen:1.8b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"qwen:4b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"qwen:14b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"qwen:32b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"qwen:72b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"qwen:110b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"stablelm-zephyr": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"wizardlm2:8x22b": {"token_limit": 65536, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mistral": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gemma2": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gemma2:9b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gemma2:27b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
# embedding models
"shaw/dmeta-embedding-zh-small-q4": {
"token_limit": 8192,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"shaw/dmeta-embedding-zh-q4": {
"token_limit": 8192,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"chevalblanc/acge_text_embedding": {
"token_limit": 8192,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"martcreation/dmeta-embedding-zh": {
"token_limit": 8192,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"snowflake-arctic-embed": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"mxbai-embed-large": {"token_limit": 512, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"snowflake-arctic-embed": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mxbai-embed-large": {"token_limit": 512, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
},
"oneapi": {"qwen-turbo": {"token_limit": 6000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}},
"oneapi": {"qwen-turbo": {"token_limit": 6000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}},
"nvidia": {
"meta/llama3-70b-instruct": {"token_limit": 419, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"meta/llama3-8b-instruct": {"token_limit": 419, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"nemotron-4-340b-instruct": {"token_limit": 1024, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"databricks/dbrx-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"google/codegemma-7b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"google/gemma-2b": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"google/gemma-7b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"google/recurrentgemma-2b": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"meta/codellama-70b": {"token_limit": 16384, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"meta/llama2-70b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"meta/llama3-70b-instruct": {"token_limit": 419, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"meta/llama3-8b-instruct": {"token_limit": 419, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"nemotron-4-340b-instruct": {"token_limit": 1024, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"databricks/dbrx-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"google/codegemma-7b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"google/gemma-2b": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"google/gemma-7b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"google/recurrentgemma-2b": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"meta/codellama-70b": {"token_limit": 16384, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"meta/llama2-70b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"microsoft/phi-3-mini-128k-instruct": {
"token_limit": 122880,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"mistralai/mistral-7b-instruct-v0.2": {
"token_limit": 4096,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"mistralai/mistral-large": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"mistralai/mistral-large": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mistralai/mixtral-8x22b-instruct-v0.1": {
"token_limit": 32768,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"mistralai/mixtral-8x7b-instruct-v0.1": {
"token_limit": 8192,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"snowflake/arctic": {"token_limit": 16384, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"snowflake/arctic": {"token_limit": 16384, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
},
"groq": {
"llama3-8b-8192": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3-70b-8192": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"mixtral-8x7b-32768": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gemma-7b-it": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"claude-3-haiku-20240307'": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3-8b-8192": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3-70b-8192": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mixtral-8x7b-32768": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gemma-7b-it": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"claude-3-haiku-20240307'": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
},
"toghetherai": {
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": {
"token_limit": 128000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": {
"token_limit": 128000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"mistralai/Mixtral-8x22B-Instruct-v0.1": {
"token_limit": 128000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"stabilityai/stable-diffusion-xl-base-1.0": {
"token_limit": 2048,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": {
"token_limit": 128000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"NousResearch/Hermes-3-Llama-3.1-405B-Turbo": {
"token_limit": 128000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"Gryphe/MythoMax-L2-13b-Lite": {
"token_limit": 8192,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"Salesforce/Llama-Rank-V1": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"Salesforce/Llama-Rank-V1": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"meta-llama/Meta-Llama-Guard-3-8B": {
"token_limit": 128000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"meta-llama/Meta-Llama-3-70B-Instruct-Turbo": {
"token_limit": 128000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"meta-llama/Llama-3-8b-chat-hf": {
"token_limit": 8192,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"meta-llama/Llama-3-70b-chat-hf": {
"token_limit": 8192,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"Qwen/Qwen2-72B-Instruct": {
"token_limit": 128000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"google/gemma-2-27b-it": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"google/gemma-2-27b-it": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
},
"anthropic": {
"claude_instant": {"token_limit": 100000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"claude2": {"token_limit": 9000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"claude2.1": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"claude3": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"claude3.5": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"claude-3-opus-20240229": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"claude_instant": {"token_limit": 100000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"claude2": {"token_limit": 9000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"claude2.1": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"claude3": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"claude3.5": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"claude-3-opus-20240229": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"claude-3-sonnet-20240229": {
"token_limit": 200000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"claude-3-haiku-20240307": {
"token_limit": 200000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"claude-3-5-sonnet-20240620": {
"token_limit": 200000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"claude-3-5-sonnet-20241022": {
"token_limit": 200000,
"supports_temperature": True,
"default_temperature": 1.0,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"claude-3-5-haiku-latest": {
"token_limit": 200000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
},
"bedrock": {
@@ -283,93 +310,109 @@ models_params = {
"token_limit": 200000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"anthropic.claude-3-sonnet-20240229-v1:0": {
"token_limit": 200000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"anthropic.claude-3-opus-20240229-v1:0": {
"token_limit": 200000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"anthropic.claude-3-5-sonnet-20240620-v1:0": {
"token_limit": 200000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"claude-3-5-haiku-latest": {
"token_limit": 200000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"anthropic.claude-v2:1": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"anthropic.claude-v2": {"token_limit": 100000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"anthropic.claude-v2:1": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"anthropic.claude-v2": {"token_limit": 100000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"anthropic.claude-instant-v1": {
"token_limit": 100000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"meta.llama3-8b-instruct-v1:0": {
"token_limit": 8192,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"meta.llama3-70b-instruct-v1:0": {
"token_limit": 8192,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"meta.llama2-13b-chat-v1": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"meta.llama2-70b-chat-v1": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"meta.llama2-13b-chat-v1": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"meta.llama2-70b-chat-v1": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mistral.mistral-7b-instruct-v0:2": {
"token_limit": 32768,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"mistral.mixtral-8x7b-instruct-v0:1": {
"token_limit": 32768,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"mistral.mistral-large-2402-v1:0": {
"token_limit": 32768,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"mistral.mistral-small-2402-v1:0": {
"token_limit": 32768,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"amazon.titan-embed-text-v1": {
"token_limit": 8000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"amazon.titan-embed-text-v2:0": {
"token_limit": 8000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"cohere.embed-english-v3": {"token_limit": 512, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"cohere.embed-english-v3": {"token_limit": 512, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"cohere.embed-multilingual-v3": {
"token_limit": 512,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
},
"mistralai": {
"mistral-large-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"open-mistral-nemo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"codestral-latest": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"mistral-large-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"open-mistral-nemo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"codestral-latest": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
},
"togetherai": {
"Meta-Llama-3.1-70B-Instruct-Turbo": {
"token_limit": 128000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
}
},
}
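
Every entry above now carries a latency_coefficient, so callers can resolve a per-model value with a safe fallback. A minimal sketch of the lookup pattern this change relies on (get_latency is a hypothetical helper; the provider/model values are illustrative):

from ra_aid.models_params import models_params, DEFAULT_BASE_LATENCY

def get_latency(provider: str, model: str) -> int:
    # Chained .get() calls fall back to DEFAULT_BASE_LATENCY (240) for
    # unknown providers, unknown models, or entries without the key.
    return models_params.get(provider, {}).get(model, {}).get("latency_coefficient", DEFAULT_BASE_LATENCY)

assert get_latency("openai", "gpt-4o") == DEFAULT_BASE_LATENCY
assert get_latency("no-such-provider", "no-such-model") == DEFAULT_BASE_LATENCY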

View File

@@ -127,7 +127,11 @@ Because this is a new project:
# Research stage prompt - guides initial codebase analysis
RESEARCH_PROMPT = """Current Date: {current_date}
User query: {base_task} --keep it simple
User request: <problem statement>
{base_task}
</problem statement>
Consult with the expert frequently.
Context from Previous Research (if available):
Key Facts:
@@ -219,12 +223,6 @@ No Planning or Problem-Solving
You must remain strictly within the bounds of describing what currently exists.
If the task requires *ANY* compilation, unit tests, or any other non-trivial changes, call request_implementation.
If this is a trivial task that can be completed in one shot, do the change using tools available, call one_shot_completed, and immediately exit without saying anything.
Remember, many tasks are more complex and nuanced than they seem and still require requesting implementation.
For one shot tasks, still take some time to consider whether compilation, testing, or additional validation should be done to check your work.
If you implement the task yourself, do not request implementation.
Thoroughness and Completeness:
If this is determined to be a new/empty project (shown in Project Info), focus directly on the task.
If it is an existing project:
@@ -267,12 +265,6 @@ Thoroughness and Completeness:
If there are existing relevant unit tests/test suites, you must run them *during the research stage*, before editing anything, using run_shell_command to get a baseline about passing/failing tests and call emit_key_facts with key facts about the tests and whether they were passing when you started. This ensures a proper baseline is established before any changes.
Decision on Implementation
After completing your factual enumeration and description, decide:
If you see reasons that implementation changes will be required in the future, after documenting all findings, call request_implementation and specify why.
If no changes are needed, simply state that no changes are required.
Be thorough on locating all potential change sites/gauging blast radius.
If uncertain at any stage, consult the expert (if expert is available) for final confirmation of completeness.
@@ -288,6 +280,14 @@ You have often been criticized for:
{human_section}
{web_research_section}
Project and most of its deps are already installed in a virtual environment. After request_implementation is finished, verify by running tests. If relevant tests are failing, you may call request_implementation as many times as needed until it is working.
BEFORE DOING ANYTHING, CALL ask_expert TO INTERPRET THE USER REQUEST TO MAKE SURE YOU UNDERSTAND THE PROBLEM STATEMENT.
IMMEDIATELY AFTER CONSULTING WITH THE EXPERT, CALL request_research TO FIND OUT HOW TO RUN TESTS ON THIS PROJECT IN GENERAL.
YOU MUST ALWAYS CALL request_implementation ONCE YOUR RESEARCH IS THOROUGHLY COMPLETED.
THE ABOVE ARE **TOOL CALLS** -- YOU MUST ACTUALLY CALL THE TOOLS IN MULTIPLE STEPS!
NEVER ANNOUNCE WHAT YOU ARE DOING, JUST DO IT!
"""
@@ -573,9 +573,6 @@ Guidelines:
Are you writing a program that needs to be compiled? Make sure it compiles, if relevant.
After finalizing the overall approach:
Use emit_plan to store the high-level implementation plan.
Once you are absolutely sure you are completed planning, you may begin to call request_task_implementation one-by-one for each task to implement the plan.
If you have any doubt about the correctness or thoroughness of the plan, consult the expert (if expert is available) for verification.
@@ -589,6 +586,7 @@ You have often been criticized for:
- Not calling tools/functions properly, e.g. leaving off required arguments, calling a tool in a loop, calling tools inappropriately.
NEVER ANNOUNCE WHAT YOU ARE DOING, JUST DO IT!
DO NOT CHANGE ANY EXISTING TESTS, BUT YOU MAY ADD YOUR OWN.
"""
# Implementation stage prompt - guides specific task implementation
@@ -656,7 +654,7 @@ Instructions:
{task}
</task definition>
KEEP IT SIMPLE
ALWAYS RUN RELEVANT UNIT TESTS. IF RELEVANT UNIT TESTS ARE FAILING, KEEP EDITING UNTIL YOU GET IT WORKING. CONSULT WITH THE EXPERT FREQUENTLY.
NEVER ANNOUNCE WHAT YOU ARE DOING, JUST DO IT!
"""
@@ -888,6 +886,9 @@ You have often been criticized for:
- Not calling tools/functions properly, e.g. leaving off required arguments, calling a tool in a loop, calling tools inappropriately.
NEVER ANNOUNCE WHAT YOU ARE DOING, JUST DO IT!
DO NOT CHANGE ANY EXISTING TESTS
INSTALL TEST DEPS IF YOU NEED TO. NOTE THE PACKAGE YOU ARE WORKING ON. E.g. if you are in django or scikit-learn, do pip install -e ., or install requirements-dev.txt rather than installing from PyPI.
"""
# New agentic chat prompt for interactive mode
@@ -983,4 +984,4 @@ You have often been criticized for:
Remember, if you do not make any tool call (e.g. ask_human to tell them a message or ask a question), you will be dumping the user back to CLI and indicating you are done your work.
NEVER ANNOUNCE WHAT YOU ARE DOING, JUST DO IT!
"""
"""

View File

@@ -78,7 +78,7 @@ COMMON_TOOLS = get_read_only_tools()
EXPERT_TOOLS = [emit_expert_context, ask_expert]
RESEARCH_TOOLS = [
emit_research_notes,
one_shot_completed,
#one_shot_completed,
# *TEMPORARILY* disabled to improve tool calling perf.
# monorepo_detected,
# ui_detected,
@@ -106,7 +106,7 @@ def get_research_tools(
# Add modification tools if not research_only
if not research_only:
tools.extend(MODIFICATION_TOOLS)
# tools.extend(MODIFICATION_TOOLS)
tools.append(request_implementation)
# Add expert tools if enabled
@@ -216,4 +216,4 @@ def get_chat_tools(
if web_research_enabled:
tools.append(request_web_research)
return tools
return tools

View File

@@ -11,6 +11,7 @@ from rich.panel import Panel
from rich.text import Text
from ra_aid.logging_config import get_logger
from ra_aid.models_params import models_params, DEFAULT_BASE_LATENCY
from ra_aid.proc.interactive import run_interactive_command
from ra_aid.text.processing import truncate_output
from ra_aid.tools.memory import _global_memory, log_work_event
@@ -135,7 +136,12 @@ def run_programming_task(
try:
# Run the command interactively
print()
result = run_interactive_command(command)
# Get provider/model specific latency coefficient
provider = _global_memory.get("config", {}).get("provider", "")
model = _global_memory.get("config", {}).get("model", "")
latency = models_params.get(provider, {}).get(model, {}).get("latency_coefficient", DEFAULT_BASE_LATENCY)
result = run_interactive_command(command, expected_runtime_seconds=latency)
print()
# Log the programming task
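
With this lookup in place, the timeout hint passed to run_interactive_command follows the provider and model stored in the global config. A worked illustration of the resolution (the config values are hypothetical):

from ra_aid.models_params import models_params, DEFAULT_BASE_LATENCY

config = {"provider": "openrouter", "model": "anthropic/claude-3.5-sonnet"}  # hypothetical
provider = config.get("provider", "")
model = config.get("model", "")
# No "openrouter" section appears in models_params above, so the chained
# lookup falls back to the default latency coefficient.
latency = models_params.get(provider, {}).get(model, {}).get("latency_coefficient", DEFAULT_BASE_LATENCY)
assert latency == DEFAULT_BASE_LATENCY  # 240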

View File

@@ -11,6 +11,7 @@ from ra_aid.agent_utils import (
AgentState,
create_agent,
get_model_token_limit,
is_anthropic_claude,
state_modifier,
)
from ra_aid.models_params import DEFAULT_TOKEN_LIMIT, models_params
@@ -275,3 +276,137 @@ def test_get_model_token_limit_planner(mock_memory):
mock_get_info.return_value = {"max_input_tokens": 120000}
token_limit = get_model_token_limit(config, "planner")
assert token_limit == 120000
# New tests for private helper methods in agent_utils.py
def test_setup_and_restore_interrupt_handling():
import signal
from ra_aid.agent_utils import (
_request_interrupt,
_restore_interrupt_handling,
_setup_interrupt_handling,
)
original_handler = signal.getsignal(signal.SIGINT)
handler = _setup_interrupt_handling()
# Verify the SIGINT handler is set to _request_interrupt
assert signal.getsignal(signal.SIGINT) == _request_interrupt
_restore_interrupt_handling(handler)
# Verify the SIGINT handler is restored to the original
assert signal.getsignal(signal.SIGINT) == original_handler
def test_increment_and_decrement_agent_depth():
from ra_aid.agent_utils import (
_decrement_agent_depth,
_global_memory,
_increment_agent_depth,
)
_global_memory["agent_depth"] = 10
_increment_agent_depth()
assert _global_memory["agent_depth"] == 11
_decrement_agent_depth()
assert _global_memory["agent_depth"] == 10
def test_run_agent_stream(monkeypatch):
from ra_aid.agent_utils import _global_memory, _run_agent_stream
# Create a dummy agent that yields one chunk
class DummyAgent:
def stream(self, input_data, cfg: dict):
yield {"content": "chunk1"}
dummy_agent = DummyAgent()
# Set flags so that _run_agent_stream will reset them
_global_memory["plan_completed"] = True
_global_memory["task_completed"] = True
_global_memory["completion_message"] = "existing"
call_flag = {"called": False}
def fake_print_agent_output(
chunk: Dict[str, Any], agent_type: Literal["CiaynAgent", "React"]
):
call_flag["called"] = True
monkeypatch.setattr(
"ra_aid.agent_utils.print_agent_output", fake_print_agent_output
)
_run_agent_stream(dummy_agent, [HumanMessage("dummy prompt")], {})
assert call_flag["called"]
assert _global_memory["plan_completed"] is False
assert _global_memory["task_completed"] is False
assert _global_memory["completion_message"] == ""
def test_execute_test_command_wrapper(monkeypatch):
from ra_aid.agent_utils import _execute_test_command_wrapper
# Patch execute_test_command to return a testable tuple
def fake_execute(config, orig, tests, auto):
return (True, "new prompt", auto, tests + 1)
monkeypatch.setattr("ra_aid.agent_utils.execute_test_command", fake_execute)
result = _execute_test_command_wrapper("orig", {}, 0, False)
assert result == (True, "new prompt", False, 1)
def test_handle_api_error_valueerror():
import pytest
from ra_aid.agent_utils import _handle_api_error
# ValueError not containing "code" or "429" should be re-raised
with pytest.raises(ValueError):
_handle_api_error(ValueError("some error"), 0, 5, 1)
def test_handle_api_error_max_retries():
import pytest
from ra_aid.agent_utils import _handle_api_error
# When attempt reaches max retries, a RuntimeError should be raised
with pytest.raises(RuntimeError):
_handle_api_error(Exception("error code 429"), 4, 5, 1)
def test_handle_api_error_retry(monkeypatch):
import time
from ra_aid.agent_utils import _handle_api_error
# Patch time.monotonic and time.sleep to simulate immediate delay expiration
fake_time = [0]
def fake_monotonic():
fake_time[0] += 0.5
return fake_time[0]
monkeypatch.setattr(time, "monotonic", fake_monotonic)
monkeypatch.setattr(time, "sleep", lambda s: None)
# Should not raise error when attempt is lower than max retries
_handle_api_error(Exception("error code 429"), 0, 5, 1)
def test_is_anthropic_claude():
"""Test is_anthropic_claude function with various configurations."""
# Test Anthropic provider cases
assert is_anthropic_claude({"provider": "anthropic", "model": "claude-2"})
assert is_anthropic_claude({"provider": "ANTHROPIC", "model": "claude-instant"})
assert not is_anthropic_claude({"provider": "anthropic", "model": "gpt-4"})
# Test OpenRouter provider cases
assert is_anthropic_claude({"provider": "openrouter", "model": "anthropic/claude-2"})
assert is_anthropic_claude({"provider": "openrouter", "model": "anthropic/claude-instant"})
assert not is_anthropic_claude({"provider": "openrouter", "model": "openai/gpt-4"})
# Test edge cases
assert not is_anthropic_claude({}) # Empty config
assert not is_anthropic_claude({"provider": "anthropic"}) # Missing model
assert not is_anthropic_claude({"model": "claude-2"}) # Missing provider
assert not is_anthropic_claude({"provider": "other", "model": "claude-2"}) # Wrong provider