Compare commits

...

12 Commits

Author SHA1 Message Date
AI Christianson 387597240c prompts 2025-02-23 07:51:38 -05:00
AI Christianson f4fefd7cf3 prompts 2025-02-22 15:44:25 -05:00
AI Christianson e4e09c00e0 prompts 2025-02-22 14:01:40 -05:00
AI Christianson b0e947d9b3 eval improvements 2025-02-21 18:38:00 -05:00
AI Christianson 5102e1fabb eval optimization 2025-02-21 17:26:31 -05:00
AI Christianson a022bb3586 Revert "add app headers for openrouter" (reverts commit 9dee031a2a) 2025-02-21 12:59:11 -05:00
AI Christianson 129e7a11ab prompts 2025-02-21 12:35:13 -05:00
AI Christianson 2846b04c92 increase expert calling 2025-02-20 14:34:19 -05:00
AI Christianson 9dee031a2a add app headers for openrouter 2025-02-19 19:21:16 -05:00
AI Christianson 6d095eab46 use base latency in programmer tool 2025-02-19 16:19:45 -05:00
AI Christianson c789edd8bd add base latency model param 2025-02-19 16:19:31 -05:00
AI Christianson 2741b54357 use create_react_agent for sonnet via openrouter 2025-02-19 16:07:24 -05:00
6 changed files with 342 additions and 157 deletions

View File

@@ -225,11 +225,11 @@ def is_anthropic_claude(config: Dict[str, Any]) -> bool:
"""
provider = config.get("provider", "")
model_name = config.get("model", "")
return (
provider.lower() == "anthropic"
and model_name
and "claude" in model_name.lower()
result = (
(provider.lower() == "anthropic" and model_name and "claude" in model_name.lower())
or (provider.lower() == "openrouter" and model_name.lower().startswith("anthropic/claude-"))
)
return result
def create_agent(
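
The broadened check now matches Claude whether it is reached directly through Anthropic or routed through OpenRouter's anthropic/ namespace. A minimal sketch of the new behavior (the configs are illustrative, not taken from this diff):

from ra_aid.agent_utils import is_anthropic_claude

# Direct Anthropic access still matches on provider plus "claude" in the model name.
assert is_anthropic_claude({"provider": "anthropic", "model": "claude-3-5-sonnet-20241022"})
# OpenRouter access now matches on the "anthropic/claude-" model prefix.
assert is_anthropic_claude({"provider": "openrouter", "model": "anthropic/claude-3.5-sonnet"})
# Other OpenRouter models are unaffected.
assert not is_anthropic_claude({"provider": "openrouter", "model": "openai/gpt-4o"})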

View File

@@ -4,278 +4,305 @@ List of model parameters
DEFAULT_TOKEN_LIMIT = 100000
DEFAULT_TEMPERATURE = 0.7
DEFAULT_BASE_LATENCY = 240
models_params = {
"openai": {
"gpt-3.5-turbo-0125": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-3.5": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-3.5-turbo": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-3.5-turbo-1106": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-3.5-turbo-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-0125-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-turbo-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-turbo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-turbo-2024-04-09": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-1106-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-vision-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-0613": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-32k": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-32k-0613": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4o": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4o-2024-08-06": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4o-2024-05-13": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4o-mini": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"o1-preview": {"token_limit": 128000, "supports_temperature": False},
"o1-mini": {"token_limit": 128000, "supports_temperature": False},
"o1-preview": {"token_limit": 128000, "supports_temperature": False},
"o1": {"token_limit": 200000, "supports_temperature": False},
"o3-mini": {"token_limit": 200000, "supports_temperature": False},
"gpt-3.5-turbo-0125": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5-turbo": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5-turbo-1106": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5-turbo-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-0125-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-turbo-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-turbo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-turbo-2024-04-09": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-1106-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-vision-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-0613": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-32k": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-32k-0613": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4o": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4o-2024-08-06": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4o-2024-05-13": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4o-mini": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"o1-preview": {"token_limit": 128000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY},
"o1-mini": {"token_limit": 128000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY},
"o1-preview": {"token_limit": 128000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY},
"o1": {"token_limit": 200000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY},
"o3-mini": {"token_limit": 200000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY},
},
"azure_openai": {
"gpt-3.5-turbo-0125": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-3.5": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-3.5-turbo": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-3.5-turbo-1106": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-3.5-turbo-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-0125-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-turbo-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-turbo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-turbo-2024-04-09": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-1106-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-vision-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-0613": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-32k": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4-32k-0613": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4o": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gpt-4o-mini": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"chatgpt-4o-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"o1-preview": {"token_limit": 128000, "supports_temperature": False},
"o1-mini": {"token_limit": 128000, "supports_temperature": False},
"gpt-3.5-turbo-0125": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5-turbo": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5-turbo-1106": {"token_limit": 16385, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-3.5-turbo-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-0125-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-turbo-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-turbo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-turbo-2024-04-09": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-1106-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-vision-preview": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-0613": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-32k": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4-32k-0613": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4o": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gpt-4o-mini": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"chatgpt-4o-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"o1-preview": {"token_limit": 128000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY},
"o1-mini": {"token_limit": 128000, "supports_temperature": False, "latency_coefficient": DEFAULT_BASE_LATENCY},
},
"google_genai": {
"gemini-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gemini-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gemini-1.5-flash-latest": {
"token_limit": 128000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"gemini-1.5-pro-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"models/embedding-001": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gemini-1.5-pro-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"models/embedding-001": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
},
"google_vertexai": {
"gemini-1.5-flash": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gemini-1.5-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gemini-1.0-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gemini-1.5-flash": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gemini-1.5-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gemini-1.0-pro": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
},
"ollama": {
"command-r": {"token_limit": 12800, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"codellama": {"token_limit": 16000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"dbrx": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"deepseek-coder:33b": {"token_limit": 16000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"falcon": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama2": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama2:7b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama2:13b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama2:70b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3:8b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3:70b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3.1": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3.1:8b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3.1:70b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"lama3.1:405b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3.2": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3.2:1b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3.2:3b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3.3:70b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"scrapegraph": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"mistral-small": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"mistral-openorca": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"mistral-large": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"grok-1": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llava": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"mixtral:8x22b-instruct": {"token_limit": 65536, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"nomic-embed-text": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"nous-hermes2:34b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"orca-mini": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"phi3:3.8b": {"token_limit": 12800, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"phi3:14b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"qwen:0.5b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"qwen:1.8b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"qwen:4b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"qwen:14b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"qwen:32b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"qwen:72b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"qwen:110b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"stablelm-zephyr": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"wizardlm2:8x22b": {"token_limit": 65536, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"mistral": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gemma2": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gemma2:9b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gemma2:27b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"command-r": {"token_limit": 12800, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"codellama": {"token_limit": 16000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"dbrx": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"deepseek-coder:33b": {"token_limit": 16000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"falcon": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama2": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama2:7b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama2:13b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama2:70b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3:8b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3:70b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3.1": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3.1:8b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3.1:70b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"lama3.1:405b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3.2": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3.2:1b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3.2:3b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3.3:70b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"scrapegraph": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mistral-small": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mistral-openorca": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mistral-large": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"grok-1": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llava": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mixtral:8x22b-instruct": {"token_limit": 65536, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"nomic-embed-text": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"nous-hermes2:34b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"orca-mini": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"phi3:3.8b": {"token_limit": 12800, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"phi3:14b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"qwen:0.5b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"qwen:1.8b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"qwen:4b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"qwen:14b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"qwen:32b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"qwen:72b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"qwen:110b": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"stablelm-zephyr": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"wizardlm2:8x22b": {"token_limit": 65536, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mistral": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gemma2": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gemma2:9b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gemma2:27b": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
# embedding models
"shaw/dmeta-embedding-zh-small-q4": {
"token_limit": 8192,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"shaw/dmeta-embedding-zh-q4": {
"token_limit": 8192,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"chevalblanc/acge_text_embedding": {
"token_limit": 8192,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"martcreation/dmeta-embedding-zh": {
"token_limit": 8192,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"snowflake-arctic-embed": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"mxbai-embed-large": {"token_limit": 512, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"snowflake-arctic-embed": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mxbai-embed-large": {"token_limit": 512, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
},
"oneapi": {"qwen-turbo": {"token_limit": 6000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE}},
"oneapi": {"qwen-turbo": {"token_limit": 6000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY}},
"nvidia": {
"meta/llama3-70b-instruct": {"token_limit": 419, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"meta/llama3-8b-instruct": {"token_limit": 419, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"nemotron-4-340b-instruct": {"token_limit": 1024, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"databricks/dbrx-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"google/codegemma-7b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"google/gemma-2b": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"google/gemma-7b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"google/recurrentgemma-2b": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"meta/codellama-70b": {"token_limit": 16384, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"meta/llama2-70b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"meta/llama3-70b-instruct": {"token_limit": 419, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"meta/llama3-8b-instruct": {"token_limit": 419, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"nemotron-4-340b-instruct": {"token_limit": 1024, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"databricks/dbrx-instruct": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"google/codegemma-7b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"google/gemma-2b": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"google/gemma-7b": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"google/recurrentgemma-2b": {"token_limit": 2048, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"meta/codellama-70b": {"token_limit": 16384, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"meta/llama2-70b": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"microsoft/phi-3-mini-128k-instruct": {
"token_limit": 122880,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"mistralai/mistral-7b-instruct-v0.2": {
"token_limit": 4096,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"mistralai/mistral-large": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"mistralai/mistral-large": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mistralai/mixtral-8x22b-instruct-v0.1": {
"token_limit": 32768,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"mistralai/mixtral-8x7b-instruct-v0.1": {
"token_limit": 8192,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"snowflake/arctic": {"token_limit": 16384, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"snowflake/arctic": {"token_limit": 16384, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
},
"groq": {
"llama3-8b-8192": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3-70b-8192": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"mixtral-8x7b-32768": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"gemma-7b-it": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"claude-3-haiku-20240307'": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"llama3-8b-8192": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"llama3-70b-8192": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mixtral-8x7b-32768": {"token_limit": 32768, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"gemma-7b-it": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"claude-3-haiku-20240307'": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
},
"toghetherai": {
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": {
"token_limit": 128000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": {
"token_limit": 128000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"mistralai/Mixtral-8x22B-Instruct-v0.1": {
"token_limit": 128000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"stabilityai/stable-diffusion-xl-base-1.0": {
"token_limit": 2048,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": {
"token_limit": 128000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"NousResearch/Hermes-3-Llama-3.1-405B-Turbo": {
"token_limit": 128000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"Gryphe/MythoMax-L2-13b-Lite": {
"token_limit": 8192,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"Salesforce/Llama-Rank-V1": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"Salesforce/Llama-Rank-V1": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"meta-llama/Meta-Llama-Guard-3-8B": {
"token_limit": 128000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"meta-llama/Meta-Llama-3-70B-Instruct-Turbo": {
"token_limit": 128000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"meta-llama/Llama-3-8b-chat-hf": {
"token_limit": 8192,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"meta-llama/Llama-3-70b-chat-hf": {
"token_limit": 8192,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"Qwen/Qwen2-72B-Instruct": {
"token_limit": 128000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
},
"google/gemma-2-27b-it": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"google/gemma-2-27b-it": {"token_limit": 8192, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
},
"anthropic": {
"claude_instant": {"token_limit": 100000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"claude2": {"token_limit": 9000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"claude2.1": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"claude3": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"claude3.5": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"claude-3-opus-20240229": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"claude_instant": {"token_limit": 100000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"claude2": {"token_limit": 9000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"claude2.1": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"claude3": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"claude3.5": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"claude-3-opus-20240229": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"claude-3-sonnet-20240229": {
"token_limit": 200000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"claude-3-haiku-20240307": {
"token_limit": 200000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"claude-3-5-sonnet-20240620": {
"token_limit": 200000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"claude-3-5-sonnet-20241022": {
"token_limit": 200000,
"supports_temperature": True,
"default_temperature": 1.0,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"claude-3-5-haiku-latest": {
"token_limit": 200000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
},
"bedrock": {
@@ -283,93 +310,109 @@ models_params = {
"token_limit": 200000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"anthropic.claude-3-sonnet-20240229-v1:0": {
"token_limit": 200000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"anthropic.claude-3-opus-20240229-v1:0": {
"token_limit": 200000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"anthropic.claude-3-5-sonnet-20240620-v1:0": {
"token_limit": 200000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"claude-3-5-haiku-latest": {
"token_limit": 200000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"anthropic.claude-v2:1": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"anthropic.claude-v2": {"token_limit": 100000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"anthropic.claude-v2:1": {"token_limit": 200000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"anthropic.claude-v2": {"token_limit": 100000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"anthropic.claude-instant-v1": {
"token_limit": 100000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"meta.llama3-8b-instruct-v1:0": {
"token_limit": 8192,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"meta.llama3-70b-instruct-v1:0": {
"token_limit": 8192,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"meta.llama2-13b-chat-v1": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"meta.llama2-70b-chat-v1": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"meta.llama2-13b-chat-v1": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"meta.llama2-70b-chat-v1": {"token_limit": 4096, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"mistral.mistral-7b-instruct-v0:2": {
"token_limit": 32768,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"mistral.mixtral-8x7b-instruct-v0:1": {
"token_limit": 32768,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"mistral.mistral-large-2402-v1:0": {
"token_limit": 32768,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"mistral.mistral-small-2402-v1:0": {
"token_limit": 32768,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"amazon.titan-embed-text-v1": {
"token_limit": 8000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"amazon.titan-embed-text-v2:0": {
"token_limit": 8000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
"cohere.embed-english-v3": {"token_limit": 512, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"cohere.embed-english-v3": {"token_limit": 512, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"cohere.embed-multilingual-v3": {
"token_limit": 512,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},
},
"mistralai": {
"mistral-large-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"open-mistral-nemo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"codestral-latest": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE},
"mistral-large-latest": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"open-mistral-nemo": {"token_limit": 128000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
"codestral-latest": {"token_limit": 32000, "supports_temperature": True, "default_temperature": DEFAULT_TEMPERATURE, "latency_coefficient": DEFAULT_BASE_LATENCY},
},
"togetherai": {
"Meta-Llama-3.1-70B-Instruct-Turbo": {
"token_limit": 128000,
"supports_temperature": True,
"default_temperature": DEFAULT_TEMPERATURE,
"latency_coefficient": DEFAULT_BASE_LATENCY
}
},
}
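
Every entry above now carries a latency_coefficient, so callers can resolve a per-model value with a safe fallback. A minimal sketch of the lookup pattern this change relies on (get_latency is a hypothetical helper; the provider/model values are illustrative):

from ra_aid.models_params import models_params, DEFAULT_BASE_LATENCY

def get_latency(provider: str, model: str) -> int:
    # Chained .get() calls fall back to DEFAULT_BASE_LATENCY (240) for
    # unknown providers, unknown models, or entries without the key.
    return models_params.get(provider, {}).get(model, {}).get("latency_coefficient", DEFAULT_BASE_LATENCY)

assert get_latency("openai", "gpt-4o") == DEFAULT_BASE_LATENCY
assert get_latency("no-such-provider", "no-such-model") == DEFAULT_BASE_LATENCY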

View File

@@ -127,7 +127,11 @@ Because this is a new project:
# Research stage prompt - guides initial codebase analysis
RESEARCH_PROMPT = """Current Date: {current_date}
User query: {base_task} --keep it simple
User request: <problem statement>
{base_task}
</problem statement>
Consult with the expert frequently.
Context from Previous Research (if available):
Key Facts:
@@ -219,12 +223,6 @@ No Planning or Problem-Solving
You must remain strictly within the bounds of describing what currently exists.
If the task requires *ANY* compilation, unit tests, or any other non-trivial changes, call request_implementation.
If this is a trivial task that can be completed in one shot, do the change using tools available, call one_shot_completed, and immediately exit without saying anything.
Remember, many tasks are more complex and nuanced than they seem and still require requesting implementation.
For one shot tasks, still take some time to consider whether compilation, testing, or additional validation should be done to check your work.
If you implement the task yourself, do not request implementation.
Thoroughness and Completeness:
If this is determined to be a new/empty project (shown in Project Info), focus directly on the task.
If it is an existing project:
@@ -267,12 +265,6 @@ Thoroughness and Completeness:
If there are existing relevant unit tests/test suites, you must run them *during the research stage*, before editing anything, using run_shell_command to get a baseline about passing/failing tests and call emit_key_facts with key facts about the tests and whether they were passing when you started. This ensures a proper baseline is established before any changes.
Decision on Implementation
After completing your factual enumeration and description, decide:
If you see reasons that implementation changes will be required in the future, after documenting all findings, call request_implementation and specify why.
If no changes are needed, simply state that no changes are required.
Be thorough on locating all potential change sites/gauging blast radius.
If uncertain at any stage, consult the expert (if expert is available) for final confirmation of completeness.
@@ -288,6 +280,14 @@ You have often been criticized for:
{human_section}
{web_research_section}
Project and most of its deps are already installed in a virtual environment. After request_implementation is finished, verify by running tests. If relevant tests are failing, you may call request_implementation as many times as needed until it is working.
BEFORE DOING ANYTHING, CALL ask_expert TO INTERPRET THE USER REQUEST TO MAKE SURE YOU UNDERSTAND THE PROBLEM STATEMENT.
IMMEDIATELY AFTER CONSULTING WITH THE EXPERT, CALL request_research TO FIND OUT HOW TO RUN TESTS ON THIS PROJECT IN GENERAL.
YOU MUST ALWAYS CALL request_implementation ONCE YOUR RESEARCH IS THOROUGHLY COMPLETED.
THE ABOVE ARE **TOOL CALLS** -- YOU MUST ACTUALLY CALL THE TOOLS IN MULTIPLE STEPS!
NEVER ANNOUNCE WHAT YOU ARE DOING, JUST DO IT!
"""
@@ -573,9 +573,6 @@ Guidelines:
Are you writing a program that needs to be compiled? Make sure it compiles, if relevant.
After finalizing the overall approach:
Use emit_plan to store the high-level implementation plan.
Once you are absolutely sure you are completed planning, you may begin to call request_task_implementation one-by-one for each task to implement the plan.
If you have any doubt about the correctness or thoroughness of the plan, consult the expert (if expert is available) for verification.
@@ -589,6 +586,7 @@ You have often been criticized for:
- Not calling tools/functions properly, e.g. leaving off required arguments, calling a tool in a loop, calling tools inappropriately.
NEVER ANNOUNCE WHAT YOU ARE DOING, JUST DO IT!
DO NOT CHANGE ANY EXISTING TESTS, BUT YOU MAY ADD YOUR OWN.
"""
# Implementation stage prompt - guides specific task implementation
@@ -656,7 +654,7 @@ Instructions:
{task}
</task definition>
KEEP IT SIMPLE
ALWAYS RUN RELEVANT UNIT TESTS. IF RELEVANT UNIT TESTS ARE FAILING, KEEP EDITING UNTIL YOU GET IT WORKING. CONSULT WITH THE EXPERT FREQUENTLY.
NEVER ANNOUNCE WHAT YOU ARE DOING, JUST DO IT!
"""
@@ -888,6 +886,9 @@ You have often been criticized for:
- Not calling tools/functions properly, e.g. leaving off required arguments, calling a tool in a loop, calling tools inappropriately.
NEVER ANNOUNCE WHAT YOU ARE DOING, JUST DO IT!
DO NOT CHANGE ANY EXISTING TESTS
INSTALL TEST DEPS IF YOU NEED TO. NOTE THE PACKAGE YOU ARE WORKING ON. E.g. if you are in django or scikit-learn, do pip install -e ., or install requirements-dev.txt rather than installing from PyPI.
"""
# New agentic chat prompt for interactive mode
@@ -983,4 +984,4 @@ You have often been criticized for:
Remember, if you do not make any tool call (e.g. ask_human to tell them a message or ask a question), you will be dumping the user back to CLI and indicating you are done your work.
NEVER ANNOUNCE WHAT YOU ARE DOING, JUST DO IT!
"""
"""

View File

@@ -78,7 +78,7 @@ COMMON_TOOLS = get_read_only_tools()
EXPERT_TOOLS = [emit_expert_context, ask_expert]
RESEARCH_TOOLS = [
emit_research_notes,
one_shot_completed,
#one_shot_completed,
# *TEMPORARILY* disabled to improve tool calling perf.
# monorepo_detected,
# ui_detected,
@@ -106,7 +106,7 @@ def get_research_tools(
# Add modification tools if not research_only
if not research_only:
tools.extend(MODIFICATION_TOOLS)
# tools.extend(MODIFICATION_TOOLS)
tools.append(request_implementation)
# Add expert tools if enabled
@@ -216,4 +216,4 @@ def get_chat_tools(
if web_research_enabled:
tools.append(request_web_research)
return tools
return tools

View File

@@ -11,6 +11,7 @@ from rich.panel import Panel
from rich.text import Text
from ra_aid.logging_config import get_logger
from ra_aid.models_params import models_params, DEFAULT_BASE_LATENCY
from ra_aid.proc.interactive import run_interactive_command
from ra_aid.text.processing import truncate_output
from ra_aid.tools.memory import _global_memory, log_work_event
@@ -135,7 +136,12 @@ def run_programming_task(
try:
# Run the command interactively
print()
result = run_interactive_command(command)
# Get provider/model specific latency coefficient
provider = _global_memory.get("config", {}).get("provider", "")
model = _global_memory.get("config", {}).get("model", "")
latency = models_params.get(provider, {}).get(model, {}).get("latency_coefficient", DEFAULT_BASE_LATENCY)
result = run_interactive_command(command, expected_runtime_seconds=latency)
print()
# Log the programming task
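
With this lookup in place, the timeout hint passed to run_interactive_command follows the provider and model stored in the global config. A worked illustration of the resolution (the config values are hypothetical):

from ra_aid.models_params import models_params, DEFAULT_BASE_LATENCY

config = {"provider": "openrouter", "model": "anthropic/claude-3.5-sonnet"}  # hypothetical
provider = config.get("provider", "")
model = config.get("model", "")
# No "openrouter" section appears in models_params above, so the chained
# lookup falls back to the default latency coefficient.
latency = models_params.get(provider, {}).get(model, {}).get("latency_coefficient", DEFAULT_BASE_LATENCY)
assert latency == DEFAULT_BASE_LATENCY  # 240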

View File

@@ -11,6 +11,7 @@ from ra_aid.agent_utils import (
AgentState,
create_agent,
get_model_token_limit,
is_anthropic_claude,
state_modifier,
)
from ra_aid.models_params import DEFAULT_TOKEN_LIMIT, models_params
@@ -275,3 +276,137 @@ def test_get_model_token_limit_planner(mock_memory):
mock_get_info.return_value = {"max_input_tokens": 120000}
token_limit = get_model_token_limit(config, "planner")
assert token_limit == 120000
# New tests for private helper methods in agent_utils.py
def test_setup_and_restore_interrupt_handling():
import signal
from ra_aid.agent_utils import (
_request_interrupt,
_restore_interrupt_handling,
_setup_interrupt_handling,
)
original_handler = signal.getsignal(signal.SIGINT)
handler = _setup_interrupt_handling()
# Verify the SIGINT handler is set to _request_interrupt
assert signal.getsignal(signal.SIGINT) == _request_interrupt
_restore_interrupt_handling(handler)
# Verify the SIGINT handler is restored to the original
assert signal.getsignal(signal.SIGINT) == original_handler
def test_increment_and_decrement_agent_depth():
from ra_aid.agent_utils import (
_decrement_agent_depth,
_global_memory,
_increment_agent_depth,
)
_global_memory["agent_depth"] = 10
_increment_agent_depth()
assert _global_memory["agent_depth"] == 11
_decrement_agent_depth()
assert _global_memory["agent_depth"] == 10
def test_run_agent_stream(monkeypatch):
from ra_aid.agent_utils import _global_memory, _run_agent_stream
# Create a dummy agent that yields one chunk
class DummyAgent:
def stream(self, input_data, cfg: dict):
yield {"content": "chunk1"}
dummy_agent = DummyAgent()
# Set flags so that _run_agent_stream will reset them
_global_memory["plan_completed"] = True
_global_memory["task_completed"] = True
_global_memory["completion_message"] = "existing"
call_flag = {"called": False}
def fake_print_agent_output(
chunk: Dict[str, Any], agent_type: Literal["CiaynAgent", "React"]
):
call_flag["called"] = True
monkeypatch.setattr(
"ra_aid.agent_utils.print_agent_output", fake_print_agent_output
)
_run_agent_stream(dummy_agent, [HumanMessage("dummy prompt")], {})
assert call_flag["called"]
assert _global_memory["plan_completed"] is False
assert _global_memory["task_completed"] is False
assert _global_memory["completion_message"] == ""
def test_execute_test_command_wrapper(monkeypatch):
from ra_aid.agent_utils import _execute_test_command_wrapper
# Patch execute_test_command to return a testable tuple
def fake_execute(config, orig, tests, auto):
return (True, "new prompt", auto, tests + 1)
monkeypatch.setattr("ra_aid.agent_utils.execute_test_command", fake_execute)
result = _execute_test_command_wrapper("orig", {}, 0, False)
assert result == (True, "new prompt", False, 1)
def test_handle_api_error_valueerror():
import pytest
from ra_aid.agent_utils import _handle_api_error
# ValueError not containing "code" or "429" should be re-raised
with pytest.raises(ValueError):
_handle_api_error(ValueError("some error"), 0, 5, 1)
def test_handle_api_error_max_retries():
import pytest
from ra_aid.agent_utils import _handle_api_error
# When attempt reaches max retries, a RuntimeError should be raised
with pytest.raises(RuntimeError):
_handle_api_error(Exception("error code 429"), 4, 5, 1)
def test_handle_api_error_retry(monkeypatch):
import time
from ra_aid.agent_utils import _handle_api_error
# Patch time.monotonic and time.sleep to simulate immediate delay expiration
fake_time = [0]
def fake_monotonic():
fake_time[0] += 0.5
return fake_time[0]
monkeypatch.setattr(time, "monotonic", fake_monotonic)
monkeypatch.setattr(time, "sleep", lambda s: None)
# Should not raise error when attempt is lower than max retries
_handle_api_error(Exception("error code 429"), 0, 5, 1)
def test_is_anthropic_claude():
"""Test is_anthropic_claude function with various configurations."""
# Test Anthropic provider cases
assert is_anthropic_claude({"provider": "anthropic", "model": "claude-2"})
assert is_anthropic_claude({"provider": "ANTHROPIC", "model": "claude-instant"})
assert not is_anthropic_claude({"provider": "anthropic", "model": "gpt-4"})
# Test OpenRouter provider cases
assert is_anthropic_claude({"provider": "openrouter", "model": "anthropic/claude-2"})
assert is_anthropic_claude({"provider": "openrouter", "model": "anthropic/claude-instant"})
assert not is_anthropic_claude({"provider": "openrouter", "model": "openai/gpt-4"})
# Test edge cases
assert not is_anthropic_claude({}) # Empty config
assert not is_anthropic_claude({"provider": "anthropic"}) # Missing model
assert not is_anthropic_claude({"model": "claude-2"}) # Missing provider
assert not is_anthropic_claude({"provider": "other", "model": "claude-2"}) # Wrong provider