Refactor models_tokens to be models_params so we can track multiple parameters on a per-model basis.
parent 7913df4db1
commit f7e02fc98e

CHANGELOG.md: 23 lines changed
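The refactor changes the shape of the per-model lookup table: in models_tokens each model name mapped to a bare token count, while in models_params each model name maps to a dict of parameters, currently holding only "token_limit". A minimal sketch of the shape change, with an illustrative value (the claude2 entry in the new module is 9000):

    # Old shape (ra_aid.models_tokens): one integer per model (value illustrative).
    models_tokens = {"anthropic": {"claude2": 9000}}

    # New shape (ra_aid.models_params): a parameter dict per model, so further
    # per-model settings can be added later without restructuring the table.
    models_params = {"anthropic": {"claude2": {"token_limit": 9000}}}

    # Callers now read the limit through the "token_limit" key:
    token_limit = models_params["anthropic"]["claude2"]["token_limit"]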
@@ -5,6 +5,29 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [Unreleased]
+
+### Added
+- WebUI (#61)
+
+### Changed
+- Convert list input to string to handle create-react-agent tool calls correctly (#66)
+- Add commands for code checking and fixing using ruff (#63)
+
+### Fixed
+- Fix token estimation
+- Fix tests
+- Prevent duplicate files (#64)
+- Ensure default temperature is set correctly for different providers
+- Do not incorrectly pass the temp parameter to the expert model
+- Correct URLs that referenced ai-christianson/ra-aid; they should reference ai-christianson/RA.Aid (#69)
+
+### Improved
+- Integrate litellm to retrieve model token limits for better flexibility (#51)
+- Handle user-defined test cmd (#59)
+- Run tests during GitHub CI/CD (#58)
+- Refactor models_tokens to be models_params so we can track multiple parameters on a per-model basis.
+
 ## [0.13.0] - 2025-01-22
 
 ### Added
@@ -26,7 +26,7 @@ from ra_aid.console.formatting import print_error, print_stage_header
 from ra_aid.console.output import print_agent_output
 from ra_aid.exceptions import AgentInterrupt
 from ra_aid.logging_config import get_logger
-from ra_aid.models_tokens import DEFAULT_TOKEN_LIMIT, models_tokens
+from ra_aid.models_params import DEFAULT_TOKEN_LIMIT, models_params
 from ra_aid.project_info import (
     display_project_status,
     format_project_info,
@@ -143,23 +143,24 @@ def get_model_token_limit(config: Dict[str, Any]) -> Optional[int]:
         return max_input_tokens
     except litellm.exceptions.NotFoundError:
         logger.debug(
-            f"Model {model_name} not found in litellm, falling back to models_tokens"
+            f"Model {model_name} not found in litellm, falling back to models_params"
         )
     except Exception as e:
         logger.debug(
-            f"Error getting model info from litellm: {e}, falling back to models_tokens"
+            f"Error getting model info from litellm: {e}, falling back to models_params"
         )
 
-    # Fallback to models_tokens dict
+    # Fallback to models_params dict
     # Normalize model name for fallback lookup (e.g. claude-2 -> claude2)
     normalized_name = model_name.replace("-", "")
-    provider_tokens = models_tokens.get(provider, {})
-    max_input_tokens = provider_tokens.get(normalized_name, None)
-    if max_input_tokens:
+    provider_tokens = models_params.get(provider, {})
+    if normalized_name in provider_tokens:
+        max_input_tokens = provider_tokens[normalized_name]["token_limit"]
         logger.debug(
             f"Found token limit for {provider}/{model_name}: {max_input_tokens}"
         )
     else:
+        max_input_tokens = None
         logger.debug(f"Could not find token limit for {provider}/{model_name}")
 
     return max_input_tokens
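For illustration, a condensed sketch of the fallback lookup this hunk implements; the helper name lookup_token_limit is hypothetical and only mirrors the logic inside get_model_token_limit:

    from typing import Optional

    from ra_aid.models_params import models_params

    def lookup_token_limit(provider: str, model_name: str) -> Optional[int]:
        # Normalize the model name the same way the fallback path does
        # (e.g. "claude-2" -> "claude2") before checking models_params.
        normalized_name = model_name.replace("-", "")
        provider_params = models_params.get(provider, {})
        if normalized_name in provider_params:
            return provider_params[normalized_name]["token_limit"]
        # Unknown provider/model combinations yield None, as in the hunk above.
        return None

    # Example: lookup_token_limit("anthropic", "claude-2") reads
    # models_params["anthropic"]["claude2"]["token_limit"].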
@@ -6,7 +6,7 @@ from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, System
 
 from ra_aid.exceptions import ToolExecutionError
 from ra_aid.logging_config import get_logger
-from ra_aid.models_tokens import DEFAULT_TOKEN_LIMIT
+from ra_aid.models_params import DEFAULT_TOKEN_LIMIT
 from ra_aid.tools.reflection import get_function_info
 
 logger = get_logger(__name__)
@@ -0,0 +1,203 @@
"""
List of model parameters
"""

DEFAULT_TOKEN_LIMIT = 100000

models_params = {
    "openai": {
        "gpt-3.5-turbo-0125": {"token_limit": 16385},
        "gpt-3.5": {"token_limit": 4096},
        "gpt-3.5-turbo": {"token_limit": 16385},
        "gpt-3.5-turbo-1106": {"token_limit": 16385},
        "gpt-3.5-turbo-instruct": {"token_limit": 4096},
        "gpt-4-0125-preview": {"token_limit": 128000},
        "gpt-4-turbo-preview": {"token_limit": 128000},
        "gpt-4-turbo": {"token_limit": 128000},
        "gpt-4-turbo-2024-04-09": {"token_limit": 128000},
        "gpt-4-1106-preview": {"token_limit": 128000},
        "gpt-4-vision-preview": {"token_limit": 128000},
        "gpt-4": {"token_limit": 8192},
        "gpt-4-0613": {"token_limit": 8192},
        "gpt-4-32k": {"token_limit": 32768},
        "gpt-4-32k-0613": {"token_limit": 32768},
        "gpt-4o": {"token_limit": 128000},
        "gpt-4o-2024-08-06": {"token_limit": 128000},
        "gpt-4o-2024-05-13": {"token_limit": 128000},
        "gpt-4o-mini": {"token_limit": 128000},
        "o1-preview": {"token_limit": 128000},
        "o1-mini": {"token_limit": 128000}
    },
    "azure_openai": {
        "gpt-3.5-turbo-0125": {"token_limit": 16385},
        "gpt-3.5": {"token_limit": 4096},
        "gpt-3.5-turbo": {"token_limit": 16385},
        "gpt-3.5-turbo-1106": {"token_limit": 16385},
        "gpt-3.5-turbo-instruct": {"token_limit": 4096},
        "gpt-4-0125-preview": {"token_limit": 128000},
        "gpt-4-turbo-preview": {"token_limit": 128000},
        "gpt-4-turbo": {"token_limit": 128000},
        "gpt-4-turbo-2024-04-09": {"token_limit": 128000},
        "gpt-4-1106-preview": {"token_limit": 128000},
        "gpt-4-vision-preview": {"token_limit": 128000},
        "gpt-4": {"token_limit": 8192},
        "gpt-4-0613": {"token_limit": 8192},
        "gpt-4-32k": {"token_limit": 32768},
        "gpt-4-32k-0613": {"token_limit": 32768},
        "gpt-4o": {"token_limit": 128000},
        "gpt-4o-mini": {"token_limit": 128000},
        "chatgpt-4o-latest": {"token_limit": 128000},
        "o1-preview": {"token_limit": 128000},
        "o1-mini": {"token_limit": 128000}
    },
    "google_genai": {
        "gemini-pro": {"token_limit": 128000},
        "gemini-1.5-flash-latest": {"token_limit": 128000},
        "gemini-1.5-pro-latest": {"token_limit": 128000},
        "models/embedding-001": {"token_limit": 2048}
    },
    "google_vertexai": {
        "gemini-1.5-flash": {"token_limit": 128000},
        "gemini-1.5-pro": {"token_limit": 128000},
        "gemini-1.0-pro": {"token_limit": 128000}
    },
    "ollama": {
        "command-r": {"token_limit": 12800},
        "codellama": {"token_limit": 16000},
        "dbrx": {"token_limit": 32768},
        "deepseek-coder:33b": {"token_limit": 16000},
        "falcon": {"token_limit": 2048},
        "llama2": {"token_limit": 4096},
        "llama2:7b": {"token_limit": 4096},
        "llama2:13b": {"token_limit": 4096},
        "llama2:70b": {"token_limit": 4096},
        "llama3": {"token_limit": 8192},
        "llama3:8b": {"token_limit": 8192},
        "llama3:70b": {"token_limit": 8192},
        "llama3.1": {"token_limit": 128000},
        "llama3.1:8b": {"token_limit": 128000},
        "llama3.1:70b": {"token_limit": 128000},
        "lama3.1:405b": {"token_limit": 128000},
        "llama3.2": {"token_limit": 128000},
        "llama3.2:1b": {"token_limit": 128000},
        "llama3.2:3b": {"token_limit": 128000},
        "llama3.3:70b": {"token_limit": 128000},
        "scrapegraph": {"token_limit": 8192},
        "mistral-small": {"token_limit": 128000},
        "mistral-openorca": {"token_limit": 32000},
        "mistral-large": {"token_limit": 128000},
        "grok-1": {"token_limit": 8192},
        "llava": {"token_limit": 4096},
        "mixtral:8x22b-instruct": {"token_limit": 65536},
        "nomic-embed-text": {"token_limit": 8192},
        "nous-hermes2:34b": {"token_limit": 4096},
        "orca-mini": {"token_limit": 2048},
        "phi3:3.8b": {"token_limit": 12800},
        "phi3:14b": {"token_limit": 128000},
        "qwen:0.5b": {"token_limit": 32000},
        "qwen:1.8b": {"token_limit": 32000},
        "qwen:4b": {"token_limit": 32000},
        "qwen:14b": {"token_limit": 32000},
        "qwen:32b": {"token_limit": 32000},
        "qwen:72b": {"token_limit": 32000},
        "qwen:110b": {"token_limit": 32000},
        "stablelm-zephyr": {"token_limit": 8192},
        "wizardlm2:8x22b": {"token_limit": 65536},
        "mistral": {"token_limit": 128000},
        "gemma2": {"token_limit": 128000},
        "gemma2:9b": {"token_limit": 128000},
        "gemma2:27b": {"token_limit": 128000},
        # embedding models
        "shaw/dmeta-embedding-zh-small-q4": {"token_limit": 8192},
        "shaw/dmeta-embedding-zh-q4": {"token_limit": 8192},
        "chevalblanc/acge_text_embedding": {"token_limit": 8192},
        "martcreation/dmeta-embedding-zh": {"token_limit": 8192},
        "snowflake-arctic-embed": {"token_limit": 8192},
        "mxbai-embed-large": {"token_limit": 512}
    },
    "oneapi": {
        "qwen-turbo": {"token_limit": 6000}
    },
    "nvidia": {
        "meta/llama3-70b-instruct": {"token_limit": 419},
        "meta/llama3-8b-instruct": {"token_limit": 419},
        "nemotron-4-340b-instruct": {"token_limit": 1024},
        "databricks/dbrx-instruct": {"token_limit": 4096},
        "google/codegemma-7b": {"token_limit": 8192},
        "google/gemma-2b": {"token_limit": 2048},
        "google/gemma-7b": {"token_limit": 8192},
        "google/recurrentgemma-2b": {"token_limit": 2048},
        "meta/codellama-70b": {"token_limit": 16384},
        "meta/llama2-70b": {"token_limit": 4096},
        "microsoft/phi-3-mini-128k-instruct": {"token_limit": 122880},
        "mistralai/mistral-7b-instruct-v0.2": {"token_limit": 4096},
        "mistralai/mistral-large": {"token_limit": 8192},
        "mistralai/mixtral-8x22b-instruct-v0.1": {"token_limit": 32768},
        "mistralai/mixtral-8x7b-instruct-v0.1": {"token_limit": 8192},
        "snowflake/arctic": {"token_limit": 16384}
    },
    "groq": {
        "llama3-8b-8192": {"token_limit": 8192},
        "llama3-70b-8192": {"token_limit": 8192},
        "mixtral-8x7b-32768": {"token_limit": 32768},
        "gemma-7b-it": {"token_limit": 8192},
        "claude-3-haiku-20240307'": {"token_limit": 8192}
    },
    "toghetherai": {
        "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": {"token_limit": 128000},
        "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": {"token_limit": 128000},
        "mistralai/Mixtral-8x22B-Instruct-v0.1": {"token_limit": 128000},
        "stabilityai/stable-diffusion-xl-base-1.0": {"token_limit": 2048},
        "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": {"token_limit": 128000},
        "NousResearch/Hermes-3-Llama-3.1-405B-Turbo": {"token_limit": 128000},
        "Gryphe/MythoMax-L2-13b-Lite": {"token_limit": 8192},
        "Salesforce/Llama-Rank-V1": {"token_limit": 8192},
        "meta-llama/Meta-Llama-Guard-3-8B": {"token_limit": 128000},
        "meta-llama/Meta-Llama-3-70B-Instruct-Turbo": {"token_limit": 128000},
        "meta-llama/Llama-3-8b-chat-hf": {"token_limit": 8192},
        "meta-llama/Llama-3-70b-chat-hf": {"token_limit": 8192},
        "Qwen/Qwen2-72B-Instruct": {"token_limit": 128000},
        "google/gemma-2-27b-it": {"token_limit": 8192}
    },
    "anthropic": {
        "claude_instant": {"token_limit": 100000},
        "claude2": {"token_limit": 9000},
        "claude2.1": {"token_limit": 200000},
        "claude3": {"token_limit": 200000},
        "claude3.5": {"token_limit": 200000},
        "claude-3-opus-20240229": {"token_limit": 200000},
        "claude-3-sonnet-20240229": {"token_limit": 200000},
        "claude-3-haiku-20240307": {"token_limit": 200000},
        "claude-3-5-sonnet-20240620": {"token_limit": 200000},
        "claude-3-5-sonnet-20241022": {"token_limit": 200000},
        "claude-3-5-haiku-latest": {"token_limit": 200000}
    },
    "bedrock": {
        "anthropic.claude-3-haiku-20240307-v1:0": {"token_limit": 200000},
        "anthropic.claude-3-sonnet-20240229-v1:0": {"token_limit": 200000},
        "anthropic.claude-3-opus-20240229-v1:0": {"token_limit": 200000},
        "anthropic.claude-3-5-sonnet-20240620-v1:0": {"token_limit": 200000},
        "claude-3-5-haiku-latest": {"token_limit": 200000},
        "anthropic.claude-v2:1": {"token_limit": 200000},
        "anthropic.claude-v2": {"token_limit": 100000},
        "anthropic.claude-instant-v1": {"token_limit": 100000},
        "meta.llama3-8b-instruct-v1:0": {"token_limit": 8192},
        "meta.llama3-70b-instruct-v1:0": {"token_limit": 8192},
        "meta.llama2-13b-chat-v1": {"token_limit": 4096},
        "meta.llama2-70b-chat-v1": {"token_limit": 4096},
        "mistral.mistral-7b-instruct-v0:2": {"token_limit": 32768},
        "mistral.mixtral-8x7b-instruct-v0:1": {"token_limit": 32768},
        "mistral.mistral-large-2402-v1:0": {"token_limit": 32768},
        "mistral.mistral-small-2402-v1:0": {"token_limit": 32768},
        "amazon.titan-embed-text-v1": {"token_limit": 8000},
        "amazon.titan-embed-text-v2:0": {"token_limit": 8000},
        "cohere.embed-english-v3": {"token_limit": 512},
        "cohere.embed-multilingual-v3": {"token_limit": 512}
    },
    "mistralai": {
        "mistral-large-latest": {"token_limit": 128000},
        "open-mistral-nemo": {"token_limit": 128000},
        "codestral-latest": {"token_limit": 32000}
    },
    "togetherai": {"Meta-Llama-3.1-70B-Instruct-Turbo": {"token_limit": 128000}}
}
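Because each model entry is now a dict rather than a bare integer, additional per-model parameters can sit next to "token_limit" without changing the structure. A hypothetical sketch of such an extension (the "supports_temperature" key is illustrative and not part of this commit):

    from ra_aid.models_params import models_params

    # Hypothetical: add another per-model flag alongside the token limit.
    models_params["anthropic"]["claude-3-5-sonnet-20241022"] = {
        "token_limit": 200000,
        "supports_temperature": True,  # illustrative extra parameter
    }

    # Existing callers keep reading the limit the same way:
    limit = models_params["anthropic"]["claude-3-5-sonnet-20241022"]["token_limit"]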
@@ -13,7 +13,7 @@ from ra_aid.agent_utils import (
     get_model_token_limit,
     state_modifier,
 )
-from ra_aid.models_tokens import DEFAULT_TOKEN_LIMIT, models_tokens
+from ra_aid.models_params import DEFAULT_TOKEN_LIMIT, models_params
 
 
 @pytest.fixture
@@ -36,7 +36,7 @@ def test_get_model_token_limit_anthropic(mock_memory):
     config = {"provider": "anthropic", "model": "claude2"}
 
     token_limit = get_model_token_limit(config)
-    assert token_limit == models_tokens["anthropic"]["claude2"]
+    assert token_limit == models_params["anthropic"]["claude2"]["token_limit"]
 
 
 def test_get_model_token_limit_openai(mock_memory):
@@ -44,7 +44,7 @@ def test_get_model_token_limit_openai(mock_memory):
     config = {"provider": "openai", "model": "gpt-4"}
 
     token_limit = get_model_token_limit(config)
-    assert token_limit == models_tokens["openai"]["gpt-4"]
+    assert token_limit == models_params["openai"]["gpt-4"]["token_limit"]
 
 
 def test_get_model_token_limit_unknown(mock_memory):
@@ -82,7 +82,7 @@ def test_get_model_token_limit_litellm_not_found():
             message="Model not found", model="claude-2", llm_provider="anthropic"
         )
         token_limit = get_model_token_limit(config)
-        assert token_limit == models_tokens["anthropic"]["claude2"]
+        assert token_limit == models_params["anthropic"]["claude2"]["token_limit"]
 
 
 def test_get_model_token_limit_litellm_error():
@@ -92,7 +92,7 @@ def test_get_model_token_limit_litellm_error():
     with patch("ra_aid.agent_utils.get_model_info") as mock_get_info:
         mock_get_info.side_effect = Exception("Unknown error")
         token_limit = get_model_token_limit(config)
-        assert token_limit == models_tokens["anthropic"]["claude2"]
+        assert token_limit == models_params["anthropic"]["claude2"]["token_limit"]
 
 
 def test_get_model_token_limit_unexpected_error():
@@ -127,7 +127,7 @@ def test_create_agent_openai(mock_model, mock_memory):
 
         assert agent == "ciayn_agent"
         mock_ciayn.assert_called_once_with(
-            mock_model, [], max_tokens=models_tokens["openai"]["gpt-4"]
+            mock_model, [], max_tokens=models_params["openai"]["gpt-4"]["token_limit"]
         )
 
 
@@ -201,7 +201,7 @@ def test_create_agent_with_checkpointer(mock_model, mock_memory):
 
         assert agent == "ciayn_agent"
         mock_ciayn.assert_called_once_with(
-            mock_model, [], max_tokens=models_tokens["openai"]["gpt-4"]
+            mock_model, [], max_tokens=models_params["openai"]["gpt-4"]["token_limit"]
         )
 
 