From f7e02fc98e2469175aaf74a67a464e0b9ddf999b Mon Sep 17 00:00:00 2001
From: AI Christianson
Date: Fri, 31 Jan 2025 16:05:44 -0500
Subject: [PATCH] Refactor models_tokens to be models_params so we can track
 multiple parameters on a per-model basis.

---
 CHANGELOG.md                     |  23 ++++
 ra_aid/agent_utils.py            |  15 +--
 ra_aid/agents/ciayn_agent.py     |   2 +-
 ra_aid/models_params.py          | 203 +++++++++++++++++++++++++++++++
 tests/ra_aid/test_agent_utils.py |  14 +--
 5 files changed, 242 insertions(+), 15 deletions(-)
 create mode 100644 ra_aid/models_params.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f66b709..f4e9f87 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,29 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [Unreleased]
+
+### Added
+- WebUI (#61)
+
+### Changed
+- Convert list input to string to handle create-react-agent tool calls correctly (#66)
+- Add commands for code checking and fixing using ruff (#63)
+
+### Fixed
+- Fix token estimation
+- Fix tests
+- Prevent duplicate files (#64)
+- Ensure default temperature is set correctly for different providers
+- Do not incorrectly give temp parameter to expert model
+- Correcting URLs that were referencing ai-christianson/ra-aid - should be ai-christianson/RA.Aid (#69)
+
+### Improved
+- Integrate litellm to retrieve model token limits for better flexibility (#51)
+- Handle user defined test cmd (#59)
+- Run tests during Github CICD (#58)
+- Refactor models_tokens to be models_params so we can track multiple parameters on a per-model basis.
+
 ## [0.13.0] - 2025-01-22
 
 ### Added
diff --git a/ra_aid/agent_utils.py b/ra_aid/agent_utils.py
index 009a734..05b7bde 100644
--- a/ra_aid/agent_utils.py
+++ b/ra_aid/agent_utils.py
@@ -26,7 +26,7 @@ from ra_aid.console.formatting import print_error, print_stage_header
 from ra_aid.console.output import print_agent_output
 from ra_aid.exceptions import AgentInterrupt
 from ra_aid.logging_config import get_logger
-from ra_aid.models_tokens import DEFAULT_TOKEN_LIMIT, models_tokens
+from ra_aid.models_params import DEFAULT_TOKEN_LIMIT, models_params
 from ra_aid.project_info import (
     display_project_status,
     format_project_info,
@@ -143,23 +143,24 @@ def get_model_token_limit(config: Dict[str, Any]) -> Optional[int]:
             return max_input_tokens
     except litellm.exceptions.NotFoundError:
         logger.debug(
-            f"Model {model_name} not found in litellm, falling back to models_tokens"
+            f"Model {model_name} not found in litellm, falling back to models_params"
         )
     except Exception as e:
         logger.debug(
-            f"Error getting model info from litellm: {e}, falling back to models_tokens"
+            f"Error getting model info from litellm: {e}, falling back to models_params"
         )
 
-    # Fallback to models_tokens dict
+    # Fallback to models_params dict
     # Normalize model name for fallback lookup (e.g. claude-2 -> claude2)
     normalized_name = model_name.replace("-", "")
-    provider_tokens = models_tokens.get(provider, {})
-    max_input_tokens = provider_tokens.get(normalized_name, None)
-    if max_input_tokens:
+    provider_tokens = models_params.get(provider, {})
+    if normalized_name in provider_tokens:
+        max_input_tokens = provider_tokens[normalized_name]["token_limit"]
         logger.debug(
             f"Found token limit for {provider}/{model_name}: {max_input_tokens}"
         )
     else:
+        max_input_tokens = None
         logger.debug(f"Could not find token limit for {provider}/{model_name}")
 
     return max_input_tokens
diff --git a/ra_aid/agents/ciayn_agent.py b/ra_aid/agents/ciayn_agent.py
index 3c3a51d..b089b10 100644
--- a/ra_aid/agents/ciayn_agent.py
+++ b/ra_aid/agents/ciayn_agent.py
@@ -6,7 +6,7 @@ from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, System
 
 from ra_aid.exceptions import ToolExecutionError
 from ra_aid.logging_config import get_logger
-from ra_aid.models_tokens import DEFAULT_TOKEN_LIMIT
+from ra_aid.models_params import DEFAULT_TOKEN_LIMIT
 from ra_aid.tools.reflection import get_function_info
 
 logger = get_logger(__name__)
diff --git a/ra_aid/models_params.py b/ra_aid/models_params.py
new file mode 100644
index 0000000..536eb05
--- /dev/null
+++ b/ra_aid/models_params.py
@@ -0,0 +1,203 @@
+"""
+List of model parameters
+"""
+
+DEFAULT_TOKEN_LIMIT = 100000
+
+models_params = {
+    "openai": {
+        "gpt-3.5-turbo-0125": {"token_limit": 16385},
+        "gpt-3.5": {"token_limit": 4096},
+        "gpt-3.5-turbo": {"token_limit": 16385},
+        "gpt-3.5-turbo-1106": {"token_limit": 16385},
+        "gpt-3.5-turbo-instruct": {"token_limit": 4096},
+        "gpt-4-0125-preview": {"token_limit": 128000},
+        "gpt-4-turbo-preview": {"token_limit": 128000},
+        "gpt-4-turbo": {"token_limit": 128000},
+        "gpt-4-turbo-2024-04-09": {"token_limit": 128000},
+        "gpt-4-1106-preview": {"token_limit": 128000},
+        "gpt-4-vision-preview": {"token_limit": 128000},
+        "gpt-4": {"token_limit": 8192},
+        "gpt-4-0613": {"token_limit": 8192},
+        "gpt-4-32k": {"token_limit": 32768},
+        "gpt-4-32k-0613": {"token_limit": 32768},
+        "gpt-4o": {"token_limit": 128000},
+        "gpt-4o-2024-08-06": {"token_limit": 128000},
+        "gpt-4o-2024-05-13": {"token_limit": 128000},
+        "gpt-4o-mini": {"token_limit": 128000},
+        "o1-preview": {"token_limit": 128000},
+        "o1-mini": {"token_limit": 128000}
+    },
+    "azure_openai": {
+        "gpt-3.5-turbo-0125": {"token_limit": 16385},
+        "gpt-3.5": {"token_limit": 4096},
+        "gpt-3.5-turbo": {"token_limit": 16385},
+        "gpt-3.5-turbo-1106": {"token_limit": 16385},
+        "gpt-3.5-turbo-instruct": {"token_limit": 4096},
+        "gpt-4-0125-preview": {"token_limit": 128000},
+        "gpt-4-turbo-preview": {"token_limit": 128000},
+        "gpt-4-turbo": {"token_limit": 128000},
+        "gpt-4-turbo-2024-04-09": {"token_limit": 128000},
+        "gpt-4-1106-preview": {"token_limit": 128000},
+        "gpt-4-vision-preview": {"token_limit": 128000},
+        "gpt-4": {"token_limit": 8192},
+        "gpt-4-0613": {"token_limit": 8192},
+        "gpt-4-32k": {"token_limit": 32768},
+        "gpt-4-32k-0613": {"token_limit": 32768},
+        "gpt-4o": {"token_limit": 128000},
+        "gpt-4o-mini": {"token_limit": 128000},
+        "chatgpt-4o-latest": {"token_limit": 128000},
+        "o1-preview": {"token_limit": 128000},
+        "o1-mini": {"token_limit": 128000}
+    },
+    "google_genai": {
+        "gemini-pro": {"token_limit": 128000},
+        "gemini-1.5-flash-latest": {"token_limit": 128000},
+        "gemini-1.5-pro-latest": {"token_limit": 128000},
+        "models/embedding-001": {"token_limit": 2048}
+    },
+    "google_vertexai": {
+        "gemini-1.5-flash": {"token_limit": 128000},
"gemini-1.5-pro": {"token_limit": 128000}, + "gemini-1.0-pro": {"token_limit": 128000} + }, + "ollama": { + "command-r": {"token_limit": 12800}, + "codellama": {"token_limit": 16000}, + "dbrx": {"token_limit": 32768}, + "deepseek-coder:33b": {"token_limit": 16000}, + "falcon": {"token_limit": 2048}, + "llama2": {"token_limit": 4096}, + "llama2:7b": {"token_limit": 4096}, + "llama2:13b": {"token_limit": 4096}, + "llama2:70b": {"token_limit": 4096}, + "llama3": {"token_limit": 8192}, + "llama3:8b": {"token_limit": 8192}, + "llama3:70b": {"token_limit": 8192}, + "llama3.1": {"token_limit": 128000}, + "llama3.1:8b": {"token_limit": 128000}, + "llama3.1:70b": {"token_limit": 128000}, + "lama3.1:405b": {"token_limit": 128000}, + "llama3.2": {"token_limit": 128000}, + "llama3.2:1b": {"token_limit": 128000}, + "llama3.2:3b": {"token_limit": 128000}, + "llama3.3:70b": {"token_limit": 128000}, + "scrapegraph": {"token_limit": 8192}, + "mistral-small": {"token_limit": 128000}, + "mistral-openorca": {"token_limit": 32000}, + "mistral-large": {"token_limit": 128000}, + "grok-1": {"token_limit": 8192}, + "llava": {"token_limit": 4096}, + "mixtral:8x22b-instruct": {"token_limit": 65536}, + "nomic-embed-text": {"token_limit": 8192}, + "nous-hermes2:34b": {"token_limit": 4096}, + "orca-mini": {"token_limit": 2048}, + "phi3:3.8b": {"token_limit": 12800}, + "phi3:14b": {"token_limit": 128000}, + "qwen:0.5b": {"token_limit": 32000}, + "qwen:1.8b": {"token_limit": 32000}, + "qwen:4b": {"token_limit": 32000}, + "qwen:14b": {"token_limit": 32000}, + "qwen:32b": {"token_limit": 32000}, + "qwen:72b": {"token_limit": 32000}, + "qwen:110b": {"token_limit": 32000}, + "stablelm-zephyr": {"token_limit": 8192}, + "wizardlm2:8x22b": {"token_limit": 65536}, + "mistral": {"token_limit": 128000}, + "gemma2": {"token_limit": 128000}, + "gemma2:9b": {"token_limit": 128000}, + "gemma2:27b": {"token_limit": 128000}, + # embedding models + "shaw/dmeta-embedding-zh-small-q4": {"token_limit": 8192}, + "shaw/dmeta-embedding-zh-q4": {"token_limit": 8192}, + "chevalblanc/acge_text_embedding": {"token_limit": 8192}, + "martcreation/dmeta-embedding-zh": {"token_limit": 8192}, + "snowflake-arctic-embed": {"token_limit": 8192}, + "mxbai-embed-large": {"token_limit": 512} + }, + "oneapi": { + "qwen-turbo": {"token_limit": 6000} + }, + "nvidia": { + "meta/llama3-70b-instruct": {"token_limit": 419}, + "meta/llama3-8b-instruct": {"token_limit": 419}, + "nemotron-4-340b-instruct": {"token_limit": 1024}, + "databricks/dbrx-instruct": {"token_limit": 4096}, + "google/codegemma-7b": {"token_limit": 8192}, + "google/gemma-2b": {"token_limit": 2048}, + "google/gemma-7b": {"token_limit": 8192}, + "google/recurrentgemma-2b": {"token_limit": 2048}, + "meta/codellama-70b": {"token_limit": 16384}, + "meta/llama2-70b": {"token_limit": 4096}, + "microsoft/phi-3-mini-128k-instruct": {"token_limit": 122880}, + "mistralai/mistral-7b-instruct-v0.2": {"token_limit": 4096}, + "mistralai/mistral-large": {"token_limit": 8192}, + "mistralai/mixtral-8x22b-instruct-v0.1": {"token_limit": 32768}, + "mistralai/mixtral-8x7b-instruct-v0.1": {"token_limit": 8192}, + "snowflake/arctic": {"token_limit": 16384} + }, + "groq": { + "llama3-8b-8192": {"token_limit": 8192}, + "llama3-70b-8192": {"token_limit": 8192}, + "mixtral-8x7b-32768": {"token_limit": 32768}, + "gemma-7b-it": {"token_limit": 8192}, + "claude-3-haiku-20240307'": {"token_limit": 8192} + }, + "toghetherai": { + "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": {"token_limit": 128000}, + 
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": {"token_limit": 128000}, + "mistralai/Mixtral-8x22B-Instruct-v0.1": {"token_limit": 128000}, + "stabilityai/stable-diffusion-xl-base-1.0": {"token_limit": 2048}, + "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": {"token_limit": 128000}, + "NousResearch/Hermes-3-Llama-3.1-405B-Turbo": {"token_limit": 128000}, + "Gryphe/MythoMax-L2-13b-Lite": {"token_limit": 8192}, + "Salesforce/Llama-Rank-V1": {"token_limit": 8192}, + "meta-llama/Meta-Llama-Guard-3-8B": {"token_limit": 128000}, + "meta-llama/Meta-Llama-3-70B-Instruct-Turbo": {"token_limit": 128000}, + "meta-llama/Llama-3-8b-chat-hf": {"token_limit": 8192}, + "meta-llama/Llama-3-70b-chat-hf": {"token_limit": 8192}, + "Qwen/Qwen2-72B-Instruct": {"token_limit": 128000}, + "google/gemma-2-27b-it": {"token_limit": 8192} + }, + "anthropic": { + "claude_instant": {"token_limit": 100000}, + "claude2": {"token_limit": 9000}, + "claude2.1": {"token_limit": 200000}, + "claude3": {"token_limit": 200000}, + "claude3.5": {"token_limit": 200000}, + "claude-3-opus-20240229": {"token_limit": 200000}, + "claude-3-sonnet-20240229": {"token_limit": 200000}, + "claude-3-haiku-20240307": {"token_limit": 200000}, + "claude-3-5-sonnet-20240620": {"token_limit": 200000}, + "claude-3-5-sonnet-20241022": {"token_limit": 200000}, + "claude-3-5-haiku-latest": {"token_limit": 200000} + }, + "bedrock": { + "anthropic.claude-3-haiku-20240307-v1:0": {"token_limit": 200000}, + "anthropic.claude-3-sonnet-20240229-v1:0": {"token_limit": 200000}, + "anthropic.claude-3-opus-20240229-v1:0": {"token_limit": 200000}, + "anthropic.claude-3-5-sonnet-20240620-v1:0": {"token_limit": 200000}, + "claude-3-5-haiku-latest": {"token_limit": 200000}, + "anthropic.claude-v2:1": {"token_limit": 200000}, + "anthropic.claude-v2": {"token_limit": 100000}, + "anthropic.claude-instant-v1": {"token_limit": 100000}, + "meta.llama3-8b-instruct-v1:0": {"token_limit": 8192}, + "meta.llama3-70b-instruct-v1:0": {"token_limit": 8192}, + "meta.llama2-13b-chat-v1": {"token_limit": 4096}, + "meta.llama2-70b-chat-v1": {"token_limit": 4096}, + "mistral.mistral-7b-instruct-v0:2": {"token_limit": 32768}, + "mistral.mixtral-8x7b-instruct-v0:1": {"token_limit": 32768}, + "mistral.mistral-large-2402-v1:0": {"token_limit": 32768}, + "mistral.mistral-small-2402-v1:0": {"token_limit": 32768}, + "amazon.titan-embed-text-v1": {"token_limit": 8000}, + "amazon.titan-embed-text-v2:0": {"token_limit": 8000}, + "cohere.embed-english-v3": {"token_limit": 512}, + "cohere.embed-multilingual-v3": {"token_limit": 512} + }, + "mistralai": { + "mistral-large-latest": {"token_limit": 128000}, + "open-mistral-nemo": {"token_limit": 128000}, + "codestral-latest": {"token_limit": 32000} + }, + "togetherai": {"Meta-Llama-3.1-70B-Instruct-Turbo": {"token_limit": 128000}} +} diff --git a/tests/ra_aid/test_agent_utils.py b/tests/ra_aid/test_agent_utils.py index 35e59d1..d557c97 100644 --- a/tests/ra_aid/test_agent_utils.py +++ b/tests/ra_aid/test_agent_utils.py @@ -13,7 +13,7 @@ from ra_aid.agent_utils import ( get_model_token_limit, state_modifier, ) -from ra_aid.models_tokens import DEFAULT_TOKEN_LIMIT, models_tokens +from ra_aid.models_params import DEFAULT_TOKEN_LIMIT, models_params @pytest.fixture @@ -36,7 +36,7 @@ def test_get_model_token_limit_anthropic(mock_memory): config = {"provider": "anthropic", "model": "claude2"} token_limit = get_model_token_limit(config) - assert token_limit == models_tokens["anthropic"]["claude2"] + assert token_limit == 
models_params["anthropic"]["claude2"]["token_limit"] def test_get_model_token_limit_openai(mock_memory): @@ -44,7 +44,7 @@ def test_get_model_token_limit_openai(mock_memory): config = {"provider": "openai", "model": "gpt-4"} token_limit = get_model_token_limit(config) - assert token_limit == models_tokens["openai"]["gpt-4"] + assert token_limit == models_params["openai"]["gpt-4"]["token_limit"] def test_get_model_token_limit_unknown(mock_memory): @@ -82,7 +82,7 @@ def test_get_model_token_limit_litellm_not_found(): message="Model not found", model="claude-2", llm_provider="anthropic" ) token_limit = get_model_token_limit(config) - assert token_limit == models_tokens["anthropic"]["claude2"] + assert token_limit == models_params["anthropic"]["claude2"]["token_limit"] def test_get_model_token_limit_litellm_error(): @@ -92,7 +92,7 @@ def test_get_model_token_limit_litellm_error(): with patch("ra_aid.agent_utils.get_model_info") as mock_get_info: mock_get_info.side_effect = Exception("Unknown error") token_limit = get_model_token_limit(config) - assert token_limit == models_tokens["anthropic"]["claude2"] + assert token_limit == models_params["anthropic"]["claude2"]["token_limit"] def test_get_model_token_limit_unexpected_error(): @@ -127,7 +127,7 @@ def test_create_agent_openai(mock_model, mock_memory): assert agent == "ciayn_agent" mock_ciayn.assert_called_once_with( - mock_model, [], max_tokens=models_tokens["openai"]["gpt-4"] + mock_model, [], max_tokens=models_params["openai"]["gpt-4"]["token_limit"] ) @@ -201,7 +201,7 @@ def test_create_agent_with_checkpointer(mock_model, mock_memory): assert agent == "ciayn_agent" mock_ciayn.assert_called_once_with( - mock_model, [], max_tokens=models_tokens["openai"]["gpt-4"] + mock_model, [], max_tokens=models_params["openai"]["gpt-4"]["token_limit"] )