Adjust token/bytes ratio to resolve errors on swebench-lite.

AI Christianson 2025-02-08 08:07:37 -05:00
parent b673cf61b6
commit 5861f3a2bf
3 changed files with 8 additions and 8 deletions

View File

@@ -252,7 +252,7 @@ Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**"""
     def _estimate_tokens(content: Optional[Union[str, BaseMessage]]) -> int:
         """Estimate number of tokens in content using simple byte length heuristic.
 
-        Estimates 1 token per 3 bytes of content. For messages, uses the content field.
+        Estimates 1 token per 2.0 bytes of content. For messages, uses the content field.
 
         Args:
             content: String content or Message object to estimate tokens for
@@ -275,7 +275,7 @@ Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**"""
         if not text:
             return 0
 
-        return len(text.encode("utf-8")) // 3
+        return len(text.encode("utf-8")) // 2.0
 
     def _trim_chat_history(
         self, initial_messages: List[Any], chat_history: List[Any]
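For reference, a minimal standalone sketch of the heuristic as it stands after this change (assumptions: plain str input only, whereas the real method also accepts BaseMessage and reads its content field; int() is added here only so the sketch matches the declared return type, since floor-dividing by 2.0 yields a float):

def estimate_tokens(text: str) -> int:
    # Roughly 2 bytes of UTF-8 per token (previously 3 bytes per token).
    # A smaller divisor gives a larger, more conservative token estimate,
    # so chat history is trimmed sooner and stays under model limits.
    if not text:
        return 0
    return int(len(text.encode("utf-8")) // 2.0)

assert estimate_tokens("hello world") == 5  # 11 bytes // 2.0 -> 5 (the old ratio gave 3)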

View File

@@ -108,7 +108,7 @@ def test_trim_chat_history_empty_chat(agent):
 
 def test_trim_chat_history_token_limit():
     """Test trimming based on token limit."""
-    agent = CiaynAgent(Mock(), [], max_history_messages=10, max_tokens=20)
+    agent = CiaynAgent(Mock(), [], max_history_messages=10, max_tokens=25)
     initial_messages = [HumanMessage(content="Initial")]  # ~2 tokens
     chat_history = [
@@ -146,7 +146,7 @@ def test_trim_chat_history_no_token_limit():
 
 def test_trim_chat_history_both_limits():
     """Test trimming with both message count and token limits."""
-    agent = CiaynAgent(Mock(), [], max_history_messages=3, max_tokens=15)
+    agent = CiaynAgent(Mock(), [], max_history_messages=3, max_tokens=35)
    initial_messages = [HumanMessage(content="Init")]  # ~1 token
     chat_history = [
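Rough arithmetic behind the raised token budgets, assuming the 2-bytes-per-token heuristic above (the inline "~2 tokens" / "~1 token" comments reflect the old 3-byte ratio):

# "Initial" is 7 bytes and "Init" is 4 bytes, so their estimates grow under the new ratio;
# the budgets were bumped (20 -> 25, 15 -> 35) so the tests still trim where intended.
assert len("Initial".encode("utf-8")) // 2.0 == 3  # was 7 // 3 = 2
assert len("Init".encode("utf-8")) // 2.0 == 2     # was 4 // 3 = 1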

View File

@@ -127,13 +127,13 @@ def test_estimate_tokens():
     assert CiaynAgent._estimate_tokens("") == 0
 
     # Test string content
-    assert CiaynAgent._estimate_tokens("test") == 1  # 4 bytes
-    assert CiaynAgent._estimate_tokens("hello world") == 3  # 11 bytes
-    assert CiaynAgent._estimate_tokens("🚀") == 1  # 4 bytes
+    assert CiaynAgent._estimate_tokens("test") == 2  # 4 bytes
+    assert CiaynAgent._estimate_tokens("hello world") == 5  # 11 bytes
+    assert CiaynAgent._estimate_tokens("🚀") == 2  # 4 bytes
 
     # Test message content
     msg = HumanMessage(content="test message")
-    assert CiaynAgent._estimate_tokens(msg) == 4  # 12 bytes
+    assert CiaynAgent._estimate_tokens(msg) == 6  # 12 bytes
 
 def test_initialize_openai(clean_env, mock_openai):
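A quick sanity check that the updated expectations match the new divisor; note that floor-dividing by 2.0 returns a float, which still compares equal to the expected ints:

# Byte lengths in UTF-8: "test" = 4, "hello world" = 11, "🚀" = 4, "test message" = 12.
for text, expected in [("test", 2), ("hello world", 5), ("🚀", 2), ("test message", 6)]:
    assert len(text.encode("utf-8")) // 2.0 == expected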