multiple tool results in ciayn

This commit is contained in:
AI Christianson 2025-03-04 07:59:05 -05:00
parent bee7416bf2
commit 3d5b5850b4
6 changed files with 256 additions and 20 deletions

View File

@ -1,5 +1,7 @@
import re import re
import ast import ast
import string
import random
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any, Dict, Generator, List, Optional, Union, Tuple from typing import Any, Dict, Generator, List, Optional, Union, Tuple
@ -246,6 +248,8 @@ class CiaynAgent:
# If we have multiple valid bundleable calls, execute them in sequence # If we have multiple valid bundleable calls, execute them in sequence
if len(tool_calls) > 1: if len(tool_calls) > 1:
results = [] results = []
result_strings = []
for call in tool_calls: for call in tool_calls:
# Validate and fix each call if needed # Validate and fix each call if needed
if validate_function_call_pattern(call): if validate_function_call_pattern(call):
@ -256,8 +260,12 @@ class CiaynAgent:
result = eval(call.strip(), globals_dict) result = eval(call.strip(), globals_dict)
results.append(result) results.append(result)
# Return the result of the last tool call # Generate a random ID for this result
return results[-1] result_id = self._generate_random_id()
result_strings.append(f"<result-{result_id}>\n{result}\n</result-{result_id}>")
# Return all results as one big string with tagged sections
return "\n\n".join(result_strings)
# Regular single tool call case # Regular single tool call case
if validate_function_call_pattern(code): if validate_function_call_pattern(code):
@ -284,6 +292,18 @@ class CiaynAgent:
error_msg, base_message=msg, tool_name=tool_name error_msg, base_message=msg, tool_name=tool_name
) from e ) from e
def _generate_random_id(self, length: int = 6) -> str:
    """Build a random identifier used to tag an individual tool result.

    Args:
        length: Number of characters in the generated identifier.

    Returns:
        A string of ``length`` characters, each drawn from the lowercase
        ASCII letters and the decimal digits.
    """
    alphabet = string.ascii_lowercase + string.digits
    picked = []
    # One random.choice call per character, in order.
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return "".join(picked)
def extract_tool_name(self, code: str) -> str: def extract_tool_name(self, code: str) -> str:
"""Extract the tool name from the code.""" """Extract the tool name from the code."""
match = re.match(r"\s*([\w_\-]+)\s*\(", code) match = re.match(r"\s*([\w_\-]+)\s*\(", code)

View File

@ -93,6 +93,22 @@ PERFORMING WELL AS AN EFFICIENT YET COMPLETE AGENT WILL HELP MY CAREER.
4. WHEN USING put_complete_file_contents, ALWAYS PUT THE ENTIRE FILE CONTENT INSIDE ONE TRIPLE-QUOTED STRING 4. WHEN USING put_complete_file_contents, ALWAYS PUT THE ENTIRE FILE CONTENT INSIDE ONE TRIPLE-QUOTED STRING
</critical rules> </critical rules>
<bundled tools format>
When you bundle multiple tool calls in one response, you'll receive results in the following format:
```
<result-abc123>
First tool result content
</result-abc123>
<result-def456>
Second tool result content
</result-def456>
```
Each result will have a unique random ID tag, and the order of results will match the order of your tool calls.
</bundled tools format>
DO NOT CLAIM YOU ARE FINISHED UNTIL YOU ACTUALLY ARE! DO NOT CLAIM YOU ARE FINISHED UNTIL YOU ACTUALLY ARE!
Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS** Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**
""" """
@ -115,19 +131,6 @@ def main():
\"\"\") \"\"\")
</multiline content reminder> </multiline content reminder>
<bundleable tools reminder>
You can bundle multiple calls to these tools in one response:
- emit_expert_context
- ask_expert
- emit_key_facts
- emit_key_snippet
Example of bundled tools:
emit_key_facts(["Important fact 1", "Important fact 2"])
emit_expert_context("Additional context")
ask_expert("Question about this context")
</bundleable tools reminder>
--- EXAMPLE GOOD OUTPUTS --- --- EXAMPLE GOOD OUTPUTS ---
<example good output> <example good output>
@ -169,5 +172,13 @@ CORRECT: put_complete_file_contents("/path/to/file.py", \"\"\"def main():
print("Hello") print("Hello")
\"\"\") \"\"\")
NOTE: You can also bundle multiple calls to certain tools (emit_expert_context, ask_expert, emit_key_facts, emit_key_snippet) in one response. NOTE: You can also bundle multiple calls to certain tools (emit_expert_context, ask_expert, emit_key_facts, emit_key_snippet, and others) in one response. When bundling tools, each result will be returned with a unique random ID tag in this format:
<result-abc123>
First tool result content
</result-abc123>
<result-def456>
Second tool result content
</result-def456>
""" """

View File

@ -120,8 +120,12 @@ ask_expert("What does this mean?")'''
# Execute # Execute
result = agent._execute_tool(mock_message) result = agent._execute_tool(mock_message)
# Assert # Assert: We now verify that the result contains both tool call results with tagging
assert result == "Expert answer" # Should return the result of the last tool call assert "<result-" in result # Check for result tag start
assert "</result-" in result # Check for result tag end
assert "Context emitted" in result # Check first result content
assert "Expert answer" in result # Check second result content
# Verify the correct function calls were made with the right parameters
mock_emit_expert_context.assert_called_once_with("Important context") mock_emit_expert_context.assert_called_once_with("Important context")
mock_ask_expert.assert_called_once_with("What does this mean?") mock_ask_expert.assert_called_once_with("What does this mean?")
@ -170,5 +174,8 @@ emit_key_snippet({"file": "example.py", "start_line": 10, "end_line": 20})'''
# Execute # Execute
result = agent._execute_tool(mock_message) result = agent._execute_tool(mock_message)
# Assert # Assert: Verify both tool results are included in the tagged output
assert result == "Snippet emitted" # Should return the result of the last tool call assert "<result-" in result # Check for result tag start
assert "</result-" in result # Check for result tag end
assert "Facts emitted" in result # Check first result content
assert "Snippet emitted" in result # Check second result content

View File

@ -0,0 +1,87 @@
import unittest
from unittest.mock import MagicMock, patch
from langchain_core.messages import AIMessage
from ra_aid.agent_backends.ciayn_agent import CiaynAgent
class TestCiaynBundledTools(unittest.TestCase):
    """Test the bundled tool call functionality in the CiaynAgent.

    Covers three things: bundled calls produce one tagged section per call,
    a single call returns its bare result, and ``_generate_random_id``
    honours the requested length and alphabet.
    """

    def setUp(self):
        """Build an agent with two stub tools and patch call validation."""
        self.model = MagicMock()
        self.tools = [
            MagicMock(func=lambda content: f"Result of tool1: {content}"),
            MagicMock(func=lambda content: f"Result of tool2: {content}"),
        ]

        # Give the stub callables the tool names used in the test messages.
        self.tools[0].func.__name__ = "emit_expert_context"
        self.tools[1].func.__name__ = "ask_expert"

        self.agent = CiaynAgent(model=self.model, tools=self.tools)

        # Mock the validation to always return False (valid).
        # Cleanups are registered right after each start() so a patch is
        # undone even when a later setUp step raises; tearDown would be
        # skipped in that case and the patch would leak into other tests.
        self.validate_patcher = patch(
            "ra_aid.agent_backends.ciayn_agent.validate_function_call_pattern",
            return_value=False,
        )
        self.mock_validate = self.validate_patcher.start()
        self.addCleanup(self.validate_patcher.stop)

        # Mock _extract_tool_call in case it's needed.
        self.extract_patcher = patch.object(
            self.agent, "_extract_tool_call", return_value="mocked_tool_call"
        )
        self.mock_extract = self.extract_patcher.start()
        self.addCleanup(self.extract_patcher.stop)

    # random.choice always yields the first character, so IDs are predictable.
    @patch("random.choice", side_effect=lambda chars: chars[0])
    def test_bundled_tool_calls(self, mock_random):
        """Bundled tool calls return every result wrapped in result tags."""
        msg = AIMessage(
            content=(
                'emit_expert_context("Expert context 1")\n'
                'ask_expert("Expert question 1")'
            )
        )

        result = self.agent._execute_tool(msg)

        # Both tool outputs are present and tagged.
        self.assertIn("<result-", result)
        self.assertIn("</result-", result)
        self.assertIn("Result of tool1: Expert context 1", result)
        self.assertIn("Result of tool2: Expert question 1", result)

        # Exactly one opening and one closing tag per bundled call.
        self.assertEqual(2, result.count("<result-"))
        self.assertEqual(2, result.count("</result-"))

    @patch("random.choice", side_effect=lambda chars: chars[0])
    def test_single_tool_call(self, mock_random):
        """A single tool call returns just the result, without tags."""
        msg = AIMessage(content='emit_expert_context("Expert context 1")')

        result = self.agent._execute_tool(msg)

        self.assertEqual("Result of tool1: Expert context 1", result)
        self.assertNotIn("<result-", result)
        self.assertNotIn("</result-", result)

    def test_random_id_generation(self):
        """_generate_random_id returns IDs of the requested length and alphabet."""
        import string

        allowed = set(string.ascii_lowercase + string.digits)

        id1 = self.agent._generate_random_id(length=6)
        id2 = self.agent._generate_random_id(length=10)

        self.assertEqual(6, len(id1))
        self.assertEqual(10, len(id2))

        # Two random IDs may legitimately coincide, so asserting that they
        # differ would make this test flaky. Check the alphabet instead.
        self.assertTrue(set(id1) <= allowed)
        self.assertTrue(set(id2) <= allowed)
if __name__ == "__main__":
unittest.main()

View File

@ -0,0 +1,47 @@
import unittest
from unittest.mock import MagicMock, patch
from langchain_core.messages import AIMessage
from ra_aid.agent_backends.ciayn_agent import CiaynAgent
class TestCiaynSingleTool(unittest.TestCase):
    """Check that a lone tool call is unaffected by the bundling implementation."""

    def setUp(self):
        """Create an agent with one stub tool and patch call validation."""
        self.model = MagicMock()

        # Single stub tool whose callable echoes its argument.
        stub_tool = MagicMock(func=lambda content: f"Result: {content}")
        stub_tool.func.__name__ = "non_bundleable_tool"
        self.tools = [stub_tool]

        self.agent = CiaynAgent(model=self.model, tools=self.tools)

        # validate_function_call_pattern is forced to return False (valid).
        self.validate_patcher = patch(
            "ra_aid.agent_backends.ciayn_agent.validate_function_call_pattern",
            return_value=False,
        )
        self.mock_validate = self.validate_patcher.start()

    def tearDown(self):
        """Undo the validation patch started in setUp."""
        self.validate_patcher.stop()

    def test_single_tool_call(self):
        """A single tool call yields the raw result with no result tags."""
        call_text = 'non_bundleable_tool("Test content")'
        msg = AIMessage(content=call_text)

        output = self.agent._execute_tool(msg)

        # The plain result comes back, with neither opening nor closing tag.
        self.assertEqual("Result: Test content", output)
        self.assertNotIn("<result-", output)
        self.assertNotIn("</result-", output)
if __name__ == "__main__":
unittest.main()

View File

@ -0,0 +1,64 @@
import unittest
from unittest.mock import MagicMock, patch
from langchain_core.messages import AIMessage
from ra_aid.agent_backends.ciayn_agent import CiaynAgent
class TestCiaynStreamBundling(unittest.TestCase):
    """Check how _execute_tool formats the results of bundled tool calls."""

    def setUp(self):
        """Create an agent with two stub tools and patch call validation."""
        self.model = MagicMock()

        # Two stub tools whose callables echo their argument with a prefix.
        first_tool = MagicMock(func=lambda content: f"Result of tool1: {content}")
        second_tool = MagicMock(func=lambda content: f"Result of tool2: {content}")
        self.tools = [first_tool, second_tool]

        # Give the stub callables the tool names used in the test message.
        self.tools[0].func.__name__ = "emit_expert_context"
        self.tools[1].func.__name__ = "ask_expert"

        self.agent = CiaynAgent(model=self.model, tools=self.tools)

        # validate_function_call_pattern is forced to return False (valid).
        self.validate_patcher = patch(
            "ra_aid.agent_backends.ciayn_agent.validate_function_call_pattern",
            return_value=False,
        )
        self.mock_validate = self.validate_patcher.start()

    def tearDown(self):
        """Undo the validation patch started in setUp."""
        self.validate_patcher.stop()

    # random.choice always yields the first character, so every ID is "aaaaaa".
    @patch("random.choice", side_effect=lambda chars: chars[0])
    def test_execute_tool_with_bundled_calls(self, mock_random):
        """_execute_tool wraps each bundled result in its own tagged section."""
        bundled_calls = (
            'emit_expert_context("Test content 1")\n'
            'ask_expert("Test question 1")'
        )
        msg = AIMessage(content=bundled_calls)

        output = self.agent._execute_tool(msg)

        # Tags and both tool outputs are present.
        self.assertIn("<result-", output)
        self.assertIn("</result-", output)
        self.assertIn("Result of tool1: Test content 1", output)
        self.assertIn("Result of tool2: Test question 1", output)

        # One opening and one closing tag per bundled call.
        self.assertEqual(2, output.count("<result-"))
        self.assertEqual(2, output.count("</result-"))

        # Exact layout: tagged sections separated by a blank line.
        expected_sections = [
            "<result-aaaaaa>\nResult of tool1: Test content 1\n</result-aaaaaa>",
            "<result-aaaaaa>\nResult of tool2: Test question 1\n</result-aaaaaa>",
        ]
        expected_format = "\n\n".join(expected_sections)
        self.assertEqual(expected_format, output)
if __name__ == "__main__":
unittest.main()