multiple tool results in ciayn

This commit is contained in:
AI Christianson 2025-03-04 07:59:05 -05:00
parent bee7416bf2
commit 3d5b5850b4
6 changed files with 256 additions and 20 deletions

View File

@ -1,5 +1,7 @@
import re
import ast
import string
import random
from dataclasses import dataclass
from typing import Any, Dict, Generator, List, Optional, Union, Tuple
@ -246,6 +248,8 @@ class CiaynAgent:
# If we have multiple valid bundleable calls, execute them in sequence
if len(tool_calls) > 1:
results = []
result_strings = []
for call in tool_calls:
# Validate and fix each call if needed
if validate_function_call_pattern(call):
@ -255,9 +259,13 @@ class CiaynAgent:
# Execute the call and collect the result
result = eval(call.strip(), globals_dict)
results.append(result)
# Generate a random ID for this result
result_id = self._generate_random_id()
result_strings.append(f"<result-{result_id}>\n{result}\n</result-{result_id}>")
# Return the result of the last tool call
return results[-1]
# Return all results as one big string with tagged sections
return "\n\n".join(result_strings)
# Regular single tool call case
if validate_function_call_pattern(code):
@ -284,6 +292,18 @@ class CiaynAgent:
error_msg, base_message=msg, tool_name=tool_name
) from e
def _generate_random_id(self, length: int = 6) -> str:
    """Create a short random identifier used to tag bundled tool results.

    Args:
        length: Number of characters in the generated ID.

    Returns:
        A string of ``length`` random lowercase letters and digits.
    """
    alphabet = string.ascii_lowercase + string.digits
    picked = [random.choice(alphabet) for _ in range(length)]
    return "".join(picked)
def extract_tool_name(self, code: str) -> str:
"""Extract the tool name from the code."""
match = re.match(r"\s*([\w_\-]+)\s*\(", code)

View File

@ -93,6 +93,22 @@ PERFORMING WELL AS AN EFFICIENT YET COMPLETE AGENT WILL HELP MY CAREER.
4. WHEN USING put_complete_file_contents, ALWAYS PUT THE ENTIRE FILE CONTENT INSIDE ONE TRIPLE-QUOTED STRING
</critical rules>
<bundled tools format>
When you bundle multiple tool calls in one response, you'll receive results in the following format:
```
<result-abc123>
First tool result content
</result-abc123>
<result-def456>
Second tool result content
</result-def456>
```
Each result will have a unique random ID tag, and the order of results will match the order of your tool calls.
</bundled tools format>
DO NOT CLAIM YOU ARE FINISHED UNTIL YOU ACTUALLY ARE!
Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**
"""
@ -115,19 +131,6 @@ def main():
\"\"\")
</multiline content reminder>
<bundleable tools reminder>
You can bundle multiple calls to these tools in one response:
- emit_expert_context
- ask_expert
- emit_key_facts
- emit_key_snippet
Example of bundled tools:
emit_key_facts(["Important fact 1", "Important fact 2"])
emit_expert_context("Additional context")
ask_expert("Question about this context")
</bundleable tools reminder>
--- EXAMPLE GOOD OUTPUTS ---
<example good output>
@ -169,5 +172,13 @@ CORRECT: put_complete_file_contents("/path/to/file.py", \"\"\"def main():
print("Hello")
\"\"\")
NOTE: You can also bundle multiple calls to certain tools (emit_expert_context, ask_expert, emit_key_facts, emit_key_snippet) in one response.
NOTE: You can also bundle multiple calls to certain tools (emit_expert_context, ask_expert, emit_key_facts, emit_key_snippet, and others) in one response. When bundling tools, each result will be returned with a unique random ID tag in this format:
<result-abc123>
First tool result content
</result-abc123>
<result-def456>
Second tool result content
</result-def456>
"""

View File

@ -120,8 +120,12 @@ ask_expert("What does this mean?")'''
# Execute
result = agent._execute_tool(mock_message)
# Assert
assert result == "Expert answer" # Should return the result of the last tool call
# Assert: We now verify that the result contains both tool call results with tagging
assert "<result-" in result # Check for result tag start
assert "</result-" in result # Check for result tag end
assert "Context emitted" in result # Check first result content
assert "Expert answer" in result # Check second result content
# Verify the correct function calls were made with the right parameters
mock_emit_expert_context.assert_called_once_with("Important context")
mock_ask_expert.assert_called_once_with("What does this mean?")
@ -170,5 +174,8 @@ emit_key_snippet({"file": "example.py", "start_line": 10, "end_line": 20})'''
# Execute
result = agent._execute_tool(mock_message)
# Assert
assert result == "Snippet emitted" # Should return the result of the last tool call
# Assert: Verify both tool results are included in the tagged output
assert "<result-" in result # Check for result tag start
assert "</result-" in result # Check for result tag end
assert "Facts emitted" in result # Check first result content
assert "Snippet emitted" in result # Check second result content

View File

@ -0,0 +1,87 @@
import unittest
from unittest.mock import MagicMock, patch
from langchain_core.messages import AIMessage
from ra_aid.agent_backends.ciayn_agent import CiaynAgent
class TestCiaynBundledTools(unittest.TestCase):
    """Test the bundled tool call functionality in the CiaynAgent."""

    def setUp(self):
        """Set up the test case with mocked model and tools."""
        self.model = MagicMock()
        self.tools = [
            MagicMock(func=lambda content: f"Result of tool1: {content}"),
            MagicMock(func=lambda content: f"Result of tool2: {content}"),
        ]
        # The agent dispatches on each tool function's __name__.
        self.tools[0].func.__name__ = "emit_expert_context"
        self.tools[1].func.__name__ = "ask_expert"
        self.agent = CiaynAgent(model=self.model, tools=self.tools)
        # Mock the validation to always return False (False == "pattern is valid").
        self.validate_patcher = patch(
            'ra_aid.agent_backends.ciayn_agent.validate_function_call_pattern',
            return_value=False,
        )
        self.mock_validate = self.validate_patcher.start()
        # Mock _extract_tool_call in case the agent needs a fallback parse.
        self.extract_patcher = patch.object(
            self.agent, '_extract_tool_call', return_value="mocked_tool_call"
        )
        self.mock_extract = self.extract_patcher.start()

    def tearDown(self):
        """Clean up patches after the test."""
        self.validate_patcher.stop()
        self.extract_patcher.stop()

    @patch('random.choice', side_effect=lambda chars: chars[0])  # predictable IDs
    def test_bundled_tool_calls(self, mock_random):
        """Test that bundled tool calls return results wrapped in result tags."""
        msg = AIMessage(content="""emit_expert_context("Expert context 1")
ask_expert("Expert question 1")""")
        result = self.agent._execute_tool(msg)

        # Both tool results must appear, each inside tagged sections.
        self.assertIn("<result-", result)
        self.assertIn("</result-", result)
        self.assertIn("Result of tool1: Expert context 1", result)
        self.assertIn("Result of tool2: Expert question 1", result)
        # Exactly one opening/closing tag pair per bundled call.
        self.assertEqual(2, result.count("<result-"))
        self.assertEqual(2, result.count("</result-"))

    @patch('random.choice', side_effect=lambda chars: chars[0])  # predictable IDs
    def test_single_tool_call(self, mock_random):
        """Test that single tool calls return just the result without tags."""
        msg = AIMessage(content="emit_expert_context(\"Expert context 1\")")
        result = self.agent._execute_tool(msg)

        # Single calls bypass the bundling format entirely.
        self.assertEqual("Result of tool1: Expert context 1", result)
        self.assertNotIn("<result-", result)
        self.assertNotIn("</result-", result)

    def test_random_id_generation(self):
        """Test that _generate_random_id creates well-formed IDs.

        FIX: the original asserted ``assertNotEqual(id1, id2[:6])`` to show
        the IDs "are different" — a nondeterministic check that can fail
        spuriously, since two independently random IDs may legitimately
        share a 6-character prefix. Replaced with deterministic length and
        charset assertions.
        """
        id1 = self.agent._generate_random_id(length=6)
        id2 = self.agent._generate_random_id(length=10)
        for rid, expected_len in ((id1, 6), (id2, 10)):
            self.assertEqual(expected_len, len(rid))
            # IDs are drawn from ascii_lowercase + digits.
            self.assertTrue(rid.isalnum())
            self.assertEqual(rid, rid.lower())


if __name__ == "__main__":
    unittest.main()

View File

@ -0,0 +1,47 @@
import unittest
from unittest.mock import MagicMock, patch
from langchain_core.messages import AIMessage
from ra_aid.agent_backends.ciayn_agent import CiaynAgent
class TestCiaynSingleTool(unittest.TestCase):
    """Test that single tool calls still work correctly with the bundling implementation."""

    def setUp(self):
        """Build an agent around a single mocked, non-bundleable tool."""
        self.model = MagicMock()
        only_tool = MagicMock(func=lambda content: f"Result: {content}")
        only_tool.func.__name__ = "non_bundleable_tool"
        self.tools = [only_tool]
        self.agent = CiaynAgent(model=self.model, tools=self.tools)
        # The validator returning False means the call pattern is valid.
        self.validate_patcher = patch(
            'ra_aid.agent_backends.ciayn_agent.validate_function_call_pattern',
            return_value=False,
        )
        self.mock_validate = self.validate_patcher.start()

    def tearDown(self):
        """Stop the validator patch."""
        self.validate_patcher.stop()

    def test_single_tool_call(self):
        """A lone tool call must come back as plain text, with no result tags."""
        message = AIMessage(content='non_bundleable_tool("Test content")')
        result = self.agent._execute_tool(message)

        self.assertEqual("Result: Test content", result)
        # Tagged sections are reserved for bundled (multi-call) responses.
        for tag in ("<result-", "</result-"):
            self.assertNotIn(tag, result)


if __name__ == "__main__":
    unittest.main()

View File

@ -0,0 +1,64 @@
import unittest
from unittest.mock import MagicMock, patch
from langchain_core.messages import AIMessage
from ra_aid.agent_backends.ciayn_agent import CiaynAgent
class TestCiaynStreamBundling(unittest.TestCase):
    """Test that the _execute_tool method correctly formats bundled tool call results."""

    def setUp(self):
        """Wire up a CiaynAgent around two mocked bundleable tools."""
        self.model = MagicMock()
        first = MagicMock(func=lambda content: f"Result of tool1: {content}")
        second = MagicMock(func=lambda content: f"Result of tool2: {content}")
        first.func.__name__ = "emit_expert_context"
        second.func.__name__ = "ask_expert"
        self.tools = [first, second]
        self.agent = CiaynAgent(model=self.model, tools=self.tools)
        # A validator result of False means "pattern is valid" to the agent.
        self.validate_patcher = patch(
            'ra_aid.agent_backends.ciayn_agent.validate_function_call_pattern',
            return_value=False,
        )
        self.mock_validate = self.validate_patcher.start()

    def tearDown(self):
        """Stop the validator patch."""
        self.validate_patcher.stop()

    # Patch random.choice so every generated ID is the predictable "aaaaaa".
    @patch('random.choice', side_effect=lambda chars: chars[0])
    def test_execute_tool_with_bundled_calls(self, mock_random):
        """_execute_tool should wrap each bundled result in its own tag pair."""
        tool_call_text = """emit_expert_context("Test content 1")
ask_expert("Test question 1")"""
        result = self.agent._execute_tool(AIMessage(content=tool_call_text))

        # Both tool outputs appear, each inside opening/closing result tags.
        for fragment in (
            "<result-",
            "</result-",
            "Result of tool1: Test content 1",
            "Result of tool2: Test question 1",
        ):
            self.assertIn(fragment, result)
        self.assertEqual(2, result.count("<result-"))
        self.assertEqual(2, result.count("</result-"))

        # With random.choice pinned, the exact output is fully predictable.
        expected = (
            "<result-aaaaaa>\nResult of tool1: Test content 1\n</result-aaaaaa>"
            "\n\n"
            "<result-aaaaaa>\nResult of tool2: Test question 1\n</result-aaaaaa>"
        )
        self.assertEqual(expected, result)


if __name__ == "__main__":
    unittest.main()