multiple tool results in ciayn
This commit is contained in:
parent
bee7416bf2
commit
3d5b5850b4
|
|
@ -1,5 +1,7 @@
|
|||
import re
|
||||
import ast
|
||||
import string
|
||||
import random
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, Generator, List, Optional, Union, Tuple
|
||||
|
||||
|
|
@ -246,6 +248,8 @@ class CiaynAgent:
|
|||
# If we have multiple valid bundleable calls, execute them in sequence
|
||||
if len(tool_calls) > 1:
|
||||
results = []
|
||||
result_strings = []
|
||||
|
||||
for call in tool_calls:
|
||||
# Validate and fix each call if needed
|
||||
if validate_function_call_pattern(call):
|
||||
|
|
@ -255,9 +259,13 @@ class CiaynAgent:
|
|||
# Execute the call and collect the result
|
||||
result = eval(call.strip(), globals_dict)
|
||||
results.append(result)
|
||||
|
||||
# Generate a random ID for this result
|
||||
result_id = self._generate_random_id()
|
||||
result_strings.append(f"<result-{result_id}>\n{result}\n</result-{result_id}>")
|
||||
|
||||
# Return the result of the last tool call
|
||||
return results[-1]
|
||||
# Return all results as one big string with tagged sections
|
||||
return "\n\n".join(result_strings)
|
||||
|
||||
# Regular single tool call case
|
||||
if validate_function_call_pattern(code):
|
||||
|
|
@ -284,6 +292,18 @@ class CiaynAgent:
|
|||
error_msg, base_message=msg, tool_name=tool_name
|
||||
) from e
|
||||
|
||||
def _generate_random_id(self, length: int = 6) -> str:
    """Build a random tag used to label one tool result.

    Args:
        length: Number of characters the ID should contain.

    Returns:
        A string of ``length`` characters, each drawn from the lowercase
        ASCII letters and the decimal digits.
    """
    alphabet = string.ascii_lowercase + string.digits
    picked = []
    # One random.choice call per character, in order, so that patching or
    # seeding random.choice yields the same sequence as before.
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)
|
||||
|
||||
def extract_tool_name(self, code: str) -> str:
|
||||
"""Extract the tool name from the code."""
|
||||
match = re.match(r"\s*([\w_\-]+)\s*\(", code)
|
||||
|
|
|
|||
|
|
@ -93,6 +93,22 @@ PERFORMING WELL AS AN EFFICIENT YET COMPLETE AGENT WILL HELP MY CAREER.
|
|||
4. WHEN USING put_complete_file_contents, ALWAYS PUT THE ENTIRE FILE CONTENT INSIDE ONE TRIPLE-QUOTED STRING
|
||||
</critical rules>
|
||||
|
||||
<bundled tools format>
|
||||
When you bundle multiple tool calls in one response, you'll receive results in the following format:
|
||||
|
||||
```
|
||||
<result-abc123>
|
||||
First tool result content
|
||||
</result-abc123>
|
||||
|
||||
<result-def456>
|
||||
Second tool result content
|
||||
</result-def456>
|
||||
```
|
||||
|
||||
Each result will have a unique random ID tag, and the order of results will match the order of your tool calls.
|
||||
</bundled tools format>
|
||||
|
||||
DO NOT CLAIM YOU ARE FINISHED UNTIL YOU ACTUALLY ARE!
|
||||
Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**
|
||||
"""
|
||||
|
|
@ -115,19 +131,6 @@ def main():
|
|||
\"\"\")
|
||||
</multiline content reminder>
|
||||
|
||||
<bundleable tools reminder>
|
||||
You can bundle multiple calls to these tools in one response:
|
||||
- emit_expert_context
|
||||
- ask_expert
|
||||
- emit_key_facts
|
||||
- emit_key_snippet
|
||||
|
||||
Example of bundled tools:
|
||||
emit_key_facts(["Important fact 1", "Important fact 2"])
|
||||
emit_expert_context("Additional context")
|
||||
ask_expert("Question about this context")
|
||||
</bundleable tools reminder>
|
||||
|
||||
--- EXAMPLE GOOD OUTPUTS ---
|
||||
|
||||
<example good output>
|
||||
|
|
@ -169,5 +172,13 @@ CORRECT: put_complete_file_contents("/path/to/file.py", \"\"\"def main():
|
|||
print("Hello")
|
||||
\"\"\")
|
||||
|
||||
NOTE: You can also bundle multiple calls to certain tools (emit_expert_context, ask_expert, emit_key_facts, emit_key_snippet) in one response.
|
||||
NOTE: You can also bundle multiple calls to certain tools (emit_expert_context, ask_expert, emit_key_facts, emit_key_snippet, and others) in one response. When bundling tools, each result will be returned with a unique random ID tag in this format:
|
||||
|
||||
<result-abc123>
|
||||
First tool result content
|
||||
</result-abc123>
|
||||
|
||||
<result-def456>
|
||||
Second tool result content
|
||||
</result-def456>
|
||||
"""
|
||||
|
|
@ -120,8 +120,12 @@ ask_expert("What does this mean?")'''
|
|||
# Execute
|
||||
result = agent._execute_tool(mock_message)
|
||||
|
||||
# Assert
|
||||
assert result == "Expert answer" # Should return the result of the last tool call
|
||||
# Assert: We now verify that the result contains both tool call results with tagging
|
||||
assert "<result-" in result # Check for result tag start
|
||||
assert "</result-" in result # Check for result tag end
|
||||
assert "Context emitted" in result # Check first result content
|
||||
assert "Expert answer" in result # Check second result content
|
||||
# Verify the correct function calls were made with the right parameters
|
||||
mock_emit_expert_context.assert_called_once_with("Important context")
|
||||
mock_ask_expert.assert_called_once_with("What does this mean?")
|
||||
|
||||
|
|
@ -170,5 +174,8 @@ emit_key_snippet({"file": "example.py", "start_line": 10, "end_line": 20})'''
|
|||
# Execute
|
||||
result = agent._execute_tool(mock_message)
|
||||
|
||||
# Assert
|
||||
assert result == "Snippet emitted" # Should return the result of the last tool call
|
||||
# Assert: Verify both tool results are included in the tagged output
|
||||
assert "<result-" in result # Check for result tag start
|
||||
assert "</result-" in result # Check for result tag end
|
||||
assert "Facts emitted" in result # Check first result content
|
||||
assert "Snippet emitted" in result # Check second result content
|
||||
|
|
|
|||
|
|
@ -0,0 +1,87 @@
|
|||
import unittest
|
||||
from unittest.mock import MagicMock, patch
|
||||
from langchain_core.messages import AIMessage
|
||||
|
||||
from ra_aid.agent_backends.ciayn_agent import CiaynAgent
|
||||
|
||||
class TestCiaynBundledTools(unittest.TestCase):
    """Test the bundled tool call functionality in the CiaynAgent."""

    def setUp(self):
        """Set up the test case with mocked model and tools.

        Each patcher is registered with ``addCleanup`` immediately after it
        is started. Cleanups run even when a later step of ``setUp`` raises,
        whereas ``tearDown`` would be skipped in that case and the
        module-level patch would leak into other tests.
        """
        self.model = MagicMock()
        self.tools = [
            MagicMock(func=lambda content: f"Result of tool1: {content}"),
            MagicMock(func=lambda content: f"Result of tool2: {content}"),
        ]

        # Set up function names
        self.tools[0].func.__name__ = "emit_expert_context"
        self.tools[1].func.__name__ = "ask_expert"

        # Create agent
        self.agent = CiaynAgent(model=self.model, tools=self.tools)

        # Mock the validation to always return False (valid)
        self.validate_patcher = patch(
            'ra_aid.agent_backends.ciayn_agent.validate_function_call_pattern',
            return_value=False,
        )
        self.mock_validate = self.validate_patcher.start()
        self.addCleanup(self.validate_patcher.stop)

        # Mock _extract_tool_call in case it's needed
        self.extract_patcher = patch.object(
            self.agent, '_extract_tool_call', return_value="mocked_tool_call"
        )
        self.mock_extract = self.extract_patcher.start()
        self.addCleanup(self.extract_patcher.stop)

    @patch('random.choice', side_effect=lambda chars: chars[0])  # Make random IDs predictable for testing
    def test_bundled_tool_calls(self, mock_random):
        """Test that bundled tool calls return results properly formatted with result tags."""
        # Create a message with multiple bundled tool calls (one call per line)
        msg = AIMessage(
            content='emit_expert_context("Expert context 1")\n'
            'ask_expert("Expert question 1")'
        )

        # Execute the tool calls
        result = self.agent._execute_tool(msg)

        # Verify the result has both results with proper format
        self.assertIn("<result-", result)
        self.assertIn("</result-", result)
        self.assertIn("Result of tool1: Expert context 1", result)
        self.assertIn("Result of tool2: Expert question 1", result)

        # Verify we have two result sections
        self.assertEqual(2, result.count("<result-"))
        self.assertEqual(2, result.count("</result-"))

    @patch('random.choice', side_effect=lambda chars: chars[0])  # Make random IDs predictable for testing
    def test_single_tool_call(self, mock_random):
        """Test that single tool calls still return just the result without tags."""
        # Create a message with a single tool call
        msg = AIMessage(content="emit_expert_context(\"Expert context 1\")")

        # Execute the tool call
        result = self.agent._execute_tool(msg)

        # Verify the result is just the plain text result without result tags
        self.assertEqual("Result of tool1: Expert context 1", result)
        self.assertNotIn("<result-", result)
        self.assertNotIn("</result-", result)

    def test_random_id_generation(self):
        """Test that _generate_random_id honours the requested length and alphabet."""
        import string

        # Generate IDs of different lengths
        id1 = self.agent._generate_random_id(length=6)
        id2 = self.agent._generate_random_id(length=10)

        # Verify the IDs are of the correct length
        self.assertEqual(6, len(id1))
        self.assertEqual(10, len(id2))

        # Check the alphabet rather than asserting that two random IDs differ:
        # independent draws are allowed to collide, so inequality is not a
        # property the generator guarantees.
        allowed = set(string.ascii_lowercase + string.digits)
        self.assertLessEqual(set(id1), allowed)
        self.assertLessEqual(set(id2), allowed)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
import unittest
|
||||
from unittest.mock import MagicMock, patch
|
||||
from langchain_core.messages import AIMessage
|
||||
|
||||
from ra_aid.agent_backends.ciayn_agent import CiaynAgent
|
||||
|
||||
|
||||
class TestCiaynSingleTool(unittest.TestCase):
    """Check that a lone tool call is unaffected by the bundling implementation."""

    def setUp(self):
        """Create an agent with one mocked tool and stub out call validation."""
        self.model = MagicMock()

        # Build the single tool first, give its function a name, then register it.
        only_tool = MagicMock(func=lambda content: f"Result: {content}")
        only_tool.func.__name__ = "non_bundleable_tool"
        self.tools = [only_tool]

        self.agent = CiaynAgent(model=self.model, tools=self.tools)

        # The validator is stubbed to always return False, which these tests
        # treat as "call is valid".
        self.validate_patcher = patch(
            'ra_aid.agent_backends.ciayn_agent.validate_function_call_pattern',
            return_value=False,
        )
        self.mock_validate = self.validate_patcher.start()

    def tearDown(self):
        """Undo the validator patch started in setUp."""
        self.validate_patcher.stop()

    def test_single_tool_call(self):
        """A single tool call yields the raw result, with no result tags around it."""
        message = AIMessage(content='non_bundleable_tool("Test content")')

        outcome = self.agent._execute_tool(message)

        # Plain result text only; neither the opening nor the closing tag may appear.
        self.assertEqual("Result: Test content", outcome)
        for tag_prefix in ("<result-", "</result-"):
            self.assertNotIn(tag_prefix, outcome)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
import unittest
|
||||
from unittest.mock import MagicMock, patch
|
||||
from langchain_core.messages import AIMessage
|
||||
|
||||
from ra_aid.agent_backends.ciayn_agent import CiaynAgent
|
||||
|
||||
|
||||
class TestCiaynStreamBundling(unittest.TestCase):
    """Check how _execute_tool formats the results of bundled tool calls."""

    def setUp(self):
        """Create an agent with two mocked tools and stub out call validation."""
        self.model = MagicMock()

        # Two tools whose functions echo their input with a distinguishing prefix.
        context_tool = MagicMock(func=lambda content: f"Result of tool1: {content}")
        expert_tool = MagicMock(func=lambda content: f"Result of tool2: {content}")
        context_tool.func.__name__ = "emit_expert_context"
        expert_tool.func.__name__ = "ask_expert"
        self.tools = [context_tool, expert_tool]

        self.agent = CiaynAgent(model=self.model, tools=self.tools)

        # The validator is stubbed to always return False, which these tests
        # treat as "call is valid".
        self.validate_patcher = patch(
            'ra_aid.agent_backends.ciayn_agent.validate_function_call_pattern',
            return_value=False,
        )
        self.mock_validate = self.validate_patcher.start()

    def tearDown(self):
        """Undo the validator patch started in setUp."""
        self.validate_patcher.stop()

    @patch('random.choice', side_effect=lambda chars: chars[0])  # Make random IDs predictable for testing
    def test_execute_tool_with_bundled_calls(self, mock_random):
        """Bundled calls come back as tagged sections, one per call, in call order."""
        # Two calls, one per line, in a single message.
        bundled_calls = "\n".join(
            [
                'emit_expert_context("Test content 1")',
                'ask_expert("Test question 1")',
            ]
        )
        message = AIMessage(content=bundled_calls)

        outcome = self.agent._execute_tool(message)

        tag_prefixes = ("<result-", "</result-")

        # Both tag kinds and both tool outputs must be present.
        for tag_prefix in tag_prefixes:
            self.assertIn(tag_prefix, outcome)
        self.assertIn("Result of tool1: Test content 1", outcome)
        self.assertIn("Result of tool2: Test question 1", outcome)

        # Exactly one opening and one closing tag per call.
        for tag_prefix in tag_prefixes:
            self.assertEqual(2, outcome.count(tag_prefix))

        # With random.choice patched to return the first character, every ID
        # is "aaaaaa", so the full output can be compared exactly.
        expected_sections = [
            "<result-aaaaaa>\nResult of tool1: Test content 1\n</result-aaaaaa>",
            "<result-aaaaaa>\nResult of tool2: Test question 1\n</result-aaaaaa>",
        ]
        self.assertEqual("\n\n".join(expected_sections), outcome)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Loading…
Reference in New Issue