multiple tool results in ciayn

2025-03-04 07:59:05 -05:00 · 2025-03-04 07:59:05 -05:00 · 3d5b5850b4
parent bee7416bf2
commit 3d5b5850b4
6 changed files with 256 additions and 20 deletions
--- a/ra_aid/agent_backends/ciayn_agent.py
+++ b/ra_aid/agent_backends/ciayn_agent.py
@ -1,5 +1,7 @@
 import re
 import ast
 import string
 import random
 from dataclasses import dataclass
 from typing import Any, Dict, Generator, List, Optional, Union, Tuple
@ -246,6 +248,8 @@ class CiaynAgent:
            # If we have multiple valid bundleable calls, execute them in sequence
            if len(tool_calls) > 1:
                results = []
                result_strings = []
                for call in tool_calls:
                    # Validate and fix each call if needed
                    if validate_function_call_pattern(call):
@ -256,8 +260,12 @@ class CiaynAgent:
                    result = eval(call.strip(), globals_dict)
                    results.append(result)
-                # Return the result of the last tool call
+                    # Generate a random ID for this result
-                return results[-1]
+                    result_id = self._generate_random_id()
                    result_strings.append(f"<result-{result_id}>\n{result}\n</result-{result_id}>")
                # Return all results as one big string with tagged sections
                return "\n\n".join(result_strings)
            # Regular single tool call case
            if validate_function_call_pattern(code):
@ -284,6 +292,18 @@ class CiaynAgent:
                error_msg, base_message=msg, tool_name=tool_name
            ) from e
    def _generate_random_id(self, length: int = 6) -> str:
        """Build a short random identifier used to tag a tool result.

        Args:
            length: Number of characters the identifier should contain.

        Returns:
            A string of ``length`` characters drawn from lowercase ASCII
            letters and digits (empty when ``length`` is zero or negative).
        """
        alphabet = string.ascii_lowercase + string.digits
        # One independent random.choice draw per character position.
        picked = [random.choice(alphabet) for _ in range(length)]
        return "".join(picked)
    def extract_tool_name(self, code: str) -> str:
        """Extract the tool name from the code."""
        match = re.match(r"\s*([\w_\-]+)\s*\(", code)
--- a/ra_aid/prompts/ciayn_prompts.py
+++ b/ra_aid/prompts/ciayn_prompts.py
@ -93,6 +93,22 @@ PERFORMING WELL AS AN EFFICIENT YET COMPLETE AGENT WILL HELP MY CAREER.
 4. WHEN USING put_complete_file_contents, ALWAYS PUT THE ENTIRE FILE CONTENT INSIDE ONE TRIPLE-QUOTED STRING
 </critical rules>
 <bundled tools format>
 When you bundle multiple tool calls in one response, you'll receive results in the following format:
 ```
 <result-abc123>
 First tool result content
 </result-abc123>
 <result-def456>
 Second tool result content
 </result-def456>
 ```
 Each result will have a unique random ID tag, and the order of results will match the order of your tool calls.
 </bundled tools format>
 DO NOT CLAIM YOU ARE FINISHED UNTIL YOU ACTUALLY ARE!
 Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**
 """
@ -115,19 +131,6 @@ def main():
 \"\"\")
 </multiline content reminder>
 <bundleable tools reminder>
 You can bundle multiple calls to these tools in one response:
 - emit_expert_context
 - ask_expert
 - emit_key_facts
 - emit_key_snippet
 Example of bundled tools:
 emit_key_facts(["Important fact 1", "Important fact 2"])
 emit_expert_context("Additional context")
 ask_expert("Question about this context")
 </bundleable tools reminder>
 --- EXAMPLE GOOD OUTPUTS ---
 <example good output>
@ -169,5 +172,13 @@ CORRECT:   put_complete_file_contents("/path/to/file.py", \"\"\"def main():
    print("Hello")
 \"\"\")
-NOTE: You can also bundle multiple calls to certain tools (emit_expert_context, ask_expert, emit_key_facts, emit_key_snippet) in one response.
+NOTE: You can also bundle multiple calls to certain tools (emit_expert_context, ask_expert, emit_key_facts, emit_key_snippet, and others) in one response. When bundling tools, each result will be returned with a unique random ID tag in this format:
 <result-abc123>
 First tool result content
 </result-abc123>
 <result-def456>
 Second tool result content
 </result-def456>
 """
--- a/tests/agent_backends/test_bundled_tools.py
+++ b/tests/agent_backends/test_bundled_tools.py
@ -120,8 +120,12 @@ ask_expert("What does this mean?")'''
        # Execute
        result = agent._execute_tool(mock_message)
-    # Assert
+    # Assert: We now verify that the result contains both tool call results with tagging
-    assert result == "Expert answer"  # Should return the result of the last tool call
+    assert "<result-" in result  # Check for result tag start
    assert "</result-" in result  # Check for result tag end
    assert "Context emitted" in result  # Check first result content
    assert "Expert answer" in result  # Check second result content
    # Verify the correct function calls were made with the right parameters
    mock_emit_expert_context.assert_called_once_with("Important context")
    mock_ask_expert.assert_called_once_with("What does this mean?")
@ -170,5 +174,8 @@ emit_key_snippet({"file": "example.py", "start_line": 10, "end_line": 20})'''
            # Execute
            result = agent._execute_tool(mock_message)
-    # Assert
+    # Assert: Verify both tool results are included in the tagged output
-    assert result == "Snippet emitted"  # Should return the result of the last tool call
+    assert "<result-" in result  # Check for result tag start
    assert "</result-" in result  # Check for result tag end
    assert "Facts emitted" in result  # Check first result content
    assert "Snippet emitted" in result  # Check second result content
--- a/tests/ra_aid/agent_backends/test_ciayn_bundled_tools.py
+++ b/tests/ra_aid/agent_backends/test_ciayn_bundled_tools.py
@ -0,0 +1,87 @@
 import unittest
 from unittest.mock import MagicMock, patch
 from langchain_core.messages import AIMessage
 from ra_aid.agent_backends.ciayn_agent import CiaynAgent
 class TestCiaynBundledTools(unittest.TestCase):
    """Test the bundled tool call functionality in the CiaynAgent."""

    def setUp(self):
        """Build an agent around two fake tools and install the patches the tests rely on."""
        self.model = MagicMock()
        self.tools = [
            MagicMock(func=lambda content: f"Result of tool1: {content}"),
            MagicMock(func=lambda content: f"Result of tool2: {content}"),
        ]
        # Set up function names
        self.tools[0].func.__name__ = "emit_expert_context"
        self.tools[1].func.__name__ = "ask_expert"
        # Create agent
        self.agent = CiaynAgent(model=self.model, tools=self.tools)

        # Each patcher's stop() is registered with addCleanup immediately after
        # start(): unittest does not call tearDown when setUp raises, so a
        # failure in a later step would otherwise leave the module-level patch
        # active for every test that runs afterwards.

        # Mock the validation to always return False (valid)
        self.validate_patcher = patch('ra_aid.agent_backends.ciayn_agent.validate_function_call_pattern', return_value=False)
        self.mock_validate = self.validate_patcher.start()
        self.addCleanup(self.validate_patcher.stop)

        # Mock _extract_tool_call in case it's needed
        self.extract_patcher = patch.object(self.agent, '_extract_tool_call', return_value="mocked_tool_call")
        self.mock_extract = self.extract_patcher.start()
        self.addCleanup(self.extract_patcher.stop)

    @patch('random.choice', side_effect=lambda chars: chars[0])  # Make random IDs predictable for testing
    def test_bundled_tool_calls(self, mock_random):
        """Test that bundled tool calls return results properly formatted with result tags."""
        # Create a message with multiple bundled tool calls
        msg = AIMessage(content="""emit_expert_context("Expert context 1")
 ask_expert("Expert question 1")""")
        # Execute the tool calls
        result = self.agent._execute_tool(msg)
        # Verify the result has both results with proper format
        self.assertIn("<result-", result)
        self.assertIn("</result-", result)
        self.assertIn("Result of tool1: Expert context 1", result)
        self.assertIn("Result of tool2: Expert question 1", result)
        # Verify we have two result sections
        self.assertEqual(2, result.count("<result-"))
        self.assertEqual(2, result.count("</result-"))

    @patch('random.choice', side_effect=lambda chars: chars[0])  # Make random IDs predictable for testing
    def test_single_tool_call(self, mock_random):
        """Test that single tool calls still return just the result without tags."""
        # Create a message with a single tool call
        msg = AIMessage(content="emit_expert_context(\"Expert context 1\")")
        # Execute the tool call
        result = self.agent._execute_tool(msg)
        # Verify the result is just the plain text result without result tags
        self.assertEqual("Result of tool1: Expert context 1", result)
        self.assertNotIn("<result-", result)
        self.assertNotIn("</result-", result)

    def test_random_id_generation(self):
        """Test that _generate_random_id honours the requested length and alphabet."""
        # Generate IDs of different lengths
        id1 = self.agent._generate_random_id(length=6)
        id2 = self.agent._generate_random_id(length=10)
        # Verify the IDs are of the correct length
        self.assertEqual(6, len(id1))
        self.assertEqual(10, len(id2))
        # Verify only lowercase ASCII letters and digits are used
        allowed = set("abcdefghijklmnopqrstuvwxyz0123456789")
        self.assertTrue(set(id1 + id2).issubset(allowed))
        # Verify the IDs are different (two independent random draws; a
        # collision on the first six characters is possible but vanishingly rare)
        self.assertNotEqual(id1, id2[:6])
 # Run this module's tests when the file is executed directly.
 if __name__ == "__main__":
    unittest.main()
--- a/tests/ra_aid/agent_backends/test_ciayn_single_tool.py
+++ b/tests/ra_aid/agent_backends/test_ciayn_single_tool.py
@ -0,0 +1,47 @@
 import unittest
 from unittest.mock import MagicMock, patch
 from langchain_core.messages import AIMessage
 from ra_aid.agent_backends.ciayn_agent import CiaynAgent
 class TestCiaynSingleTool(unittest.TestCase):
    """Checks that a lone tool call is unaffected by the bundling implementation."""

    def setUp(self):
        """Create an agent with one fake tool and stub out call validation."""
        self.model = MagicMock()
        only_tool = MagicMock(func=lambda content: f"Result: {content}")
        only_tool.func.__name__ = "non_bundleable_tool"
        self.tools = [only_tool]
        self.agent = CiaynAgent(model=self.model, tools=self.tools)
        # The validator is forced to return False, which the agent treats as
        # "call is already valid", so no fix-up step runs during the test.
        self.validate_patcher = patch(
            'ra_aid.agent_backends.ciayn_agent.validate_function_call_pattern',
            return_value=False,
        )
        self.mock_validate = self.validate_patcher.start()

    def tearDown(self):
        """Undo the validator patch installed in setUp."""
        self.validate_patcher.stop()

    def test_single_tool_call(self):
        """A single tool call yields the bare tool result with no result tags."""
        message = AIMessage(content="non_bundleable_tool(\"Test content\")")
        output = self.agent._execute_tool(message)
        # The plain tool output comes back untouched ...
        self.assertEqual("Result: Test content", output)
        # ... and neither the opening nor the closing tag marker appears.
        for tag in ("<result-", "</result-"):
            self.assertNotIn(tag, output)
 # Run this module's tests when the file is executed directly.
 if __name__ == "__main__":
    unittest.main()
--- a/tests/ra_aid/agent_backends/test_ciayn_stream_bundling.py
+++ b/tests/ra_aid/agent_backends/test_ciayn_stream_bundling.py
@ -0,0 +1,64 @@
 import unittest
 from unittest.mock import MagicMock, patch
 from langchain_core.messages import AIMessage
 from ra_aid.agent_backends.ciayn_agent import CiaynAgent
 class TestCiaynStreamBundling(unittest.TestCase):
    """Checks the exact text _execute_tool produces for bundled tool calls."""

    def setUp(self):
        """Create an agent with two fake tools and stub out call validation."""
        self.model = MagicMock()
        first_tool = MagicMock(func=lambda content: f"Result of tool1: {content}")
        second_tool = MagicMock(func=lambda content: f"Result of tool2: {content}")
        first_tool.func.__name__ = "emit_expert_context"
        second_tool.func.__name__ = "ask_expert"
        self.tools = [first_tool, second_tool]
        self.agent = CiaynAgent(model=self.model, tools=self.tools)
        # The validator is forced to return False, which the agent treats as
        # "call is already valid", so no fix-up step runs during the test.
        self.validate_patcher = patch(
            'ra_aid.agent_backends.ciayn_agent.validate_function_call_pattern',
            return_value=False,
        )
        self.mock_validate = self.validate_patcher.start()

    def tearDown(self):
        """Undo the validator patch installed in setUp."""
        self.validate_patcher.stop()

    @patch('random.choice', side_effect=lambda chars: chars[0])  # Make random IDs predictable for testing
    def test_execute_tool_with_bundled_calls(self, mock_random):
        """Bundled calls come back as tagged sections joined by blank lines."""
        bundled_calls = """emit_expert_context("Test content 1")
 ask_expert("Test question 1")"""
        output = self.agent._execute_tool(AIMessage(content=bundled_calls))

        # Both tag markers and both tool outputs must be present.
        expected_fragments = (
            "<result-",
            "</result-",
            "Result of tool1: Test content 1",
            "Result of tool2: Test question 1",
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, output)

        # Exactly one opening and one closing tag per bundled call.
        for tag in ("<result-", "</result-"):
            self.assertEqual(2, output.count(tag))

        # With random.choice patched to return the first candidate character,
        # every generated ID is "aaaaaa", so the full output is predictable.
        expected_format = "<result-aaaaaa>\nResult of tool1: Test content 1\n</result-aaaaaa>\n\n<result-aaaaaa>\nResult of tool2: Test question 1\n</result-aaaaaa>"
        self.assertEqual(expected_format, output)
 # Run this module's tests when the file is executed directly.
 if __name__ == "__main__":
    unittest.main()