multiple tool results in ciayn

This commit is contained in:
AI Christianson 2025-03-04 07:59:05 -05:00
parent bee7416bf2
commit 3d5b5850b4
6 changed files with 256 additions and 20 deletions

View File

@ -1,5 +1,7 @@
import re
import ast
import string
import random
from dataclasses import dataclass
from typing import Any, Dict, Generator, List, Optional, Union, Tuple
@ -246,6 +248,8 @@ class CiaynAgent:
# If we have multiple valid bundleable calls, execute them in sequence
if len(tool_calls) > 1:
results = []
result_strings = []
for call in tool_calls:
# Validate and fix each call if needed
if validate_function_call_pattern(call):
@ -255,9 +259,13 @@ class CiaynAgent:
# Execute the call and collect the result
result = eval(call.strip(), globals_dict)
results.append(result)
# Generate a random ID for this result
result_id = self._generate_random_id()
result_strings.append(f"<result-{result_id}>\n{result}\n</result-{result_id}>")
# Return the result of the last tool call
return results[-1]
# Return all results as one big string with tagged sections
return "\n\n".join(result_strings)
# Regular single tool call case
if validate_function_call_pattern(code):
@ -284,6 +292,18 @@ class CiaynAgent:
error_msg, base_message=msg, tool_name=tool_name
) from e
def _generate_random_id(self, length: int = 6) -> str:
    """Create a short random identifier used to tag bundled tool results.

    Args:
        length: Number of characters in the generated ID.

    Returns:
        A string of ``length`` random lowercase letters and digits.
    """
    alphabet = string.ascii_lowercase + string.digits
    picked = [random.choice(alphabet) for _ in range(length)]
    return "".join(picked)
def extract_tool_name(self, code: str) -> str:
"""Extract the tool name from the code."""
match = re.match(r"\s*([\w_\-]+)\s*\(", code)

View File

@ -93,6 +93,22 @@ PERFORMING WELL AS AN EFFICIENT YET COMPLETE AGENT WILL HELP MY CAREER.
4. WHEN USING put_complete_file_contents, ALWAYS PUT THE ENTIRE FILE CONTENT INSIDE ONE TRIPLE-QUOTED STRING
</critical rules>
<bundled tools format>
When you bundle multiple tool calls in one response, you'll receive results in the following format:
```
<result-abc123>
First tool result content
</result-abc123>
<result-def456>
Second tool result content
</result-def456>
```
Each result will have a unique random ID tag, and the order of results will match the order of your tool calls.
</bundled tools format>
DO NOT CLAIM YOU ARE FINISHED UNTIL YOU ACTUALLY ARE!
Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**
"""
@ -115,19 +131,6 @@ def main():
\"\"\")
</multiline content reminder>
<bundleable tools reminder>
You can bundle multiple calls to these tools in one response:
- emit_expert_context
- ask_expert
- emit_key_facts
- emit_key_snippet
Example of bundled tools:
emit_key_facts(["Important fact 1", "Important fact 2"])
emit_expert_context("Additional context")
ask_expert("Question about this context")
</bundleable tools reminder>
--- EXAMPLE GOOD OUTPUTS ---
<example good output>
@ -169,5 +172,13 @@ CORRECT: put_complete_file_contents("/path/to/file.py", \"\"\"def main():
print("Hello")
\"\"\")
NOTE: You can also bundle multiple calls to certain tools (emit_expert_context, ask_expert, emit_key_facts, emit_key_snippet) in one response.
NOTE: You can also bundle multiple calls to certain tools (emit_expert_context, ask_expert, emit_key_facts, emit_key_snippet, and others) in one response. When bundling tools, each result will be returned with a unique random ID tag in this format:
<result-abc123>
First tool result content
</result-abc123>
<result-def456>
Second tool result content
</result-def456>
"""

View File

@ -120,8 +120,12 @@ ask_expert("What does this mean?")'''
# Execute
result = agent._execute_tool(mock_message)
# Assert
assert result == "Expert answer" # Should return the result of the last tool call
# Assert: We now verify that the result contains both tool call results with tagging
assert "<result-" in result # Check for result tag start
assert "</result-" in result # Check for result tag end
assert "Context emitted" in result # Check first result content
assert "Expert answer" in result # Check second result content
# Verify the correct function calls were made with the right parameters
mock_emit_expert_context.assert_called_once_with("Important context")
mock_ask_expert.assert_called_once_with("What does this mean?")
@ -170,5 +174,8 @@ emit_key_snippet({"file": "example.py", "start_line": 10, "end_line": 20})'''
# Execute
result = agent._execute_tool(mock_message)
# Assert
assert result == "Snippet emitted" # Should return the result of the last tool call
# Assert: Verify both tool results are included in the tagged output
assert "<result-" in result # Check for result tag start
assert "</result-" in result # Check for result tag end
assert "Facts emitted" in result # Check first result content
assert "Snippet emitted" in result # Check second result content

View File

@ -0,0 +1,87 @@
import unittest
from unittest.mock import MagicMock, patch
from langchain_core.messages import AIMessage
from ra_aid.agent_backends.ciayn_agent import CiaynAgent
class TestCiaynBundledTools(unittest.TestCase):
    """Test the bundled tool call functionality in the CiaynAgent."""

    def setUp(self):
        """Set up the test case with mocked model and tools."""
        self.model = MagicMock()
        self.tools = [
            MagicMock(func=lambda content: f"Result of tool1: {content}"),
            MagicMock(func=lambda content: f"Result of tool2: {content}"),
        ]
        # The agent dispatches on each tool function's __name__.
        self.tools[0].func.__name__ = "emit_expert_context"
        self.tools[1].func.__name__ = "ask_expert"
        self.agent = CiaynAgent(model=self.model, tools=self.tools)
        # Mock the validation to always return False (False == "pattern is valid").
        self.validate_patcher = patch(
            'ra_aid.agent_backends.ciayn_agent.validate_function_call_pattern',
            return_value=False,
        )
        self.mock_validate = self.validate_patcher.start()
        # Mock _extract_tool_call in case the agent needs a fallback parse.
        self.extract_patcher = patch.object(
            self.agent, '_extract_tool_call', return_value="mocked_tool_call"
        )
        self.mock_extract = self.extract_patcher.start()

    def tearDown(self):
        """Clean up patches after the test."""
        self.validate_patcher.stop()
        self.extract_patcher.stop()

    @patch('random.choice', side_effect=lambda chars: chars[0])  # predictable IDs
    def test_bundled_tool_calls(self, mock_random):
        """Test that bundled tool calls return results wrapped in result tags."""
        msg = AIMessage(content="""emit_expert_context("Expert context 1")
ask_expert("Expert question 1")""")
        result = self.agent._execute_tool(msg)

        # Both tool results must appear, each inside tagged sections.
        self.assertIn("<result-", result)
        self.assertIn("</result-", result)
        self.assertIn("Result of tool1: Expert context 1", result)
        self.assertIn("Result of tool2: Expert question 1", result)
        # Exactly one opening/closing tag pair per bundled call.
        self.assertEqual(2, result.count("<result-"))
        self.assertEqual(2, result.count("</result-"))

    @patch('random.choice', side_effect=lambda chars: chars[0])  # predictable IDs
    def test_single_tool_call(self, mock_random):
        """Test that single tool calls return just the result without tags."""
        msg = AIMessage(content="emit_expert_context(\"Expert context 1\")")
        result = self.agent._execute_tool(msg)

        # Single calls bypass the bundling format entirely.
        self.assertEqual("Result of tool1: Expert context 1", result)
        self.assertNotIn("<result-", result)
        self.assertNotIn("</result-", result)

    def test_random_id_generation(self):
        """Test that _generate_random_id creates well-formed IDs.

        FIX: the original asserted ``assertNotEqual(id1, id2[:6])`` to show
        the IDs "are different" — a nondeterministic check that can fail
        spuriously, since two independently random IDs may legitimately
        share a 6-character prefix. Replaced with deterministic length and
        charset assertions.
        """
        id1 = self.agent._generate_random_id(length=6)
        id2 = self.agent._generate_random_id(length=10)
        for rid, expected_len in ((id1, 6), (id2, 10)):
            self.assertEqual(expected_len, len(rid))
            # IDs are drawn from ascii_lowercase + digits.
            self.assertTrue(rid.isalnum())
            self.assertEqual(rid, rid.lower())


if __name__ == "__main__":
    unittest.main()

View File

@ -0,0 +1,47 @@
import unittest
from unittest.mock import MagicMock, patch
from langchain_core.messages import AIMessage
from ra_aid.agent_backends.ciayn_agent import CiaynAgent
class TestCiaynSingleTool(unittest.TestCase):
    """Test that single tool calls still work correctly with the bundling implementation."""

    def setUp(self):
        """Build an agent around a single mocked, non-bundleable tool."""
        self.model = MagicMock()
        only_tool = MagicMock(func=lambda content: f"Result: {content}")
        only_tool.func.__name__ = "non_bundleable_tool"
        self.tools = [only_tool]
        self.agent = CiaynAgent(model=self.model, tools=self.tools)
        # The validator returning False means the call pattern is valid.
        self.validate_patcher = patch(
            'ra_aid.agent_backends.ciayn_agent.validate_function_call_pattern',
            return_value=False,
        )
        self.mock_validate = self.validate_patcher.start()

    def tearDown(self):
        """Stop the validator patch."""
        self.validate_patcher.stop()

    def test_single_tool_call(self):
        """A lone tool call must come back as plain text, with no result tags."""
        message = AIMessage(content='non_bundleable_tool("Test content")')
        result = self.agent._execute_tool(message)

        self.assertEqual("Result: Test content", result)
        # Tagged sections are reserved for bundled (multi-call) responses.
        for tag in ("<result-", "</result-"):
            self.assertNotIn(tag, result)


if __name__ == "__main__":
    unittest.main()

View File

@ -0,0 +1,64 @@
import unittest
from unittest.mock import MagicMock, patch
from langchain_core.messages import AIMessage
from ra_aid.agent_backends.ciayn_agent import CiaynAgent
class TestCiaynStreamBundling(unittest.TestCase):
    """Test that the _execute_tool method correctly formats bundled tool call results."""

    def setUp(self):
        """Wire up a CiaynAgent around two mocked bundleable tools."""
        self.model = MagicMock()
        first = MagicMock(func=lambda content: f"Result of tool1: {content}")
        second = MagicMock(func=lambda content: f"Result of tool2: {content}")
        first.func.__name__ = "emit_expert_context"
        second.func.__name__ = "ask_expert"
        self.tools = [first, second]
        self.agent = CiaynAgent(model=self.model, tools=self.tools)
        # A validator result of False means "pattern is valid" to the agent.
        self.validate_patcher = patch(
            'ra_aid.agent_backends.ciayn_agent.validate_function_call_pattern',
            return_value=False,
        )
        self.mock_validate = self.validate_patcher.start()

    def tearDown(self):
        """Stop the validator patch."""
        self.validate_patcher.stop()

    # Patch random.choice so every generated ID is the predictable "aaaaaa".
    @patch('random.choice', side_effect=lambda chars: chars[0])
    def test_execute_tool_with_bundled_calls(self, mock_random):
        """_execute_tool should wrap each bundled result in its own tag pair."""
        tool_call_text = """emit_expert_context("Test content 1")
ask_expert("Test question 1")"""
        result = self.agent._execute_tool(AIMessage(content=tool_call_text))

        # Both tool outputs appear, each inside opening/closing result tags.
        for fragment in (
            "<result-",
            "</result-",
            "Result of tool1: Test content 1",
            "Result of tool2: Test question 1",
        ):
            self.assertIn(fragment, result)
        self.assertEqual(2, result.count("<result-"))
        self.assertEqual(2, result.count("</result-"))

        # With random.choice pinned, the exact output is fully predictable.
        expected = (
            "<result-aaaaaa>\nResult of tool1: Test content 1\n</result-aaaaaa>"
            "\n\n"
            "<result-aaaaaa>\nResult of tool2: Test question 1\n</result-aaaaaa>"
        )
        self.assertEqual(expected, result)


if __name__ == "__main__":
    unittest.main()