From 1c007980d975cc6f29ab0f36e5c0112eb64b92ae Mon Sep 17 00:00:00 2001 From: AI Christianson Date: Fri, 27 Dec 2024 18:32:02 -0500 Subject: [PATCH 01/15] Initial llm fn calling experiment --- experiment/llm_test.py | 134 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 experiment/llm_test.py diff --git a/experiment/llm_test.py b/experiment/llm_test.py new file mode 100644 index 0000000..e7fc132 --- /dev/null +++ b/experiment/llm_test.py @@ -0,0 +1,134 @@ +import os +from dotenv import load_dotenv +from langchain_openai import ChatOpenAI +from langchain_core.messages import HumanMessage, SystemMessage + +# Load environment variables +load_dotenv() + +def check_weather(location: str) -> str: + return f"The weather in {location} is sunny!" + +def ask_human(query: str) -> str: + print() + print(f"Assistant: {query}") + user_input = input("\nYou: ").strip() + return user_input + +def output_message(message: str, prompt_user_input: bool = False) -> str: + print() + print(f"Assistant: {message.strip()}") + if prompt_user_input: + user_input = input("\nYou: ").strip() + return user_input + return "" + +def evaluate_response(code: str) -> any: + """ + Evaluates a single function call and returns its result + """ + globals_dict = { + 'check_weather': check_weather, + 'ask_human': ask_human, + 'output_message': output_message + } + + try: + # Using eval() instead of exec() since we're evaluating a single expression + result = eval(code, globals_dict) + return result + except Exception as e: + return f"Error executing code: {str(e)}" + +def create_chat_interface(): + # Initialize the chat model + chat = ChatOpenAI( + api_key=os.getenv("OPENROUTER_API_KEY"), + temperature=0.3, + base_url="https://openrouter.ai/api/v1", + # model="deepseek/deepseek-chat" + model="qwen/qwen-2.5-coder-32b-instruct" + ) + + # Chat loop + print("Welcome to the Chat Interface! (Type 'quit' to exit)") + + chat_history = [] + last_result = None + first_iteration = True + + while True: + base_prompt = "" + + # Add the last result to the prompt if it's not the first iteration + if not first_iteration and last_result is not None: + base_prompt += f"\n{last_result}" + + # Construct the tool documentation and context + base_prompt += """ + + # Get the weather at a location: + check_weather(location: str) -> str + + # Output a message and optionally get their response: + output_message(message: str, prompt_user_input: bool = False) -> str + + """ + + base_prompt += """ + + You are a ReAct agent. You run in a loop and use ONE of the available functions per iteration. + If the current query does not require a function call, just use output_message to say what you would normally say. + The result of that function call will be given to you in the next message. + Call one function at a time. Function arguments can be complex objects, long strings, etc. if needed. + The user cannot see the results of function calls, so you have to explicitly call output_message if you want them to see something. + You must always respond with a single line of python that calls one of the available tools. + Start by asking the user what they want. + + + + check_weather("London") + + + + output_message(\"\"\" + How can I help you today? 
+ \"\"\", True) + + """ + + base_prompt += "\nOutput **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**" + + # Add user message to history + # Remove the previous messages if they exist + # if len(chat_history) > 1: + # chat_history.pop() # Remove the last assistant message + # chat_history.pop() # Remove the last human message + + chat_history.append(HumanMessage(content=base_prompt)) + + try: + # Get response from model + response = chat.invoke(chat_history) + + # # Print the code response + # print("\nAssistant generated code:") + # print(response.content) + + # Evaluate the code + # print("\nExecuting code:") + last_result = evaluate_response(response.content.strip()) + # if last_result is not None: + # print(f"Result: {last_result}") + + # Add assistant response to history + chat_history.append(response) + + # Set first_iteration to False after the first loop + first_iteration = False + + except Exception as e: + print(f"\nError: {str(e)}") + +if __name__ == "__main__": + create_chat_interface() \ No newline at end of file From 83e094bba04f506106444531005a209fea265118 Mon Sep 17 00:00:00 2001 From: AI Christianson Date: Fri, 27 Dec 2024 18:56:47 -0500 Subject: [PATCH 02/15] initial working agent --- experiment/llm_test.py | 85 +++++++++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 21 deletions(-) diff --git a/experiment/llm_test.py b/experiment/llm_test.py index e7fc132..2afe8d2 100644 --- a/experiment/llm_test.py +++ b/experiment/llm_test.py @@ -1,21 +1,52 @@ import os from dotenv import load_dotenv from langchain_openai import ChatOpenAI +from langchain_core.tools import tool from langchain_core.messages import HumanMessage, SystemMessage +from ra_aid.tools.list_directory import list_directory_tree +from ra_aid.tool_configs import get_read_only_tools +import inspect # Load environment variables load_dotenv() + +def get_function_info(func): + """ + Returns a well-formatted string containing the function signature and docstring, + designed to be easily readable by both humans and LLMs. + """ + # Get signature + signature = inspect.signature(func) + + # Get docstring - use getdoc to clean up indentation + docstring = inspect.getdoc(func) + if docstring is None: + docstring = "No docstring provided" + + # Format full signature including return type + full_signature = f"{func.__name__}{signature}" + + # Build the complete string + info = f"""{full_signature} +\"\"\" +{docstring} +\"\"\" """ + + return info + +@tool def check_weather(location: str) -> str: + """ + Gets the weather at the given location. + """ return f"The weather in {location} is sunny!" -def ask_human(query: str) -> str: - print() - print(f"Assistant: {query}") - user_input = input("\nYou: ").strip() - return user_input - +@tool def output_message(message: str, prompt_user_input: bool = False) -> str: + """ + Outputs a message to the user, optionally prompting for input. 
+ """ print() print(f"Assistant: {message.strip()}") if prompt_user_input: @@ -23,14 +54,21 @@ def output_message(message: str, prompt_user_input: bool = False) -> str: return user_input return "" -def evaluate_response(code: str) -> any: +def evaluate_response(code: str, tools: list) -> any: """ Evaluates a single function call and returns its result + + Args: + code (str): The code to evaluate + tools (list): List of tool objects that have a .func property + + Returns: + any: Result of the code evaluation """ + # Create globals dictionary from tool functions globals_dict = { - 'check_weather': check_weather, - 'ask_human': ask_human, - 'output_message': output_message + tool.func.__name__: tool.func + for tool in tools } try: @@ -57,6 +95,15 @@ def create_chat_interface(): last_result = None first_iteration = True + tools = get_read_only_tools(True, True) + + tools.extend([output_message]) + + available_functions = [] + + for t in tools: + available_functions.append(get_function_info(t.func)) + while True: base_prompt = "" @@ -65,13 +112,9 @@ def create_chat_interface(): base_prompt += f"\n{last_result}" # Construct the tool documentation and context - base_prompt += """ + base_prompt += f""" - # Get the weather at a location: - check_weather(location: str) -> str - - # Output a message and optionally get their response: - output_message(message: str, prompt_user_input: bool = False) -> str + {"\n\n".join(available_functions)} """ @@ -112,14 +155,14 @@ def create_chat_interface(): response = chat.invoke(chat_history) # # Print the code response - # print("\nAssistant generated code:") - # print(response.content) + print("\nAssistant generated code:") + print(response.content) # Evaluate the code # print("\nExecuting code:") - last_result = evaluate_response(response.content.strip()) - # if last_result is not None: - # print(f"Result: {last_result}") + last_result = evaluate_response(response.content.strip(), tools) + if last_result is not None: + print(f"Result: {last_result}") # Add assistant response to history chat_history.append(response) From 1198834261365ce105748a8fe7cdcd4cefced169 Mon Sep 17 00:00:00 2001 From: AI Christianson Date: Fri, 27 Dec 2024 19:54:04 -0500 Subject: [PATCH 03/15] agent --- experiment/llm_test.py | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/experiment/llm_test.py b/experiment/llm_test.py index 2afe8d2..ac090b0 100644 --- a/experiment/llm_test.py +++ b/experiment/llm_test.py @@ -6,6 +6,11 @@ from langchain_core.messages import HumanMessage, SystemMessage from ra_aid.tools.list_directory import list_directory_tree from ra_aid.tool_configs import get_read_only_tools import inspect +from rich.panel import Panel +from rich.markdown import Markdown +from rich.console import Console + +console = Console() # Load environment variables load_dotenv() @@ -48,9 +53,10 @@ def output_message(message: str, prompt_user_input: bool = False) -> str: Outputs a message to the user, optionally prompting for input. 
""" print() - print(f"Assistant: {message.strip()}") + console.print(Panel(Markdown(message.strip()))) if prompt_user_input: - user_input = input("\nYou: ").strip() + user_input = input("\n> ").strip() + print() return user_input return "" @@ -76,16 +82,23 @@ def evaluate_response(code: str, tools: list) -> any: result = eval(code, globals_dict) return result except Exception as e: + print(f"Code:\n\n{code}\n\n") + print(f"Error executing code: {str(e)}") return f"Error executing code: {str(e)}" def create_chat_interface(): # Initialize the chat model chat = ChatOpenAI( - api_key=os.getenv("OPENROUTER_API_KEY"), - temperature=0.3, - base_url="https://openrouter.ai/api/v1", + # api_key=os.getenv("OPENROUTER_API_KEY"), + api_key=os.getenv("DEEPSEEK_API_KEY"), + temperature=0.7 , + # base_url="https://openrouter.ai/api/v1", + base_url="https://api.deepseek.com/v1", # model="deepseek/deepseek-chat" - model="qwen/qwen-2.5-coder-32b-instruct" + model="deepseek-chat" + # model="openai/gpt-4o-mini" + # model="qwen/qwen-2.5-coder-32b-instruct" + # model="qwen/qwen-2.5-72b-instruct" ) # Chat loop @@ -126,6 +139,7 @@ def create_chat_interface(): Call one function at a time. Function arguments can be complex objects, long strings, etc. if needed. The user cannot see the results of function calls, so you have to explicitly call output_message if you want them to see something. You must always respond with a single line of python that calls one of the available tools. + Use as many steps as you need to in order to fully complete the task. Start by asking the user what they want. @@ -152,23 +166,28 @@ def create_chat_interface(): try: # Get response from model + # print("PRECHAT") response = chat.invoke(chat_history) + # print("POSTCHAT") # # Print the code response - print("\nAssistant generated code:") - print(response.content) + # print("\nAssistant generated code:") + # print(response.content) # Evaluate the code # print("\nExecuting code:") + # print("PREEVAL") last_result = evaluate_response(response.content.strip(), tools) - if last_result is not None: - print(f"Result: {last_result}") + # print("POSTEVAL") + # if last_result is not None: + # print(f"Result: {last_result}") # Add assistant response to history chat_history.append(response) # Set first_iteration to False after the first loop first_iteration = False + # print("LOOP") except Exception as e: print(f"\nError: {str(e)}") From c70411744491b7936c4b602899958608c9a2d6e9 Mon Sep 17 00:00:00 2001 From: AI Christianson Date: Sat, 28 Dec 2024 14:02:52 -0500 Subject: [PATCH 04/15] create compatible class structure --- experiment/llm_test.py | 292 +++++++++++++++++++---------------------- 1 file changed, 135 insertions(+), 157 deletions(-) diff --git a/experiment/llm_test.py b/experiment/llm_test.py index ac090b0..4e37711 100644 --- a/experiment/llm_test.py +++ b/experiment/llm_test.py @@ -1,8 +1,11 @@ import os +import uuid from dotenv import load_dotenv +from ra_aid.agent_utils import run_agent_with_retry +from typing import Dict, Any, Generator, List, Optional +from langchain_core.messages import AIMessage, HumanMessage from langchain_openai import ChatOpenAI from langchain_core.tools import tool -from langchain_core.messages import HumanMessage, SystemMessage from ra_aid.tools.list_directory import list_directory_tree from ra_aid.tool_configs import get_read_only_tools import inspect @@ -15,44 +18,14 @@ console = Console() # Load environment variables load_dotenv() - -def get_function_info(func): - """ - Returns a well-formatted string 
containing the function signature and docstring, - designed to be easily readable by both humans and LLMs. - """ - # Get signature - signature = inspect.signature(func) - - # Get docstring - use getdoc to clean up indentation - docstring = inspect.getdoc(func) - if docstring is None: - docstring = "No docstring provided" - - # Format full signature including return type - full_signature = f"{func.__name__}{signature}" - - # Build the complete string - info = f"""{full_signature} -\"\"\" -{docstring} -\"\"\" """ - - return info - @tool def check_weather(location: str) -> str: - """ - Gets the weather at the given location. - """ + """Gets the weather at the given location.""" return f"The weather in {location} is sunny!" @tool def output_message(message: str, prompt_user_input: bool = False) -> str: - """ - Outputs a message to the user, optionally prompting for input. - """ - print() + """Outputs a message to the user, optionally prompting for input.""" console.print(Panel(Markdown(message.strip()))) if prompt_user_input: user_input = input("\n> ").strip() @@ -60,137 +33,142 @@ def output_message(message: str, prompt_user_input: bool = False) -> str: return user_input return "" -def evaluate_response(code: str, tools: list) -> any: - """ - Evaluates a single function call and returns its result - - Args: - code (str): The code to evaluate - tools (list): List of tool objects that have a .func property - - Returns: - any: Result of the code evaluation - """ - # Create globals dictionary from tool functions - globals_dict = { - tool.func.__name__: tool.func - for tool in tools - } - - try: - # Using eval() instead of exec() since we're evaluating a single expression - result = eval(code, globals_dict) - return result - except Exception as e: - print(f"Code:\n\n{code}\n\n") - print(f"Error executing code: {str(e)}") - return f"Error executing code: {str(e)}" +class CiaynAgent: + def get_function_info(self, func): + """ + Returns a well-formatted string containing the function signature and docstring, + designed to be easily readable by both humans and LLMs. + """ + signature = inspect.signature(func) + docstring = inspect.getdoc(func) + if docstring is None: + docstring = "No docstring provided" + full_signature = f"{func.__name__}{signature}" + info = f"""{full_signature} +\"\"\" +{docstring} +\"\"\" """ + return info -def create_chat_interface(): - # Initialize the chat model - chat = ChatOpenAI( - # api_key=os.getenv("OPENROUTER_API_KEY"), - api_key=os.getenv("DEEPSEEK_API_KEY"), - temperature=0.7 , - # base_url="https://openrouter.ai/api/v1", - base_url="https://api.deepseek.com/v1", - # model="deepseek/deepseek-chat" - model="deepseek-chat" - # model="openai/gpt-4o-mini" - # model="qwen/qwen-2.5-coder-32b-instruct" - # model="qwen/qwen-2.5-72b-instruct" - ) - - # Chat loop - print("Welcome to the Chat Interface! 
(Type 'quit' to exit)") - - chat_history = [] - last_result = None - first_iteration = True - - tools = get_read_only_tools(True, True) - - tools.extend([output_message]) - - available_functions = [] - - for t in tools: - available_functions.append(get_function_info(t.func)) - - while True: + def __init__(self, model, tools: list): + """Initialize the agent with a model and list of tools.""" + self.model = model + self.tools = tools + self.available_functions = [] + for t in tools: + self.available_functions.append(self.get_function_info(t.func)) + + def _build_prompt(self, last_result: Optional[str] = None) -> str: + """Build the prompt for the agent including available tools and context.""" base_prompt = "" - - # Add the last result to the prompt if it's not the first iteration - if not first_iteration and last_result is not None: + if last_result is not None: base_prompt += f"\n{last_result}" - # Construct the tool documentation and context base_prompt += f""" - - {"\n\n".join(available_functions)} - - """ - - base_prompt += """ - - You are a ReAct agent. You run in a loop and use ONE of the available functions per iteration. - If the current query does not require a function call, just use output_message to say what you would normally say. - The result of that function call will be given to you in the next message. - Call one function at a time. Function arguments can be complex objects, long strings, etc. if needed. - The user cannot see the results of function calls, so you have to explicitly call output_message if you want them to see something. - You must always respond with a single line of python that calls one of the available tools. - Use as many steps as you need to in order to fully complete the task. - Start by asking the user what they want. - - - - check_weather("London") - - - - output_message(\"\"\" - How can I help you today? - \"\"\", True) - - """ - - base_prompt += "\nOutput **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**" - - # Add user message to history - # Remove the previous messages if they exist - # if len(chat_history) > 1: - # chat_history.pop() # Remove the last assistant message - # chat_history.pop() # Remove the last human message - - chat_history.append(HumanMessage(content=base_prompt)) + +{"\n\n".join(self.available_functions)} + + + +You are a ReAct agent. You run in a loop and use ONE of the available functions per iteration. +If the current query does not require a function call, just use output_message to say what you would normally say. +The result of that function call will be given to you in the next message. +Call one function at a time. Function arguments can be complex objects, long strings, etc. if needed. +The user cannot see the results of function calls, so you have to explicitly call output_message if you want them to see something. +You must always respond with a single line of python that calls one of the available tools. +Use as many steps as you need to in order to fully complete the task. +Start by asking the user what they want. 
+ + + +check_weather("London") + + + +output_message(\"\"\"How can I help you today?\"\"\", True) + + +Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**""" + return base_prompt + + def _execute_tool(self, code: str) -> str: + """Execute a tool call and return its result.""" + globals_dict = { + tool.func.__name__: tool.func + for tool in self.tools + } try: - # Get response from model - # print("PRECHAT") - response = chat.invoke(chat_history) - # print("POSTCHAT") - - # # Print the code response - # print("\nAssistant generated code:") - # print(response.content) - - # Evaluate the code - # print("\nExecuting code:") - # print("PREEVAL") - last_result = evaluate_response(response.content.strip(), tools) - # print("POSTEVAL") - # if last_result is not None: - # print(f"Result: {last_result}") - - # Add assistant response to history - chat_history.append(response) - - # Set first_iteration to False after the first loop - first_iteration = False - # print("LOOP") - + result = eval(code.strip(), globals_dict) + return result except Exception as e: - print(f"\nError: {str(e)}") + error_msg = f"Error executing code: {str(e)}" + console.print(f"[red]Error:[/red] {error_msg}") + return error_msg + + + def _create_agent_chunk(self, content: str) -> Dict[str, Any]: + """Create an agent chunk in the format expected by print_agent_output.""" + return { + "agent": { + "messages": [AIMessage(content=content)] + } + } + + def _create_error_chunk(self, content: str) -> Dict[str, Any]: + """Create an error chunk in the format expected by print_agent_output.""" + return { + "tools": { + "messages": [{"status": "error", "content": content}] + } + } + + def stream(self, messages_dict: Dict[str, List[Any]], config: Dict[str, Any] = None) -> Generator[Dict[str, Any], None, None]: + """Stream agent responses in a format compatible with print_agent_output.""" + initial_messages = messages_dict.get("messages", []) + chat_history = [] + last_result = None + first_iteration = True + + while True: + base_prompt = self._build_prompt(None if first_iteration else last_result) + chat_history.append(HumanMessage(content=base_prompt)) + + try: + full_history = initial_messages + chat_history + response = self.model.invoke(full_history) + + last_result = self._execute_tool(response.content) + chat_history.append(response) + first_iteration = False + yield {} + + except Exception as e: + error_msg = f"Error: {str(e)}" + yield self._create_error_chunk(error_msg) + break if __name__ == "__main__": - create_chat_interface() \ No newline at end of file + # Initialize the chat model + chat = ChatOpenAI( + api_key=os.getenv("OPENROUTER_API_KEY"), + temperature=0.7, + base_url="https://openrouter.ai/api/v1", + model="qwen/qwen-2.5-coder-32b-instruct" + ) + + # Get tools + tools = get_read_only_tools(True, True) + tools.append(output_message) + + # Initialize agent + agent = CiaynAgent(chat, tools) + + # Test chat prompt + test_prompt = "Find the tests in this codebase." + + # Run the agent using run_agent_with_retry + result = run_agent_with_retry(agent, test_prompt, {"configurable": {"thread_id": str(uuid.uuid4())}}) + + # Initial greeting + print("Welcome to the Chat Interface! 
(Type 'quit' to exit)") From d8a3c88624a1be265f31f50218e7bfb212429fe9 Mon Sep 17 00:00:00 2001 From: AI Christianson Date: Sat, 28 Dec 2024 14:31:14 -0500 Subject: [PATCH 05/15] add ciayn agent --- ra_aid/agents/ciayn_agent.py | 122 +++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 ra_aid/agents/ciayn_agent.py diff --git a/ra_aid/agents/ciayn_agent.py b/ra_aid/agents/ciayn_agent.py new file mode 100644 index 0000000..2104b1b --- /dev/null +++ b/ra_aid/agents/ciayn_agent.py @@ -0,0 +1,122 @@ +import inspect +from typing import Dict, Any, Generator, List, Optional +from langchain_core.messages import AIMessage, HumanMessage +from rich.panel import Panel +from rich.markdown import Markdown +from rich.console import Console + +console = Console() + +class CiaynAgent: + def _get_function_info(self, func): + """ + Returns a well-formatted string containing the function signature and docstring, + designed to be easily readable by both humans and LLMs. + """ + signature = inspect.signature(func) + docstring = inspect.getdoc(func) + if docstring is None: + docstring = "No docstring provided" + full_signature = f"{func.__name__}{signature}" + info = f"""{full_signature} +\"\"\" +{docstring} +\"\"\"""" + return info + + def __init__(self, model, tools: list): + """Initialize the agent with a model and list of tools.""" + self.model = model + self.tools = tools + self.available_functions = [] + for t in tools: + self.available_functions.append(self._get_function_info(t.func)) + + def _build_prompt(self, last_result: Optional[str] = None) -> str: + """Build the prompt for the agent including available tools and context.""" + base_prompt = "" + if last_result is not None: + base_prompt += f"\n{last_result}" + + base_prompt += f""" + +{"\n\n".join(self.available_functions)} + + + +You are a ReAct agent. You run in a loop and use ONE of the available functions per iteration. +If the current query does not require a function call, just use output_message to say what you would normally say. +The result of that function call will be given to you in the next message. +Call one function at a time. Function arguments can be complex objects, long strings, etc. if needed. +The user cannot see the results of function calls, so you have to explicitly call output_message if you want them to see something. +You must always respond with a single line of python that calls one of the available tools. +Use as many steps as you need to in order to fully complete the task. +Start by asking the user what they want. 
+ + + +check_weather("London") + + + +output_message(\"\"\"How can I help you today?\"\"\", True) + + +Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**""" + return base_prompt + + def _execute_tool(self, code: str) -> str: + """Execute a tool call and return its result.""" + globals_dict = { + tool.func.__name__: tool.func + for tool in self.tools + } + + try: + result = eval(code.strip(), globals_dict) + return result + except Exception as e: + error_msg = f"Error executing code: {str(e)}" + console.print(f"[red]Error:[/red] {error_msg}") + return error_msg + + def _create_agent_chunk(self, content: str) -> Dict[str, Any]: + """Create an agent chunk in the format expected by print_agent_output.""" + return { + "agent": { + "messages": [AIMessage(content=content)] + } + } + + def _create_error_chunk(self, content: str) -> Dict[str, Any]: + """Create an error chunk in the format expected by print_agent_output.""" + return { + "tools": { + "messages": [{"status": "error", "content": content}] + } + } + + def stream(self, messages_dict: Dict[str, List[Any]], config: Dict[str, Any] = None) -> Generator[Dict[str, Any], None, None]: + """Stream agent responses in a format compatible with print_agent_output.""" + initial_messages = messages_dict.get("messages", []) + chat_history = [] + last_result = None + first_iteration = True + + while True: + base_prompt = self._build_prompt(None if first_iteration else last_result) + chat_history.append(HumanMessage(content=base_prompt)) + + try: + full_history = initial_messages + chat_history + response = self.model.invoke(full_history) + + last_result = self._execute_tool(response.content) + chat_history.append(response) + first_iteration = False + yield {} + + except Exception as e: + error_msg = f"Error: {str(e)}" + yield self._create_error_chunk(error_msg) + break From 535be97c1f7c8d03002ee2b6d153463f76d341ff Mon Sep 17 00:00:00 2001 From: AI Christianson Date: Sat, 28 Dec 2024 14:41:39 -0500 Subject: [PATCH 06/15] ciayn --- ra_aid/agents/ciayn_agent.py | 21 ++++++++------------- ra_aid/exceptions.py | 9 +++++++++ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/ra_aid/agents/ciayn_agent.py b/ra_aid/agents/ciayn_agent.py index 2104b1b..3d15e77 100644 --- a/ra_aid/agents/ciayn_agent.py +++ b/ra_aid/agents/ciayn_agent.py @@ -1,11 +1,7 @@ import inspect from typing import Dict, Any, Generator, List, Optional from langchain_core.messages import AIMessage, HumanMessage -from rich.panel import Panel -from rich.markdown import Markdown -from rich.console import Console - -console = Console() +from ra_aid.exceptions import ToolExecutionError class CiaynAgent: def _get_function_info(self, func): @@ -78,7 +74,7 @@ Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**""" except Exception as e: error_msg = f"Error executing code: {str(e)}" console.print(f"[red]Error:[/red] {error_msg}") - return error_msg + raise ToolExecutionError(error_msg) def _create_agent_chunk(self, content: str) -> Dict[str, Any]: """Create an agent chunk in the format expected by print_agent_output.""" @@ -107,16 +103,15 @@ Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**""" base_prompt = self._build_prompt(None if first_iteration else last_result) chat_history.append(HumanMessage(content=base_prompt)) - try: - full_history = initial_messages + chat_history - response = self.model.invoke(full_history) + full_history = initial_messages + chat_history + response = self.model.invoke(full_history) + try: last_result = self._execute_tool(response.content) 
chat_history.append(response) first_iteration = False yield {} - - except Exception as e: - error_msg = f"Error: {str(e)}" - yield self._create_error_chunk(error_msg) + + except ToolExecutionError as e: + yield self._create_error_chunk(str(e)) break diff --git a/ra_aid/exceptions.py b/ra_aid/exceptions.py index 2b9c0b7..9831a24 100644 --- a/ra_aid/exceptions.py +++ b/ra_aid/exceptions.py @@ -7,3 +7,12 @@ class AgentInterrupt(Exception): separate from KeyboardInterrupt which is reserved for top-level handling. """ pass + + +class ToolExecutionError(Exception): + """Exception raised when a tool execution fails. + + This exception is used to distinguish tool execution failures + from other types of errors in the agent system. + """ + pass From 9074cae2f58fb4d4dcdec49a951a038cfbeda0c9 Mon Sep 17 00:00:00 2001 From: AI Christianson Date: Sat, 28 Dec 2024 14:47:34 -0500 Subject: [PATCH 07/15] ciayn --- ra_aid/agents/ciayn_agent.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ra_aid/agents/ciayn_agent.py b/ra_aid/agents/ciayn_agent.py index 3d15e77..0c1b85d 100644 --- a/ra_aid/agents/ciayn_agent.py +++ b/ra_aid/agents/ciayn_agent.py @@ -73,7 +73,6 @@ Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**""" return result except Exception as e: error_msg = f"Error executing code: {str(e)}" - console.print(f"[red]Error:[/red] {error_msg}") raise ToolExecutionError(error_msg) def _create_agent_chunk(self, content: str) -> Dict[str, Any]: From ac2bdfd69bc9cf19152430f637e52de6f1ee0117 Mon Sep 17 00:00:00 2001 From: AI Christianson Date: Sat, 28 Dec 2024 14:54:31 -0500 Subject: [PATCH 08/15] ciayn --- ra_aid/agents/ciayn_agent.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/ra_aid/agents/ciayn_agent.py b/ra_aid/agents/ciayn_agent.py index 0c1b85d..a9cde81 100644 --- a/ra_aid/agents/ciayn_agent.py +++ b/ra_aid/agents/ciayn_agent.py @@ -20,10 +20,17 @@ class CiaynAgent: \"\"\"""" return info - def __init__(self, model, tools: list): - """Initialize the agent with a model and list of tools.""" + def __init__(self, model, tools: list, max_history_messages: int = 50): + """Initialize the agent with a model and list of tools. + + Args: + model: The language model to use + tools: List of tools available to the agent + max_history_messages: Maximum number of messages to keep in chat history + """ self.model = model self.tools = tools + self.max_history_messages = max_history_messages self.available_functions = [] for t in tools: self.available_functions.append(self._get_function_info(t.func)) @@ -91,6 +98,25 @@ Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**""" } } + def _trim_chat_history(self, initial_messages: List[Any], chat_history: List[Any]) -> List[Any]: + """Trim chat history to maximum length while preserving initial messages. + + Only trims the chat_history portion while preserving all initial messages. + Returns the concatenated list of initial_messages + trimmed chat_history. 
+ + Args: + initial_messages: List of initial messages to preserve + chat_history: List of chat messages that may be trimmed + + Returns: + List[Any]: Concatenated initial_messages + trimmed chat_history + """ + if len(chat_history) <= self.max_history_messages: + return initial_messages + chat_history + + # Keep last max_history_messages from chat_history + return initial_messages + chat_history[-self.max_history_messages:] + def stream(self, messages_dict: Dict[str, List[Any]], config: Dict[str, Any] = None) -> Generator[Dict[str, Any], None, None]: """Stream agent responses in a format compatible with print_agent_output.""" initial_messages = messages_dict.get("messages", []) @@ -102,7 +128,7 @@ Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**""" base_prompt = self._build_prompt(None if first_iteration else last_result) chat_history.append(HumanMessage(content=base_prompt)) - full_history = initial_messages + chat_history + full_history = self._trim_chat_history(initial_messages, chat_history) response = self.model.invoke(full_history) try: From 4cb98370c20d7fcd53b6ba7bd568ee86b46eed30 Mon Sep 17 00:00:00 2001 From: AI Christianson Date: Sat, 28 Dec 2024 14:58:16 -0500 Subject: [PATCH 09/15] ciayn --- tests/ra_aid/agents/test_ciayn_agent.py | 100 ++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 tests/ra_aid/agents/test_ciayn_agent.py diff --git a/tests/ra_aid/agents/test_ciayn_agent.py b/tests/ra_aid/agents/test_ciayn_agent.py new file mode 100644 index 0000000..2a3bae1 --- /dev/null +++ b/tests/ra_aid/agents/test_ciayn_agent.py @@ -0,0 +1,100 @@ +import pytest +from unittest.mock import Mock, patch +from langchain_core.messages import HumanMessage, AIMessage +from ra_aid.agents.ciayn_agent import CiaynAgent + +@pytest.fixture +def mock_model(): + """Create a mock language model.""" + model = Mock() + model.invoke = Mock() + return model + +@pytest.fixture +def agent(mock_model): + """Create a CiaynAgent instance with mock model.""" + tools = [] # Empty tools list for testing trimming functionality + return CiaynAgent(mock_model, tools, max_history_messages=3) + +def test_trim_chat_history_preserves_initial_messages(agent): + """Test that initial messages are preserved during trimming.""" + initial_messages = [ + HumanMessage(content="Initial 1"), + AIMessage(content="Initial 2") + ] + chat_history = [ + HumanMessage(content="Chat 1"), + AIMessage(content="Chat 2"), + HumanMessage(content="Chat 3"), + AIMessage(content="Chat 4") + ] + + result = agent._trim_chat_history(initial_messages, chat_history) + + # Verify initial messages are preserved + assert result[:2] == initial_messages + # Verify only last 3 chat messages are kept (due to max_history_messages=3) + assert len(result[2:]) == 3 + assert result[2:] == chat_history[-3:] + +def test_trim_chat_history_under_limit(agent): + """Test trimming when chat history is under the maximum limit.""" + initial_messages = [HumanMessage(content="Initial")] + chat_history = [ + HumanMessage(content="Chat 1"), + AIMessage(content="Chat 2") + ] + + result = agent._trim_chat_history(initial_messages, chat_history) + + # Verify no trimming occurred + assert len(result) == 3 + assert result == initial_messages + chat_history + +def test_trim_chat_history_over_limit(agent): + """Test trimming when chat history exceeds the maximum limit.""" + initial_messages = [HumanMessage(content="Initial")] + chat_history = [ + HumanMessage(content="Chat 1"), + AIMessage(content="Chat 2"), + HumanMessage(content="Chat 3"), + 
AIMessage(content="Chat 4"), + HumanMessage(content="Chat 5") + ] + + result = agent._trim_chat_history(initial_messages, chat_history) + + # Verify correct trimming + assert len(result) == 4 # initial + max_history_messages + assert result[0] == initial_messages[0] # Initial message preserved + assert result[1:] == chat_history[-3:] # Last 3 chat messages kept + +def test_trim_chat_history_empty_initial(agent): + """Test trimming with empty initial messages.""" + initial_messages = [] + chat_history = [ + HumanMessage(content="Chat 1"), + AIMessage(content="Chat 2"), + HumanMessage(content="Chat 3"), + AIMessage(content="Chat 4") + ] + + result = agent._trim_chat_history(initial_messages, chat_history) + + # Verify only last 3 messages are kept + assert len(result) == 3 + assert result == chat_history[-3:] + +def test_trim_chat_history_empty_chat(agent): + """Test trimming with empty chat history.""" + initial_messages = [ + HumanMessage(content="Initial 1"), + AIMessage(content="Initial 2") + ] + chat_history = [] + + result = agent._trim_chat_history(initial_messages, chat_history) + + # Verify initial messages are preserved and no trimming occurred + assert result == initial_messages + assert len(result) == 2 From 6f10db811e128b3213d38d1f015a7ddb995ef974 Mon Sep 17 00:00:00 2001 From: AI Christianson Date: Sat, 28 Dec 2024 15:09:32 -0500 Subject: [PATCH 10/15] ciayn --- ra_aid/agents/ciayn_agent.py | 61 +++++++++++++++++++++---- tests/ra_aid/agents/test_ciayn_agent.py | 56 +++++++++++++++++++++++ tests/ra_aid/test_llm.py | 17 +++++++ 3 files changed, 124 insertions(+), 10 deletions(-) diff --git a/ra_aid/agents/ciayn_agent.py b/ra_aid/agents/ciayn_agent.py index a9cde81..8f21f35 100644 --- a/ra_aid/agents/ciayn_agent.py +++ b/ra_aid/agents/ciayn_agent.py @@ -1,6 +1,6 @@ import inspect -from typing import Dict, Any, Generator, List, Optional -from langchain_core.messages import AIMessage, HumanMessage +from typing import Dict, Any, Generator, List, Optional, Union +from langchain_core.messages import AIMessage, HumanMessage, BaseMessage from ra_aid.exceptions import ToolExecutionError class CiaynAgent: @@ -20,17 +20,19 @@ class CiaynAgent: \"\"\"""" return info - def __init__(self, model, tools: list, max_history_messages: int = 50): + def __init__(self, model, tools: list, max_history_messages: int = 50, max_tokens: Optional[int] = None): """Initialize the agent with a model and list of tools. Args: model: The language model to use tools: List of tools available to the agent max_history_messages: Maximum number of messages to keep in chat history + max_tokens: Maximum number of tokens allowed in message history (None for no limit) """ self.model = model self.tools = tools self.max_history_messages = max_history_messages + self.max_tokens = max_tokens self.available_functions = [] for t in tools: self.available_functions.append(self._get_function_info(t.func)) @@ -98,11 +100,36 @@ Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**""" } } - def _trim_chat_history(self, initial_messages: List[Any], chat_history: List[Any]) -> List[Any]: - """Trim chat history to maximum length while preserving initial messages. + @staticmethod + def _estimate_tokens(content: Optional[Union[str, BaseMessage]]) -> int: + """Estimate number of tokens in content using simple byte length heuristic. - Only trims the chat_history portion while preserving all initial messages. - Returns the concatenated list of initial_messages + trimmed chat_history. + Estimates 1 token per 4 bytes of content. 
For messages, uses the content field. + + Args: + content: String content or Message object to estimate tokens for + + Returns: + int: Estimated number of tokens, 0 if content is None/empty + """ + if content is None: + return 0 + + if isinstance(content, BaseMessage): + text = content.content + else: + text = content + + if not text: + return 0 + + return len(text.encode('utf-8')) // 4 + + def _trim_chat_history(self, initial_messages: List[Any], chat_history: List[Any]) -> List[Any]: + """Trim chat history based on message count and token limits while preserving initial messages. + + Applies both message count and token limits (if configured) to chat_history, + while preserving all initial_messages. Returns concatenated result. Args: initial_messages: List of initial messages to preserve @@ -111,11 +138,25 @@ Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**""" Returns: List[Any]: Concatenated initial_messages + trimmed chat_history """ - if len(chat_history) <= self.max_history_messages: + # First apply message count limit + if len(chat_history) > self.max_history_messages: + chat_history = chat_history[-self.max_history_messages:] + + # Skip token limiting if max_tokens is None + if self.max_tokens is None: return initial_messages + chat_history - # Keep last max_history_messages from chat_history - return initial_messages + chat_history[-self.max_history_messages:] + # Calculate initial messages token count + initial_tokens = sum(self._estimate_tokens(msg) for msg in initial_messages) + + # Remove messages from start of chat_history until under token limit + while chat_history: + total_tokens = initial_tokens + sum(self._estimate_tokens(msg) for msg in chat_history) + if total_tokens <= self.max_tokens: + break + chat_history.pop(0) + + return initial_messages + chat_history def stream(self, messages_dict: Dict[str, List[Any]], config: Dict[str, Any] = None) -> Generator[Dict[str, Any], None, None]: """Stream agent responses in a format compatible with print_agent_output.""" diff --git a/tests/ra_aid/agents/test_ciayn_agent.py b/tests/ra_aid/agents/test_ciayn_agent.py index 2a3bae1..71bc1a8 100644 --- a/tests/ra_aid/agents/test_ciayn_agent.py +++ b/tests/ra_aid/agents/test_ciayn_agent.py @@ -98,3 +98,59 @@ def test_trim_chat_history_empty_chat(agent): # Verify initial messages are preserved and no trimming occurred assert result == initial_messages assert len(result) == 2 + +def test_trim_chat_history_token_limit(): + """Test trimming based on token limit.""" + agent = CiaynAgent(Mock(), [], max_history_messages=10, max_tokens=20) + + initial_messages = [HumanMessage(content="Initial")] # ~2 tokens + chat_history = [ + HumanMessage(content="A" * 40), # ~10 tokens + AIMessage(content="B" * 40), # ~10 tokens + HumanMessage(content="C" * 40) # ~10 tokens + ] + + result = agent._trim_chat_history(initial_messages, chat_history) + + # Should keep initial message (~2 tokens) and last message (~10 tokens) + assert len(result) == 2 + assert result[0] == initial_messages[0] + assert result[1] == chat_history[-1] + +def test_trim_chat_history_no_token_limit(): + """Test trimming with no token limit set.""" + agent = CiaynAgent(Mock(), [], max_history_messages=2, max_tokens=None) + + initial_messages = [HumanMessage(content="Initial")] + chat_history = [ + HumanMessage(content="A" * 1000), + AIMessage(content="B" * 1000), + HumanMessage(content="C" * 1000) + ] + + result = agent._trim_chat_history(initial_messages, chat_history) + + # Should keep initial message and last 2 messages 
(max_history_messages=2) + assert len(result) == 3 + assert result[0] == initial_messages[0] + assert result[1:] == chat_history[-2:] + +def test_trim_chat_history_both_limits(): + """Test trimming with both message count and token limits.""" + agent = CiaynAgent(Mock(), [], max_history_messages=3, max_tokens=15) + + initial_messages = [HumanMessage(content="Init")] # ~1 token + chat_history = [ + HumanMessage(content="A" * 40), # ~10 tokens + AIMessage(content="B" * 40), # ~10 tokens + HumanMessage(content="C" * 40), # ~10 tokens + AIMessage(content="D" * 40) # ~10 tokens + ] + + result = agent._trim_chat_history(initial_messages, chat_history) + + # Should first apply message limit (keeping last 3) + # Then token limit should further reduce to fit under 15 tokens + assert len(result) == 2 # Initial message + 1 message under token limit + assert result[0] == initial_messages[0] + assert result[1] == chat_history[-1] diff --git a/tests/ra_aid/test_llm.py b/tests/ra_aid/test_llm.py index ee5197d..9ff011f 100644 --- a/tests/ra_aid/test_llm.py +++ b/tests/ra_aid/test_llm.py @@ -3,7 +3,9 @@ import pytest from unittest.mock import patch, Mock from langchain_openai.chat_models import ChatOpenAI from langchain_anthropic.chat_models import ChatAnthropic +from langchain_core.messages import HumanMessage from dataclasses import dataclass +from ra_aid.agents.ciayn_agent import CiaynAgent from ra_aid.env import validate_environment from ra_aid.llm import initialize_llm, initialize_expert_llm @@ -87,6 +89,21 @@ def test_initialize_expert_unsupported_provider(clean_env): with pytest.raises(ValueError, match=r"Unsupported provider: unknown"): initialize_expert_llm("unknown", "model") +def test_estimate_tokens(): + """Test token estimation functionality.""" + # Test empty/None cases + assert CiaynAgent._estimate_tokens(None) == 0 + assert CiaynAgent._estimate_tokens('') == 0 + + # Test string content + assert CiaynAgent._estimate_tokens('test') == 1 # 4 bytes + assert CiaynAgent._estimate_tokens('hello world') == 2 # 11 bytes + assert CiaynAgent._estimate_tokens('🚀') == 1 # 4 bytes + + # Test message content + msg = HumanMessage(content='test message') + assert CiaynAgent._estimate_tokens(msg) == 3 # 11 bytes + def test_initialize_openai(clean_env, mock_openai): """Test OpenAI provider initialization""" os.environ["OPENAI_API_KEY"] = "test-key" From 5c5df6569469757b78c026146f841e60aecb864c Mon Sep 17 00:00:00 2001 From: AI Christianson Date: Sat, 28 Dec 2024 15:11:43 -0500 Subject: [PATCH 11/15] ciayn --- experiment/llm_test.py | 119 +---------------------------------- ra_aid/agents/ciayn_agent.py | 2 +- 2 files changed, 2 insertions(+), 119 deletions(-) diff --git a/experiment/llm_test.py b/experiment/llm_test.py index 4e37711..90115b0 100644 --- a/experiment/llm_test.py +++ b/experiment/llm_test.py @@ -2,16 +2,14 @@ import os import uuid from dotenv import load_dotenv from ra_aid.agent_utils import run_agent_with_retry -from typing import Dict, Any, Generator, List, Optional -from langchain_core.messages import AIMessage, HumanMessage from langchain_openai import ChatOpenAI from langchain_core.tools import tool from ra_aid.tools.list_directory import list_directory_tree from ra_aid.tool_configs import get_read_only_tools -import inspect from rich.panel import Panel from rich.markdown import Markdown from rich.console import Console +from ra_aid.agents.ciayn_agent import CiaynAgent console = Console() @@ -33,121 +31,6 @@ def output_message(message: str, prompt_user_input: bool = False) -> str: return 
user_input return "" -class CiaynAgent: - def get_function_info(self, func): - """ - Returns a well-formatted string containing the function signature and docstring, - designed to be easily readable by both humans and LLMs. - """ - signature = inspect.signature(func) - docstring = inspect.getdoc(func) - if docstring is None: - docstring = "No docstring provided" - full_signature = f"{func.__name__}{signature}" - info = f"""{full_signature} -\"\"\" -{docstring} -\"\"\" """ - return info - - def __init__(self, model, tools: list): - """Initialize the agent with a model and list of tools.""" - self.model = model - self.tools = tools - self.available_functions = [] - for t in tools: - self.available_functions.append(self.get_function_info(t.func)) - - def _build_prompt(self, last_result: Optional[str] = None) -> str: - """Build the prompt for the agent including available tools and context.""" - base_prompt = "" - if last_result is not None: - base_prompt += f"\n{last_result}" - - base_prompt += f""" - -{"\n\n".join(self.available_functions)} - - - -You are a ReAct agent. You run in a loop and use ONE of the available functions per iteration. -If the current query does not require a function call, just use output_message to say what you would normally say. -The result of that function call will be given to you in the next message. -Call one function at a time. Function arguments can be complex objects, long strings, etc. if needed. -The user cannot see the results of function calls, so you have to explicitly call output_message if you want them to see something. -You must always respond with a single line of python that calls one of the available tools. -Use as many steps as you need to in order to fully complete the task. -Start by asking the user what they want. 
- - - -check_weather("London") - - - -output_message(\"\"\"How can I help you today?\"\"\", True) - - -Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**""" - return base_prompt - - def _execute_tool(self, code: str) -> str: - """Execute a tool call and return its result.""" - globals_dict = { - tool.func.__name__: tool.func - for tool in self.tools - } - - try: - result = eval(code.strip(), globals_dict) - return result - except Exception as e: - error_msg = f"Error executing code: {str(e)}" - console.print(f"[red]Error:[/red] {error_msg}") - return error_msg - - - def _create_agent_chunk(self, content: str) -> Dict[str, Any]: - """Create an agent chunk in the format expected by print_agent_output.""" - return { - "agent": { - "messages": [AIMessage(content=content)] - } - } - - def _create_error_chunk(self, content: str) -> Dict[str, Any]: - """Create an error chunk in the format expected by print_agent_output.""" - return { - "tools": { - "messages": [{"status": "error", "content": content}] - } - } - - def stream(self, messages_dict: Dict[str, List[Any]], config: Dict[str, Any] = None) -> Generator[Dict[str, Any], None, None]: - """Stream agent responses in a format compatible with print_agent_output.""" - initial_messages = messages_dict.get("messages", []) - chat_history = [] - last_result = None - first_iteration = True - - while True: - base_prompt = self._build_prompt(None if first_iteration else last_result) - chat_history.append(HumanMessage(content=base_prompt)) - - try: - full_history = initial_messages + chat_history - response = self.model.invoke(full_history) - - last_result = self._execute_tool(response.content) - chat_history.append(response) - first_iteration = False - yield {} - - except Exception as e: - error_msg = f"Error: {str(e)}" - yield self._create_error_chunk(error_msg) - break - if __name__ == "__main__": # Initialize the chat model chat = ChatOpenAI( diff --git a/ra_aid/agents/ciayn_agent.py b/ra_aid/agents/ciayn_agent.py index 8f21f35..cb9f06f 100644 --- a/ra_aid/agents/ciayn_agent.py +++ b/ra_aid/agents/ciayn_agent.py @@ -20,7 +20,7 @@ class CiaynAgent: \"\"\"""" return info - def __init__(self, model, tools: list, max_history_messages: int = 50, max_tokens: Optional[int] = None): + def __init__(self, model, tools: list, max_history_messages: int = 50, max_tokens: Optional[int] = 100000): """Initialize the agent with a model and list of tools. 
Args: From 2af7f8a6231e042a34328973bc23424fa4caea00 Mon Sep 17 00:00:00 2001 From: AI Christianson Date: Sat, 28 Dec 2024 15:18:58 -0500 Subject: [PATCH 12/15] ciayn --- ra_aid/agent_utils.py | 2 ++ ra_aid/agents/__init__.py | 0 ra_aid/agents/ciayn_agent.py | 30 ++++++++++++++++++++++++++++++ 3 files changed, 32 insertions(+) create mode 100644 ra_aid/agents/__init__.py diff --git a/ra_aid/agent_utils.py b/ra_aid/agent_utils.py index c11fdfa..d281185 100644 --- a/ra_aid/agent_utils.py +++ b/ra_aid/agent_utils.py @@ -422,6 +422,8 @@ def run_task_implementation_agent( try: logger.debug("Implementation agent completed successfully") return run_agent_with_retry(agent, prompt, run_config) + except (KeyboardInterrupt, AgentInterrupt): + raise except Exception as e: logger.error("Implementation agent failed: %s", str(e), exc_info=True) raise diff --git a/ra_aid/agents/__init__.py b/ra_aid/agents/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ra_aid/agents/ciayn_agent.py b/ra_aid/agents/ciayn_agent.py index cb9f06f..7d4dde7 100644 --- a/ra_aid/agents/ciayn_agent.py +++ b/ra_aid/agents/ciayn_agent.py @@ -4,6 +4,36 @@ from langchain_core.messages import AIMessage, HumanMessage, BaseMessage from ra_aid.exceptions import ToolExecutionError class CiaynAgent: + """Code Is All You Need (CIAYN) agent that uses generated Python code for tool interaction. + + The CIAYN philosophy emphasizes direct code generation and execution over structured APIs: + - Language model generates executable Python code snippets + - Tools are invoked through natural Python code rather than fixed schemas + - Flexible and adaptable approach to tool usage through dynamic code + - Complex workflows emerge from composing code segments + + Code Generation & Function Calling: + - Dynamic generation of Python code for tool invocation + - Handles complex nested function calls and argument structures + - Natural integration of tool outputs into Python data flow + - Runtime code composition for multi-step operations + + ReAct Pattern Implementation: + - Observation: Captures tool execution results + - Reasoning: Analyzes outputs to determine next steps + - Action: Generates and executes appropriate code + - Reflection: Updates state and plans next iteration + - Maintains conversation context across iterations + + Core Capabilities: + - Dynamic tool registration with automatic documentation + - Sandboxed code execution environment + - Token-aware chat history management + - Comprehensive error handling and recovery + - Streaming interface for real-time interaction + - Memory management with configurable limits + """ + def _get_function_info(self, func): """ Returns a well-formatted string containing the function signature and docstring, From fd664c0886372405e737a9633e20b78ef484789e Mon Sep 17 00:00:00 2001 From: AI Christianson Date: Sat, 28 Dec 2024 15:36:09 -0500 Subject: [PATCH 13/15] changelog --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 36af79e..c8e5d11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.10.3] - 1024-12-27 +## [Unreleased] + +- Add CiaynAgent to support models that do not have, or are not good at, agentic function calling. 
+ +## [0.10.3] - 2024-12-27 - Fix logging on interrupt. - Fix web research prompt. From 13b953bf7fe64296f1053360afaf537467124005 Mon Sep 17 00:00:00 2001 From: AI Christianson Date: Sat, 28 Dec 2024 15:39:33 -0500 Subject: [PATCH 14/15] extract creat agent method --- ra_aid/__main__.py | 6 +++--- ra_aid/agent_utils.py | 31 +++++++++++++++++++++++++------ 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/ra_aid/__main__.py b/ra_aid/__main__.py index 49f7b7a..7ff77e3 100644 --- a/ra_aid/__main__.py +++ b/ra_aid/__main__.py @@ -4,7 +4,6 @@ import uuid from rich.panel import Panel from rich.console import Console from langgraph.checkpoint.memory import MemorySaver -from langgraph.prebuilt import create_react_agent from ra_aid.env import validate_environment from ra_aid.tools.memory import _global_memory from ra_aid.tools.human import ask_human @@ -15,7 +14,8 @@ from ra_aid.agent_utils import ( AgentInterrupt, run_agent_with_retry, run_research_agent, - run_planning_agent + run_planning_agent, + create_agent ) from ra_aid.prompts import ( CHAT_PROMPT, @@ -177,7 +177,7 @@ def main(): initial_request = ask_human.invoke({"question": "What would you like help with?"}) # Create chat agent with appropriate tools - chat_agent = create_react_agent( + chat_agent = create_agent( model, get_chat_tools(expert_enabled=expert_enabled, web_research_enabled=web_research_enabled), checkpointer=MemorySaver() diff --git a/ra_aid/agent_utils.py b/ra_aid/agent_utils.py index d281185..90b0650 100644 --- a/ra_aid/agent_utils.py +++ b/ra_aid/agent_utils.py @@ -10,8 +10,10 @@ import threading import time from typing import Optional -from langgraph.prebuilt import create_react_agent +from langgraph.prebuilt import create_react_agent from ra_aid.console.formatting import print_stage_header, print_error +from langchain_core.language_models import BaseChatModel +from typing import List, Any from ra_aid.console.output import print_agent_output from ra_aid.logging_config import get_logger from ra_aid.exceptions import AgentInterrupt @@ -41,7 +43,6 @@ from ra_aid.prompts import ( from langgraph.checkpoint.memory import MemorySaver from langchain_core.messages import HumanMessage -from langchain_core.messages import BaseMessage from anthropic import APIError, APITimeoutError, RateLimitError, InternalServerError from rich.console import Console from rich.markdown import Markdown @@ -65,6 +66,24 @@ console = Console() logger = get_logger(__name__) +def create_agent( + model: BaseChatModel, + tools: List[Any], + *, + checkpointer: Any = None +) -> Any: + """Create a react agent with the given configuration. 
+ + Args: + model: The LLM model to use + tools: List of tools to provide to the agent + checkpointer: Optional memory checkpointer + + Returns: + The created agent instance + """ + return create_react_agent(model, tools, checkpointer=checkpointer) + def run_research_agent( base_task_or_query: str, model, @@ -125,7 +144,7 @@ def run_research_agent( ) # Create agent - agent = create_react_agent(model, tools, checkpointer=memory) + agent = create_agent(model, tools, checkpointer=memory) # Format prompt sections expert_section = EXPERT_PROMPT_SECTION_RESEARCH if expert_enabled else "" @@ -223,7 +242,7 @@ def run_web_research_agent( tools = get_web_research_tools(expert_enabled=expert_enabled) # Create agent - agent = create_react_agent(model, tools, checkpointer=memory) + agent = create_agent(model, tools, checkpointer=memory) # Format prompt sections expert_section = EXPERT_PROMPT_SECTION_RESEARCH if expert_enabled else "" @@ -306,7 +325,7 @@ def run_planning_agent( tools = get_planning_tools(expert_enabled=expert_enabled, web_research_enabled=config.get('web_research_enabled', False)) # Create agent - agent = create_react_agent(model, tools, checkpointer=memory) + agent = create_agent(model, tools, checkpointer=memory) # Format prompt sections expert_section = EXPERT_PROMPT_SECTION_PLANNING if expert_enabled else "" @@ -393,7 +412,7 @@ def run_task_implementation_agent( tools = get_implementation_tools(expert_enabled=expert_enabled, web_research_enabled=config.get('web_research_enabled', False)) # Create agent - agent = create_react_agent(model, tools, checkpointer=memory) + agent = create_agent(model, tools, checkpointer=memory) # Build prompt prompt = IMPLEMENTATION_PROMPT.format( From 406d1a5358db5a51791d0eb6706f089ad93c2a5a Mon Sep 17 00:00:00 2001 From: AI Christianson Date: Sat, 28 Dec 2024 16:44:06 -0500 Subject: [PATCH 15/15] ciayn --- ra_aid/agent_utils.py | 39 +++++++++++++++++++++++++++++++++--- ra_aid/agents/ciayn_agent.py | 27 ++++++++++++++----------- ra_aid/tools/memory.py | 2 ++ 3 files changed, 53 insertions(+), 15 deletions(-) diff --git a/ra_aid/agent_utils.py b/ra_aid/agent_utils.py index 90b0650..a0210ab 100644 --- a/ra_aid/agent_utils.py +++ b/ra_aid/agent_utils.py @@ -10,9 +10,12 @@ import threading import time from typing import Optional -from langgraph.prebuilt import create_react_agent +from langgraph.prebuilt import create_react_agent +from ra_aid.agents.ciayn_agent import CiaynAgent +from ra_aid.agents.ciayn_agent import CiaynAgent from ra_aid.console.formatting import print_stage_header, print_error from langchain_core.language_models import BaseChatModel +from langchain_core.tools import tool from typing import List, Any from ra_aid.console.output import print_agent_output from ra_aid.logging_config import get_logger @@ -66,6 +69,12 @@ console = Console() logger = get_logger(__name__) +@tool +def output_markdown_message(message: str) -> str: + """Outputs a message to the user, optionally prompting for input.""" + console.print(Panel(Markdown(message.strip()), title="🤖 Assistant")) + return "Message output." + def create_agent( model: BaseChatModel, tools: List[Any], @@ -82,7 +91,27 @@ def create_agent( Returns: The created agent instance """ - return create_react_agent(model, tools, checkpointer=checkpointer) + try: + # Extract model info from module path + module_path = model.__class__.__module__.split('.') + if len(module_path) > 1: + provider = module_path[1] # e.g. 
anthropic from langchain_anthropic + else: + provider = None + + # Get model name if available + model_name = getattr(model, 'model_name', '').lower() + + # Use REACT agent for Anthropic Claude models, otherwise use CIAYN + if provider == 'anthropic' and 'claude' in model_name: + return create_react_agent(model, tools, checkpointer=checkpointer) + else: + return CiaynAgent(model, tools) + + except Exception as e: + # Default to REACT agent if provider/model detection fails + logger.warning(f"Failed to detect model type: {e}. Defaulting to REACT agent.") + return create_react_agent(model, tools, checkpointer=checkpointer) def run_research_agent( base_task_or_query: str, @@ -499,7 +528,11 @@ def run_agent_with_retry(agent, prompt: str, config: dict) -> Optional[str]: logger.debug("Agent output: %s", chunk) check_interrupt() print_agent_output(chunk) - logger.debug("Agent run completed successfully") + if _global_memory['task_completed']: + _global_memory['task_completed'] = False + _global_memory['completion_message'] = '' + break + logger.debug("Agent run completed successfully") return "Agent run completed successfully" except (KeyboardInterrupt, AgentInterrupt): raise diff --git a/ra_aid/agents/ciayn_agent.py b/ra_aid/agents/ciayn_agent.py index 7d4dde7..5f021d7 100644 --- a/ra_aid/agents/ciayn_agent.py +++ b/ra_aid/agents/ciayn_agent.py @@ -1,7 +1,12 @@ import inspect +from dataclasses import dataclass from typing import Dict, Any, Generator, List, Optional, Union from langchain_core.messages import AIMessage, HumanMessage, BaseMessage from ra_aid.exceptions import ToolExecutionError +@dataclass +class ChunkMessage: + content: str + status: str class CiaynAgent: """Code Is All You Need (CIAYN) agent that uses generated Python code for tool interaction. @@ -74,9 +79,6 @@ class CiaynAgent: base_prompt += f"\n{last_result}" base_prompt += f""" - -{"\n\n".join(self.available_functions)} - You are a ReAct agent. You run in a loop and use ONE of the available functions per iteration. @@ -89,13 +91,13 @@ Use as many steps as you need to in order to fully complete the task. Start by asking the user what they want. - -check_weather("London") - - - -output_message(\"\"\"How can I help you today?\"\"\", True) - +You must carefully review the conversation history, which functions were called so far, returned results, etc., and make sure the very next function call you make makes sense in order to achieve the original goal. 
+ +You must ONLY use ONE of the following functions (these are the ONLY functions that exist): + + +{"\n\n".join(self.available_functions)} + Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**""" return base_prompt @@ -124,9 +126,10 @@ Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**""" def _create_error_chunk(self, content: str) -> Dict[str, Any]: """Create an error chunk in the format expected by print_agent_output.""" + message = ChunkMessage(content=content, status="error") return { "tools": { - "messages": [{"status": "error", "content": content}] + "messages": [message] } } @@ -209,5 +212,5 @@ Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**""" yield {} except ToolExecutionError as e: + chat_history.append(HumanMessage(content=f"Your tool call caused an error: {e}\n\nPlease correct your tool call and try again.")) yield self._create_error_chunk(str(e)) - break diff --git a/ra_aid/tools/memory.py b/ra_aid/tools/memory.py index e5262e9..f87ce0e 100644 --- a/ra_aid/tools/memory.py +++ b/ra_aid/tools/memory.py @@ -192,6 +192,8 @@ def emit_key_snippets(snippets: List[SnippetInfo]) -> str: """Store multiple key source code snippets in global memory. Automatically adds the filepaths of the snippets to related files. + This is for **existing**, or **just-written** files, not for things to be created in the future. + Args: snippets: List of snippet information dictionaries containing: - filepath: Path to the source file
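
For reference, a minimal sketch of how the pieces added by this series fit together, mirroring the __main__ block of experiment/llm_test.py. It assumes the ra_aid package layout introduced above; the OpenRouter endpoint, model name, environment variable, and the get_read_only_tools(True, True) flags are illustrative rather than prescriptive.

import os
import uuid

from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_core.tools import tool

from ra_aid.agents.ciayn_agent import CiaynAgent
from ra_aid.agent_utils import run_agent_with_retry
from ra_aid.tool_configs import get_read_only_tools

load_dotenv()

@tool
def output_message(message: str, prompt_user_input: bool = False) -> str:
    """Outputs a message to the user, optionally prompting for input."""
    # Simplified from the rich-based version in experiment/llm_test.py.
    print(f"\nAssistant: {message.strip()}")
    return input("\n> ").strip() if prompt_user_input else ""

# Any OpenAI-compatible endpoint works here; this particular model is only an example.
model = ChatOpenAI(
    api_key=os.getenv("OPENROUTER_API_KEY"),
    base_url="https://openrouter.ai/api/v1",
    model="qwen/qwen-2.5-coder-32b-instruct",
    temperature=0.7,
)

tools = get_read_only_tools(True, True)
tools.append(output_message)

# CiaynAgent keeps chat history bounded by message count and by estimated tokens
# (roughly one token per four bytes of content).
agent = CiaynAgent(model, tools, max_history_messages=50, max_tokens=100_000)

config = {"configurable": {"thread_id": str(uuid.uuid4())}}
run_agent_with_retry(agent, "Find the tests in this codebase.", config)

With the create_agent helper added in the later patches, the same model could instead be passed to create_agent(model, tools), which routes Anthropic Claude models to the REACT agent and everything else to CiaynAgent.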