ciayn improvements

2025-03-03 23:07:58 -05:00 · 2025-03-03 23:07:58 -05:00 · 039aa8f22a
parent 035544c77a
commit 039aa8f22a
13 changed files with 457 additions and 151 deletions
--- a/ra_aid/agent_backends/ciayn_agent.py
+++ b/ra_aid/agent_backends/ciayn_agent.py
@ -11,7 +11,7 @@ from ra_aid.exceptions import ToolExecutionError
 from ra_aid.fallback_handler import FallbackHandler
 from ra_aid.logging_config import get_logger
 from ra_aid.models_params import DEFAULT_TOKEN_LIMIT
-from ra_aid.prompts.ciayn_prompts import CIAYN_AGENT_BASE_PROMPT, EXTRACT_TOOL_CALL_PROMPT
+from ra_aid.prompts.ciayn_prompts import CIAYN_AGENT_SYSTEM_PROMPT, CIAYN_AGENT_HUMAN_PROMPT, EXTRACT_TOOL_CALL_PROMPT, NO_TOOL_CALL_PROMPT
 from ra_aid.tools.expert import get_model
 from ra_aid.tools.reflection import get_function_info

@ -32,6 +32,7 @@ def validate_function_call_pattern(s: str) -> bool:
    - Opening/closing parentheses with balanced nesting
    - Arbitrary arguments inside parentheses
    - Optional whitespace
+    - Support for triple-quoted strings

    Args:
        s: String to validate
@ -39,8 +40,43 @@ def validate_function_call_pattern(s: str) -> bool:
    Returns:
        bool: False if pattern matches (valid), True if invalid
    """
-    pattern = r"^\s*[\w_\-]+\s*\([^)(]*(?:\([^)(]*\)[^)(]*)*\)\s*$"
-    return not re.match(pattern, s, re.DOTALL)
+    # First check for the basic pattern of a function call
+    basic_pattern = r"^\s*[\w_\-]+\s*\("
+    if not re.match(basic_pattern, s, re.DOTALL):
+        return True
+    
+    # Handle triple-quoted strings to avoid parsing issues
+    # Temporarily replace triple-quoted content to avoid false positives
+    def replace_triple_quoted(match):
+        return '"""' + '_' * len(match.group(1)) + '"""'
+    
+    # Replace content in triple quotes with placeholders
+    s_clean = re.sub(r'"""(.*?)"""', replace_triple_quoted, s, flags=re.DOTALL)
+    
+    # Handle regular quotes
+    s_clean = re.sub(r'"[^"]*"', '""', s_clean)
+    s_clean = re.sub(r"'[^']*'", "''", s_clean)
+    
+    # Check for multiple function calls (not allowed)
+    if re.search(r"\)\s*[\w_\-]+\s*\(", s_clean):
+        return True
+    
+    # Count the number of opening and closing parentheses
+    open_count = s_clean.count('(')
+    close_count = s_clean.count(')')
+    
+    if open_count != close_count:
+        return True
+    
+    # Check for the presence of triple quotes and if they're properly closed
+    triple_quote_pairs = s.count('"""') // 2
+    triple_quote_count = s.count('"""')
+    
+    if triple_quote_count % 2 != 0:  # Odd number means unbalanced quotes
+        return True
+        
+    # If we've passed all checks, the pattern is valid
+    return False


 class CiaynAgent:
@ -106,9 +142,13 @@ class CiaynAgent:
            self.available_functions.append(get_function_info(t.func))

        self.fallback_handler = FallbackHandler(config, tools)
+        
+        # Include the functions list in the system prompt
+        functions_list = "\n\n".join(self.available_functions)
        self.sys_message = SystemMessage(
-            "Execute efficiently yet completely as a fully autonomous agent."
+            CIAYN_AGENT_SYSTEM_PROMPT.format(functions_list=functions_list)
        )
+        
        self.error_message_template = "Your tool call caused an error: {e}\n\nPlease correct your tool call and try again."
        self.fallback_fixed_msg = HumanMessage(
            "Fallback tool handler has fixed the tool call see: <fallback tool call result> for the output."
@ -116,20 +156,15 @@ class CiaynAgent:

    def _build_prompt(self, last_result: Optional[str] = None) -> str:
        """Build the prompt for the agent including available tools and context."""
-        base_prompt = ""
-
+        # Add last result section if provided
+        last_result_section = ""
        if last_result is not None:
-            base_prompt += f"\n<last result>{last_result}</last result>"
-
-        # Add available functions section
-        functions_list = "\n\n".join(self.available_functions)
-
-        # Build the complete prompt without f-strings for the static parts
-        base_prompt += CIAYN_AGENT_BASE_PROMPT.format(functions_list=functions_list)
-
-        # base_prompt += "\n\nYou must reply with ONLY ONE of the functions given in available functions."
-
-        return base_prompt
+            last_result_section = f"\n<last result>{last_result}</last result>"
+        
+        # Build the human prompt without the function list
+        return CIAYN_AGENT_HUMAN_PROMPT.format(
+            last_result_section=last_result_section
+        )

    def _execute_tool(self, msg: BaseMessage) -> str:
        """Execute a tool call and return its result."""
@ -186,54 +221,16 @@ class CiaynAgent:
        """Create an agent chunk in the format expected by print_agent_output."""
        return {"agent": {"messages": [AIMessage(content=content)]}}

-    def _create_error_chunk(self, content: str) -> Dict[str, Any]:
-        """Create an error chunk in the format expected by print_agent_output."""
-        message = ChunkMessage(content=content, status="error")
-        return {"tools": {"messages": [message]}}
-
-    @staticmethod
-    def _estimate_tokens(content: Optional[Union[str, BaseMessage]]) -> int:
-        """Estimate number of tokens in content using simple byte length heuristic.
-        Estimates 1 token per 2.0 bytes of content. For messages, uses the content field.
-
-        Args:
-            content: String content or Message object to estimate tokens for
-
-        Returns:
-            int: Estimated number of tokens, 0 if content is None/empty
-        """
-        if content is None:
-            return 0
-
-        if isinstance(content, BaseMessage):
-            text = content.content
-        else:
-            text = content
-
-        # create-react-agent tool calls can be lists
-        if isinstance(text, List):
-            text = str(text)
-
-        if not text:
-            return 0
-
-        return len(text.encode("utf-8")) // 2.0
-
-    def _extract_tool_call(self, code: str, functions_list: str) -> str:
-        model = get_model()
-        prompt = EXTRACT_TOOL_CALL_PROMPT.format(
-            functions_list=functions_list, code=code
-        )
-        response = model.invoke(prompt)
-        response = response.content
-
-        pattern = r"([\w_\-]+)\((.*?)\)"
-        matches = re.findall(pattern, response, re.DOTALL)
-        if len(matches) == 0:
-            raise ToolExecutionError("Failed to extract tool call")
-        ma = matches[0][0].strip()
-        mb = matches[0][1].strip().replace("\n", " ")
-        return f"{ma}({mb})"
+    def _create_error_chunk(self, error_message: str) -> Dict[str, Any]:
+        """Wrap an error message in the dict yielded on the agent output stream.
+
+        Args:
+            error_message: Text describing the failure.
+
+        Returns:
+            A dict with ``type`` set to ``"error"``, the raw ``message``, and a
+            ``tool_call`` entry named ``report_error`` carrying the same text.
+        """
+        # The same text appears twice: at the top level and as the tool-call argument.
+        report_call = {"name": "report_error", "args": {"error": error_message}}
+        return {"type": "error", "message": error_message, "tool_call": report_call}

    def _trim_chat_history(
        self, initial_messages: List[Any], chat_history: List[Any]
@ -272,6 +269,42 @@ class CiaynAgent:

        return initial_messages + chat_history

+    @staticmethod
+    def _estimate_tokens(content: Optional[Union[str, BaseMessage]]) -> int:
+        """Estimate the token count of a string or message from its byte length.
+
+        Uses a heuristic of one token per two UTF-8 bytes, rounded down.
+
+        Args:
+            content: Text, or a message whose ``content`` field is measured.
+
+        Returns:
+            int: Estimated number of tokens; 0 when content is None or empty.
+        """
+        if content is None:
+            return 0
+
+        text = content.content if isinstance(content, BaseMessage) else content
+
+        # create-react-agent tool calls can be lists
+        if isinstance(text, list):
+            text = str(text)
+
+        if not text:
+            return 0
+
+        # Floor-divide by an int: `// 2.0` would yield a float despite the int annotation.
+        return len(text.encode("utf-8")) // 2
+
+    def _extract_tool_call(self, code: str, functions_list: str) -> str:
+        """Ask the model to reformat `code` and return the first call found in its reply.
+
+        Args:
+            code: The invalid response text to be reformatted.
+            functions_list: Text describing the allowed functions, inserted into the prompt.
+
+        Returns:
+            str: ``name(args)`` rebuilt from the first regex match, with newlines
+            in the arguments replaced by spaces.
+
+        Raises:
+            ToolExecutionError: If the reply contains nothing that looks like a call.
+        """
+        model = get_model()
+        repair_prompt = EXTRACT_TOOL_CALL_PROMPT.format(
+            functions_list=functions_list, code=code
+        )
+        reply_text = model.invoke(repair_prompt).content
+
+        # NOTE: the lazy group stops at the first ")", so arguments containing ")" are cut short.
+        first_call = re.search(r"([\w_\-]+)\((.*?)\)", reply_text, re.DOTALL)
+        if first_call is None:
+            raise ToolExecutionError("Failed to extract tool call")
+
+        name, raw_args = first_call.groups()
+        flattened_args = raw_args.strip().replace("\n", " ")
+        return f"{name.strip()}({flattened_args})"
+
    def stream(
        self, messages_dict: Dict[str, List[Any]], _config: Dict[str, Any] = None
    ) -> Generator[Dict[str, Any], None, None]:
@ -279,6 +312,8 @@ class CiaynAgent:
        initial_messages = messages_dict.get("messages", [])
        self.chat_history = []
        last_result = None
+        empty_response_count = 0
+        max_empty_responses = 3  # Maximum number of consecutive empty responses before giving up

        while True:
            base_prompt = self._build_prompt(last_result)
@ -286,6 +321,28 @@ class CiaynAgent:
            full_history = self._trim_chat_history(initial_messages, self.chat_history)
            response = self.model.invoke([self.sys_message] + full_history)

+            # Check if the response is empty or doesn't contain a valid tool call
+            if not response.content or not response.content.strip():
+                empty_response_count += 1
+                logger.warning(f"Model returned empty response (count: {empty_response_count})")
+                
+                if empty_response_count >= max_empty_responses:
+                    # If we've had too many empty responses, raise an error to break the loop
+                    from ra_aid.agent_context import mark_agent_crashed
+                    crash_message = "Agent failed to make any tool calls after multiple attempts"
+                    mark_agent_crashed(crash_message)
+                    logger.error(crash_message)
+                    yield self._create_error_chunk(crash_message)
+                    return
+                
+                # Send a message to the model explicitly telling it to make a tool call
+                self.chat_history.append(AIMessage(content=""))  # Add the empty response
+                self.chat_history.append(HumanMessage(content=NO_TOOL_CALL_PROMPT))
+                continue
+            
+            # Reset empty response counter on successful response
+            empty_response_count = 0
+
            try:
                last_result = self._execute_tool(response)
                self.chat_history.append(response)
--- a/ra_aid/prompts/ciayn_prompts.py
+++ b/ra_aid/prompts/ciayn_prompts.py
@ -7,9 +7,7 @@ generated Python code for tool interaction rather than structured APIs.

 # Extract tool call prompt - used to format code from LLM responses
 EXTRACT_TOOL_CALL_PROMPT = """I'm conversing with a AI model and requiring responses in a particular format: A function call with any parameters escaped. Here is an example:
-```
 run_programming_task("blah \" blah\" blah")
-```

 The following tasks are allowed:

@ -17,17 +15,21 @@ The following tasks are allowed:

 I got this invalid response from the model, can you format it so it becomes a correct function call?

-```
-{code}
-```"""
+{code}"""

-# CIAYN agent base prompt - core instructions for the code-based agent
-CIAYN_AGENT_BASE_PROMPT = """<agent instructions>
+# Core system instructions for the CIAYN agent
+CIAYN_AGENT_SYSTEM_PROMPT = """<agent instructions>
 You are a ReAct agent. You run in a loop and use ONE of the available functions per iteration, but you will be called in a loop, so you will be able to accomplish the task over many iterations.
 The result of that function call will be given to you in the next message.
 Call one function at a time. Function arguments can be complex objects, long strings, etc. if needed.
+Each tool call you make shall be different from the previous.
 The user cannot see the results of function calls, so you have to explicitly use a tool like ask_human if you want them to see something.
-You must always respond with a single line of python that calls one of the available tools.
+
+YOU MUST ALWAYS RESPOND WITH A SINGLE LINE OF PYTHON THAT CALLS ONE OF THE AVAILABLE TOOLS.
+NEVER RETURN AN EMPTY MESSAGE.
+NEVER RETURN PLAIN TEXT - ONLY RETURN A TOOL CALL.
+IF UNSURE WHAT TO DO, JUST YEET IT AND CALL THE BEST FUNCTION YOU CAN THINK OF.
+
 Use as many steps as you need to in order to fully complete the task.
 Start by asking the user what they want.

@ -37,88 +39,48 @@ Think hard about what the best *next* tool call is, knowing that you can make as
 You typically don't want to keep calling the same function over and over with the same parameters.
 </agent instructions>

-You must ONLY use ONE of the following functions (these are the ONLY functions that exist):
+<efficiency guidelines>
+- Avoid repetitive actions that don't yield new information:
+  - Don't repeatedly list empty directories or check the same information multiple times
+  - For new projects, immediately proceed to planning and implementation rather than exploring empty directories
+  - Only list directories when you expect them to contain useful content
+  - If a directory listing is empty, don't list it again unless files have been created since last check

-<available functions>{functions_list}
+- Use the right tool for the right job:
+  - Use high-level functions like request_implementation for new projects instead of manually exploring
+  - Only use fine-grained exploration tools when addressing specific questions or debugging
+  - Prioritize tools that give you the most useful information with the fewest calls
+
+- Progress efficiently toward goals:
+  - After understanding the user's request, move quickly to implementation planning
+  - Prefer direct implementation paths over excessive exploration
+  - If a tool call doesn't yield useful information, try a different approach instead of repeating it
+  - When working on new projects, focus on creating files rather than searching empty directories
+</efficiency guidelines>
+
+<available functions>
+{functions_list}
 </available functions>

-You may use any of the above functions to complete your job. Use the best one for the current step you are on. Be efficient, avoid getting stuck in repetitive loops, and do not hesitate to call functions which delegate your work to make your life easier.
-But you MUST NOT assume tools exist that are not in the above list, e.g. write_file_tool.
-Consider your task done only once you have taken *ALL* the steps required to complete it.
+<function call guidelines>
+- When using functions with multi-line string arguments (especially put_complete_file_contents):
+  - ALWAYS use three double-quotes for multi-line strings
+  - Make sure to properly escape any quotes within the string if needed
+  - Never break up a multi-line string with line breaks outside the quotes
+  - For file content, the entire content must be inside ONE triple-quoted string

--- EXAMPLE BAD OUTPUTS ---
-
-This tool is not in available functions, so this is a bad tool call:
-
-<example bad output>
-write_file_tool(...)
-</example bad output>
-
-This tool call has a syntax error (unclosed parenthesis, quotes), so it is bad:
-
-<example bad output>
-write_file_tool("asdf
-</example bad output>
-
-This tool call is bad because it includes a message as well as backticks:
-
-<example bad output>
-Sure, I'll make the following tool call to accomplish what you asked me:
-
-```
-list_directory_tree('.')
-```
-</example bad output>
-
-This tool call is bad because the output code is surrounded with backticks:
-
-<example bad output>
-```
-list_directory_tree('.')
-```
-</example bad output>
-
-The following is bad becasue it makes the same tool call multiple times in a row with the exact same parameters, for no reason, getting stuck in a loop:
-
-<example bad output>
-<response 1>
-list_directory_tree('.')
-</response 1>
-<response 2>
-list_directory_tree('.')
-</response 2>
-</example bad output>
-
-The following is bad because it makes more than one tool call in one response:
-
-<example bad output>
-list_directory_tree('.')
-read_file_tool('README.md') # Now we've made 
-</example bad output.
-
-This is a good output because it calls the tool appropriately and with correct syntax:
-
--- EXAMPLE GOOD OUTPUTS ---
-
-<example good output>
-request_research_and_implementation(\"\"\"
-Example query.
+- Example of correct put_complete_file_contents format:
+  put_complete_file_contents("/path/to/file.py", \"\"\"
+def example_function():
+    print("Hello world")
 \"\"\")
-</example good output>

-This is good output because it uses a multiple line string when needed and properly calls the tool, does not output backticks or extra information:
-<example good output>
-run_programming_task(\"\"\"
-# Example Programming Task
-
-Implement a widget factory satisfying the following requirements:
-
- Requirement A
- Requirement B
-
-...
-\"\"\")
-</example good output>
+- Example of INCORRECT format (DO NOT DO THIS):
+  put_complete_file_contents("/path/to/file.py", \"\"\"
+  def example_function():
+      print("Hello world")
+  \"\"\")
+</function call guidelines>

 As an agent, you will carefully plan ahead, carefully analyze tool call responses, and adapt to circumstances in order to accomplish your goal.

@ -126,10 +88,69 @@ You will make as many tool calls as you feel necessary in order to fully complet

 We're entrusting you with a lot of autonomy and power, so be efficient and don't mess up.

-You have often been criticized for:
+PERFORMING WELL AS AN EFFICIENT YET COMPLETE AGENT WILL HELP MY CAREER.

- Making the same function calls over and over, getting stuck in a loop.
+<critical rules>
+1. YOU MUST ALWAYS CALL A FUNCTION - NEVER RETURN EMPTY TEXT OR PLAIN TEXT
+2. ALWAYS OUTPUT EXACTLY ONE VALID FUNCTION CALL AS YOUR RESPONSE 
+3. NEVER TERMINATE YOUR RESPONSE WITHOUT CALLING A FUNCTION
+4. WHEN USING put_complete_file_contents, ALWAYS PUT THE ENTIRE FILE CONTENT INSIDE ONE TRIPLE-QUOTED STRING
+</critical rules>

 DO NOT CLAIM YOU ARE FINISHED UNTIL YOU ACTUALLY ARE!
 Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**
+"""
+
+# Slimmed-down human message format for interaction
+CIAYN_AGENT_HUMAN_PROMPT = """<new project reminder>
+For new projects or empty directories, avoid repetitive directory listing and immediately use request_implementation or appropriate creation tools.
+</new project reminder>
+
+<tool call reminder>
+YOU MUST ALWAYS CALL A FUNCTION - NEVER RETURN EMPTY TEXT
+</tool call reminder>
+
+<multiline content reminder>
+When using put_complete_file_contents, ALWAYS place the entire file content within a SINGLE triple-quoted string:
+
+CORRECT:   put_complete_file_contents("/path/to/file.py", \"\"\"
+def main():
+    print("Hello")
+\"\"\")
+</multiline content reminder>
+
+--- EXAMPLE GOOD OUTPUTS ---
+
+<example good output>
+request_research_and_implementation(\"\"\"Example query.\"\"\")
+</example good output>
+
+<example good output>
+run_programming_task(\"\"\"# Example Programming Task\"\"\")
+</example good output>
+
+<example good output>
+put_complete_file_contents("/path/to/file.py", \"\"\"def example_function():
+    print("This is a multi-line example")
+    for i in range(10):
+        print("Line " + str(i))
+    return True
+\"\"\")
+</example good output>
+
+{last_result_section}
+"""
+
+# Prompt to send when the model gives no tool call
+NO_TOOL_CALL_PROMPT = """YOU MUST CALL A FUNCTION. Your previous response did not contain a valid function call.
+
+Please respond with exactly one valid function call from the available tools. If you're unsure what to do next, just make the best guess on what tool to call and call it.
+
+Remember: ALWAYS respond with a single line of Python code that calls a function.
+
+IMPORTANT: For put_complete_file_contents, make sure to include the entire file content inside a SINGLE triple-quoted string:
+
+CORRECT:   put_complete_file_contents("/path/to/file.py", \"\"\"def main():
+    print("Hello")
+\"\"\")
 """
--- a/tests/data/invalid_case_1.txt
+++ b/tests/data/invalid_case_1.txt
@ -0,0 +1 @@
+unterminated_quotes("""This is missing the closing quotes)
--- a/tests/data/invalid_case_2.txt
+++ b/tests/data/invalid_case_2.txt
@ -0,0 +1 @@
+unbalanced_parentheses("""Valid quotes"""))
--- a/tests/data/invalid_case_3.txt
+++ b/tests/data/invalid_case_3.txt
@ -0,0 +1 @@
+missing_parentheses"""Valid quotes"""
--- a/tests/data/test_case_1.txt
+++ b/tests/data/test_case_1.txt
@ -0,0 +1,5 @@
+put_complete_file_contents("main.cpp", """
+#include <GLFW/glfw3.h>
+#include <GL/gl.h>
+#include <iostream>
+""")
--- a/tests/data/test_case_2.txt
+++ b/tests/data/test_case_2.txt
@ -0,0 +1,5 @@
+put_complete_file_contents("test.py", """
+def hello_world():
+    print("Hello, world!")
+    return 42
+""")
--- a/tests/data/test_case_3.txt
+++ b/tests/data/test_case_3.txt
@ -0,0 +1,3 @@
+function_with_triple_quotes("""This is a test
+with multiple
+lines of text""")
--- a/tests/data/test_case_4.txt
+++ b/tests/data/test_case_4.txt
@ -0,0 +1,2 @@
+mixed_quotes_function("single quotes", """triple quoted
+multiline content""", 'another single quote')
--- a/tests/data/test_case_5.txt
+++ b/tests/data/test_case_5.txt
@ -0,0 +1,9 @@
+put_complete_file_contents("complex.json", """
+{
+  "name": "test",
+  "values": [1, 2, 3],
+  "nested": {
+    "property": "value"
+  }
+}
+""")
--- a/tests/data/test_case_6.txt
+++ b/tests/data/test_case_6.txt
@ -0,0 +1,77 @@
+put_complete_file_contents("main.cpp", """
+#include <GLFW/glfw3.h>
+#include <iostream>
+
+void framebuffer_size_callback(GLFWwindow* window, int width, int height);
+void processInput(GLFWwindow *window);
+
+// settings
+const unsigned int SCR_WIDTH = 800;
+const unsigned int SCR_HEIGHT = 600;
+
+int main()
+{
+    // glfw: initialize and configure
+    glfwInit();
+    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
+    glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
+    glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
+
+#ifdef __APPLE__
+    glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
+#endif
+
+    // glfw window creation
+    GLFWwindow* window = glfwCreateWindow(SCR_WIDTH, SCR_HEIGHT, "LearnOpenGL", NULL, NULL);
+    if (window == NULL)
+    {
+        std::cout << "Failed to create GLFW window" << std::endl;
+        glfwTerminate();
+        return -1;
+    }
+    glfwMakeContextCurrent(window);
+    glfwSetFramebufferSizeCallback(window, framebuffer_size_callback);
+
+    // glad: load all OpenGL function pointers
+    // gladLoadGLLoader((GLADloadproc)glfwGetProcAddress);
+    // if (!gladLoadGLLoader((GLADloadproc)glfwGetProcAddress))
+    // {
+    //     std::cout << "Failed to initialize GLAD" << std::endl;
+    //     return -1;
+    // }
+
+    // render loop
+    while (!glfwWindowShouldClose(window))
+    {
+        // input
+        processInput(window);
+
+        // render
+        glClearColor(0.2f, 0.3f, 0.3f, 1.0f);
+        glClear(GL_COLOR_BUFFER_BIT);
+
+        // glfw: swap buffers and poll IO events (keys pressed/released, mouse moved etc.)
+        glfwSwapBuffers(window);
+        glfwPollEvents();
+    }
+
+    // glfw: terminate, clearing all previously allocated GLFW resources.
+    glfwTerminate();
+    return 0;
+}
+
+// process all input: query GLFW whether relevant keys are pressed/released this frame and react accordingly
+void processInput(GLFWwindow *window)
+{
+    if (glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_PRESS)
+        glfwSetWindowShouldClose(window, true);
+}
+
+// glfw: whenever the window size changed (by OS or user resize) this callback function executes
+void framebuffer_size_callback(GLFWwindow* window, int width, int height)
+{
+    // make sure the viewport matches the new window dimensions; note that width and 
+    // height will be significantly larger than specified on retina displays.
+    glViewport(0, 0, width, height);
+}
+""")
--- a/tests/data/valid_function_calls.txt
+++ b/tests/data/valid_function_calls.txt
@ -0,0 +1,106 @@
+put_complete_file_contents("main.cpp", """
+#include <GLFW/glfw3.h>
+#include <GL/gl.h>
+#include <iostream>
+""")
+
+put_complete_file_contents("test.py", """
+def hello_world():
+    print("Hello, world!")
+    return 42
+""")
+
+function_with_triple_quotes("""This is a test
+with multiple
+lines of text""")
+
+mixed_quotes_function("single quotes", """triple quoted
+multiline content""", 'another single quote')
+
+put_complete_file_contents("complex.json", """
+{
+  "name": "test",
+  "values": [1, 2, 3],
+  "nested": {
+    "property": "value"
+  }
+}
+""")
+
+put_complete_file_contents("main.cpp", """
+#include <GLFW/glfw3.h>
+#include <iostream>
+
+void framebuffer_size_callback(GLFWwindow* window, int width, int height);
+void processInput(GLFWwindow *window);
+
+// settings
+const unsigned int SCR_WIDTH = 800;
+const unsigned int SCR_HEIGHT = 600;
+
+int main()
+{
+    // glfw: initialize and configure
+    glfwInit();
+    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
+    glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
+    glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
+
+#ifdef __APPLE__
+    glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
+#endif
+
+    // glfw window creation
+    GLFWwindow* window = glfwCreateWindow(SCR_WIDTH, SCR_HEIGHT, "LearnOpenGL", NULL, NULL);
+    if (window == NULL)
+    {
+        std::cout << "Failed to create GLFW window" << std::endl;
+        glfwTerminate();
+        return -1;
+    }
+    glfwMakeContextCurrent(window);
+    glfwSetFramebufferSizeCallback(window, framebuffer_size_callback);
+
+    // glad: load all OpenGL function pointers
+    // gladLoadGLLoader((GLADloadproc)glfwGetProcAddress);
+    // if (!gladLoadGLLoader((GLADloadproc)glfwGetProcAddress))
+    // {
+    //     std::cout << "Failed to initialize GLAD" << std::endl;
+    //     return -1;
+    // }
+
+    // render loop
+    while (!glfwWindowShouldClose(window))
+    {
+        // input
+        processInput(window);
+
+        // render
+        glClearColor(0.2f, 0.3f, 0.3f, 1.0f);
+        glClear(GL_COLOR_BUFFER_BIT);
+
+        // glfw: swap buffers and poll IO events (keys pressed/released, mouse moved etc.)
+        glfwSwapBuffers(window);
+        glfwPollEvents();
+    }
+
+    // glfw: terminate, clearing all previously allocated GLFW resources.
+    glfwTerminate();
+    return 0;
+}
+
+// process all input: query GLFW whether relevant keys are pressed/released this frame and react accordingly
+void processInput(GLFWwindow *window)
+{
+    if (glfwGetKey(window, GLFW_KEY_ESCAPE) == GLFW_PRESS)
+        glfwSetWindowShouldClose(window, true);
+}
+
+// glfw: whenever the window size changed (by OS or user resize) this callback function executes
+void framebuffer_size_callback(GLFWwindow* window, int width, int height)
+{
+    // make sure the viewport matches the new window dimensions; note that width and 
+    // height will be significantly larger than specified on retina displays.
+    glViewport(0, 0, width, height);
+}
+""")
--- a/tests/ra_aid/test_ciayn_agent.py
+++ b/tests/ra_aid/test_ciayn_agent.py
@ -263,6 +263,24 @@ class TestFunctionCallValidation:
        """Test function calls spanning multiple lines."""
        assert not validate_function_call_pattern(test_input)

+    def test_triple_quoted_string_function_calls(self):
+        """Test function calls with triple-quoted strings.
+
+        Fixtures live in tests/data: ``test_case_*.txt`` must validate and
+        ``invalid_case_*.txt`` must be rejected.
+        """
+        from pathlib import Path
+
+        # Resolve fixtures relative to this file rather than a machine-specific
+        # absolute path, so the test runs from any checkout.
+        data_dir = Path(__file__).resolve().parent.parent / "data"
+
+        # Valid test cases
+        valid_files = sorted(data_dir.glob("test_case_*.txt"))
+        # An empty glob would let the loop below pass without checking anything.
+        assert valid_files, f"No valid fixtures found in {data_dir}"
+        for test_file in valid_files:
+            test_case = test_file.read_text().strip()
+            assert not validate_function_call_pattern(test_case), f"Failed on valid case: {test_file.name}"
+
+        # Invalid test cases
+        invalid_files = sorted(data_dir.glob("invalid_case_*.txt"))
+        assert invalid_files, f"No invalid fixtures found in {data_dir}"
+        for invalid_file in invalid_files:
+            invalid_case = invalid_file.read_text().strip()
+            assert validate_function_call_pattern(invalid_case), f"Should fail on invalid case: {invalid_file.name}"

 class TestCiaynAgentNewMethods(unittest.TestCase):
    pass
				`@ -0,0 +1 @@`
				`unterminated_quotes("""This is missing the closing quotes)`