ciayn fixes

2025-03-04 09:20:46 -05:00 · 2025-03-04 09:20:46 -05:00 · a0775e3792
parent fb030e9049
commit a0775e3792
5 changed files with 63 additions and 53 deletions
--- a/ra_aid/agent_backends/ciayn_agent.py
+++ b/ra_aid/agent_backends/ciayn_agent.py
@ -18,6 +18,7 @@ from ra_aid.prompts.ciayn_prompts import CIAYN_AGENT_SYSTEM_PROMPT, CIAYN_AGENT_
 from ra_aid.tools.expert import get_model
 from ra_aid.tools.reflection import get_function_info
 from ra_aid.console.output import cpm
+from ra_aid.console.formatting import print_warning, print_error

 logger = get_logger(__name__)

@ -327,7 +328,7 @@ class CiaynAgent:
                                
                                # If this fingerprint matches the last tool call, reject it
                                if current_call == self.last_tool_call:
-                                    logger.warning(f"Detected repeat call of {tool_name} with the same parameters.")
+                                    logger.info(f"Detected repeat call of {tool_name} with the same parameters.")
                                    result = f"Repeat calls of {tool_name} with the same parameters are not allowed. You must try something different!"
                                    results.append(result)
                                    
@ -419,7 +420,7 @@ class CiaynAgent:
                        
                        # If this fingerprint matches the last tool call, reject it
                        if current_call == self.last_tool_call:
-                            logger.warning(f"Detected repeat call of {tool_name} with the same parameters.")
+                            logger.info(f"Detected repeat call of {tool_name} with the same parameters.")
                            return f"Repeat calls of {tool_name} with the same parameters are not allowed. You must try something different!"
                        
                        # Update last tool call fingerprint for next comparison
@ -435,7 +436,8 @@ class CiaynAgent:
        except Exception as e:
            error_msg = f"Error: {str(e)} \n Could not execute code: {code}"
            tool_name = self.extract_tool_name(code)
-            logger.warning(f"Tool execution failed for `{tool_name}`: {str(e)}")
+            logger.info(f"Tool execution failed for `{tool_name}`: {str(e)}")
+            print_warning(f"Tool execution failed for `{tool_name}`:\nError: {str(e)}\n\nCode:\n\n````\n{code}\n````", title="Tool Error")
            raise ToolExecutionError(
                error_msg, base_message=msg, tool_name=tool_name
            ) from e
@ -467,7 +469,8 @@ class CiaynAgent:

        if not fallback_response:
            self.chat_history.append(err_msg)
-            logger.warning(f"Tool fallback was attempted but did not succeed. Original error: {str(e)}")
+            logger.info(f"Tool fallback was attempted but did not succeed. Original error: {str(e)}")
+            print_warning(f"Tool fallback was attempted but did not succeed. Original error: {str(e)}", title="Fallback Failed")
            return ""

        self.chat_history.append(self.fallback_fixed_msg)
@ -566,7 +569,8 @@ class CiaynAgent:
        pattern = r"([\w_\-]+)\((.*?)\)"
        matches = re.findall(pattern, response, re.DOTALL)
        if len(matches) == 0:
-            logger.warning("Failed to extract a valid tool call from the model's response.")
+            logger.info("Failed to extract a valid tool call from the model's response.")
+            print_warning("Failed to extract a valid tool call from the model's response.", title="Extraction Failed")
            raise ToolExecutionError("Failed to extract tool call")
        ma = matches[0][0].strip()
        mb = matches[0][1].strip().replace("\n", " ")
@ -593,9 +597,11 @@ class CiaynAgent:
            # Check if the response is empty or doesn't contain a valid tool call
            if not response.content or not response.content.strip():
                empty_response_count += 1
-                logger.warning(f"Model returned empty response (count: {empty_response_count})")
+                logger.info(f"Model returned empty response (count: {empty_response_count})")
                
-                logger.warning(f"The model returned an empty response (attempt {empty_response_count} of {max_empty_responses}). Requesting the model to make a valid tool call.")
+                warning_message = f"The model returned an empty response (attempt {empty_response_count} of {max_empty_responses}). Requesting the model to make a valid tool call."
+                logger.info(warning_message)
+                print_warning(warning_message, title="Empty Response")
                
                if empty_response_count >= max_empty_responses:
                    # If we've had too many empty responses, raise an error to break the loop
@ -604,7 +610,9 @@ class CiaynAgent:
                    mark_agent_crashed(crash_message)
                    logger.error(crash_message)
                    
-                    logger.error("The agent has crashed after multiple failed attempts to generate a valid tool call.")
+                    error_message = "The agent has crashed after multiple failed attempts to generate a valid tool call."
+                    logger.error(error_message)
+                    print_error(error_message)
                    
                    yield self._create_error_chunk(crash_message)
                    return
@ -624,7 +632,7 @@ class CiaynAgent:
                yield {}

            except ToolExecutionError as e:
-                logger.warning(f"Tool execution error: {str(e)}. Attempting fallback...")
+                logger.info(f"Tool execution error: {str(e)}. Attempting fallback...")
                fallback_response = self.fallback_handler.handle_failure(
                    e, self, self.chat_history
                )
--- a/ra_aid/console/formatting.py
+++ b/ra_aid/console/formatting.py
@ -53,6 +53,18 @@ def print_error(message: str) -> None:
    console.print(Panel(Markdown(message), title="Error", border_style="red bold"))


+def print_warning(message: str, title: str = "Warning") -> None:
+    """Print a warning message in a yellow-bordered panel.
+    
+    Uses a text-only title to prevent console formatting issues.
+
+    Args:
+        message: The warning message to display (supports Markdown formatting)
+        title: The title for the panel, defaults to "Warning"
+    """
+    console.print(Panel(Markdown(message), title=title, border_style="yellow bold"))
+
+
 def print_interrupt(message: str) -> None:
    """Print an interrupt message in a yellow-bordered panel with stop emoji.

--- a/ra_aid/prompts/ciayn_prompts.py
+++ b/ra_aid/prompts/ciayn_prompts.py
@ -70,11 +70,12 @@ You typically don't want to keep calling the same function over and over with th
  - For file content, the entire content must be inside ONE triple-quoted string
  - If you are calling a function with a dict argument, and one part of the dict is multiline, use \"\"\"

- Example of correct put_complete_file_contents format:
-  put_complete_file_contents("/path/to/file.py", \"\"\"
+<example of correct put_complete_file_contents format>
+  put_complete_file_contents('/path/to/file.py', '''
 def example_function():
    print("Hello world")
-\"\"\")
+''')
+</example of correct put_complete_file_contents format>

 </function call guidelines>

@ -88,28 +89,15 @@ PERFORMING WELL AS AN EFFICIENT YET COMPLETE AGENT WILL HELP MY CAREER.

 <critical rules>
 1. YOU MUST ALWAYS CALL A FUNCTION - NEVER RETURN EMPTY TEXT OR PLAIN TEXT
-2. ALWAYS OUTPUT EXACTLY ONE VALID FUNCTION CALL AS YOUR RESPONSE (except for bundleable tools which can have multiple calls)
+2. ALWAYS OUTPUT EXACTLY ONE VALID FUNCTION CALL AS YOUR RESPONSE
 3. NEVER TERMINATE YOUR RESPONSE WITHOUT CALLING A FUNCTION
 4. WHEN USING put_complete_file_contents, ALWAYS PUT THE ENTIRE FILE CONTENT INSIDE ONE TRIPLE-QUOTED STRING
+5. IF YOU EMIT CODE USING emit_key_snippet, WATCH OUT FOR PROPERLY ESCAPING QUOTES, E.G. TRIPLE QUOTES SHOULD HAVE ONE BACKSLASH IN FRONT OF EACH QUOTE.
 </critical rules>

-<bundled tools format>
-When you bundle multiple tool calls in one response, you'll receive results in the following format:
-
-```
-<result-abc123>
-First tool result content
-</result-abc123>
-
-<result-def456>
-Second tool result content
-</result-def456>
-```
-
-Each result will have a unique random ID tag, and the order of results will match the order of your tool calls.
-</bundled tools format>
-
 DO NOT CLAIM YOU ARE FINISHED UNTIL YOU ACTUALLY ARE!
+ALWAYS PREFER SINGLE QUOTES IN YOUR TOOL CALLING CODE!
+PROPERLY ESCAPE NESTED QUOTES!
 Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**
 """

@ -125,37 +113,31 @@ YOU MUST ALWAYS CALL A FUNCTION - NEVER RETURN EMPTY TEXT
 <multiline content reminder>
 When using put_complete_file_contents, ALWAYS place the entire file content within a SINGLE triple-quoted string:

-CORRECT:   put_complete_file_contents("/path/to/file.py", \"\"\"
+CORRECT:   put_complete_file_contents('/path/to/file.py', '''
 def main():
    print("Hello")
-\"\"\")
+''')
 </multiline content reminder>

 --- EXAMPLE GOOD OUTPUTS ---

 <example good output>
-request_research_and_implementation(\"\"\"Example query.\"\"\")
+request_research_and_implementation('''Example query.''')
 </example good output>

 <example good output>
-run_programming_task(\"\"\"# Example Programming Task\"\"\")
+run_programming_task('''# Example Programming Task''')
 </example good output>

 <example good output>
-put_complete_file_contents("/path/to/file.py", \"\"\"def example_function():
+put_complete_file_contents("/path/to/file.py", '''def example_function():
    print("This is a multi-line example")
    for i in range(10):
        print("Line " + str(i))
    return True
-\"\"\")
+''')
 </example good output>

-<example bundled output>
-emit_key_facts(["Fact 1", "Fact 2"])
-emit_expert_context("Important context")
-ask_expert("What does this mean?")
-</example bundled output>
-
 {last_result_section}
 """

@ -168,17 +150,10 @@ Remember: ALWAYS respond with a single line of Python code that calls a function

 IMPORTANT: For put_complete_file_contents, make sure to include the entire file content inside a SINGLE triple-quoted string:

-CORRECT:   put_complete_file_contents("/path/to/file.py", \"\"\"def main():
+CORRECT:   put_complete_file_contents('/path/to/file.py', '''def main():
    print("Hello")
-\"\"\")
+''')

-NOTE: You can also bundle multiple calls to certain tools (emit_expert_context, ask_expert, emit_key_facts, emit_key_snippet, and others) in one response. When bundling tools, each result will be returned with a unique random ID tag in this format:
-
-<result-abc123>
-First tool result content
-</result-abc123>
-
-<result-def456>
-Second tool result content
-</result-def456>
+ALWAYS PREFER SINGLE QUOTES IN YOUR TOOL CALLING CODE!
+PROPERLY ESCAPE NESTED QUOTES!
 """
--- a/ra_aid/prompts/research_prompts.py
+++ b/ra_aid/prompts/research_prompts.py
@ -59,6 +59,7 @@ You must:
    Use ripgrep_search extensively to do *exhaustive* searches for all references to anything that might be changed as part of the base level task.
      Prefer to use ripgrep_search with context params rather than reading whole files in order to preserve context tokens.
    Call emit_key_facts and emit_key_snippet on key information/facts/snippets of code you discover about this project during your research. This is information you will be writing down to be able to efficiently complete work in the future, so be on the lookout for these and make it count.
+    While it is important to emit key facts and snippets, only emit ones that are truly important info about the project or this task. Do not excessively emit key facts or snippets. Be strategic about it.

 You must not:

--- a/tests/ra_aid/agent_backends/test_ciayn_tool_validation.py
+++ b/tests/ra_aid/agent_backends/test_ciayn_tool_validation.py
@ -61,3 +61,17 @@ void display() {
 """)'''
    
    assert validate_function_call_pattern(function_call) is False, "C++ code in a triple-quoted string should be a valid function call"
+
+def test_validate_function_call_with_nested_triple_quotes():
+    """Test that function calls containing triple-quoted strings with nested docstrings are correctly validated."""
+    # The exact function call from the error message
+    function_call = '''emit_key_snippet(snippet_info={
+    "filepath": "tests/ra_aid/test_llm.py",
+    "line_number": 56,
+    "snippet": """def test_initialize_expert_defaults(clean_env, mock_openai, monkeypatch):\n    """Test expert LLM initialization with explicit parameters."""\n           
+monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "test-key")\n    _llm = initialize_expert_llm("openai", "o1")\n\n    mock_openai.assert_called_once_with(\n                     
+api_key="test-key",\n        model="o1",\n        reasoning_effort="high",\n        timeout=180,\n        max_retries=5,\n    )""",
+    "description": "Test case for initializing expert LLM with explicit parameters."
+})'''
+    
+    assert validate_function_call_pattern(function_call) is False, "Triple-quoted string with nested docstring should be a valid function call"