From e1b04781d79387c9b0d6678f4ff74c0ca2329e56 Mon Sep 17 00:00:00 2001
From: AI Christianson <ai.christianson@christianson.ai>
Date: Wed, 11 Dec 2024 11:25:34 -0500
Subject: [PATCH] Update prompts, shell tools and add shell tests

---
 README.md                        |   8 ++
 ra_aid/__main__.py               |   8 +-
 ra_aid/prompts.py                | 122 ++++++++++---------------------
 ra_aid/tools/shell.py            |  15 +++-
 tests/ra_aid/tools/test_shell.py |  67 +++++++++++++++++
 5 files changed, 133 insertions(+), 87 deletions(-)
 create mode 100644 tests/ra_aid/tools/test_shell.py

diff --git a/README.md b/README.md
index 35c93af..c94ddc9 100644
--- a/README.md
+++ b/README.md
@@ -24,6 +24,8 @@ RA.Aid (ReAct Aid) is a powerful AI-driven command-line tool that integrates `ai
 
 ⚠️ **IMPORTANT: USE AT YOUR OWN RISK** ⚠️
 - This tool **can and will** automatically execute shell commands on your system
+- Shell commands require interactive approval unless `--cowboy-mode` is enabled
+- The `--cowboy-mode` flag disables command approval and should be used with extreme caution
 - No warranty is provided, either express or implied
 - Always review the actions the agent proposes before allowing them to proceed
 
@@ -125,6 +127,7 @@ ra-aid -m "Explain the authentication flow" --research-only
 
 - `-m, --message`: The task or query to be executed (required)
 - `--research-only`: Only perform research without implementation
+- `--cowboy-mode`: Skip interactive approval for shell commands
 
 ### Example Tasks
 
@@ -143,6 +146,11 @@ ra-aid -m "Explain the authentication flow" --research-only
    ra-aid -m "Refactor the database connection code to use connection pooling"
    ```
 
+4. Non-Interactive Mode:
+   ```bash
+   ra-aid -m "Update all deprecated API calls" --cowboy-mode
+   ```
+
 ### Environment Variables
 
 RA.Aid uses the following environment variables:
diff --git a/ra_aid/__main__.py b/ra_aid/__main__.py
index 2effc47..9bbad77 100644
--- a/ra_aid/__main__.py
+++ b/ra_aid/__main__.py
@@ -46,6 +46,11 @@ Examples:
         action='store_true',
         help='Only perform research without implementation'
     )
+    parser.add_argument(
+        '--cowboy-mode',
+        action='store_true',
+        help='Skip interactive approval for shell commands'
+    )
     return parser.parse_args()
 
 # Create the base model
@@ -240,7 +245,8 @@ def main():
             "thread_id": "abc123"
         },
         "recursion_limit": 100,
-        "research_only": args.research_only
+        "research_only": args.research_only,
+        "cowboy_mode": args.cowboy_mode
     }
     
     # Store config in global memory for access by is_informational_query
diff --git a/ra_aid/prompts.py b/ra_aid/prompts.py
index e7ac89d..dc969ab 100644
--- a/ra_aid/prompts.py
+++ b/ra_aid/prompts.py
@@ -3,9 +3,12 @@ Stage-specific prompts for the AI agent system.
 
 Each prompt constant uses str.format() style template substitution for variable replacement.
 The prompts guide the agent through different stages of task execution.
-"""
 
-# Research stage prompt - guides initial codebase analysis
+The prompts include instructions to scale complexity with the request:
+- For simpler requests, keep the scope minimal and avoid unnecessary complexity.
+- For more complex requests, still provide detailed planning and thorough steps.
+"""
+ 
 # Research stage prompt - guides initial codebase analysis
 RESEARCH_PROMPT = """
 Objective
@@ -33,8 +36,8 @@ You must not:
 
 Tools and Methodology
 
-    Use only non-recursive, targeted fuzzy find, ripgrep_search tool (which provides context), list_directory_tree tool, shell commands, etc. (use your imagination) to efficiently explore the project structure. For example:
-    After identifying files, you may read them to confirm their contents only if needed to understand what currently exists (for example, to confirm if a file is a documentation file or a configuration file).
+    Use only non-recursive, targeted fuzzy find, ripgrep_search tool (which provides context), list_directory_tree tool, shell commands, etc. (use your imagination) to efficiently explore the project structure.
+    After identifying files, you may read them to confirm their contents only if needed to understand what currently exists.
     Be meticulous: If you find a directory, explore it thoroughly. If you find files of potential relevance, record them. Make sure you do not skip any directories you discover.
     Prefer to use list_directory_tree and other tools over shell commands.
     Do not produce huge outputs from your commands. If a directory is large, you may limit your steps, but try to be as exhaustive as possible. Incrementally gather details as needed.
@@ -74,12 +77,11 @@ Decision on Implementation
         If you see reasons that implementation changes will be required in the future, after documenting all findings, call request_implementation and specify why.
         If no changes are needed, simply state that no changes are required.
 
-Do not do any implementation or planning now. Just request it if needed.
-
 If there is a top-level README.md or docs/ folder, always start with that.
 """
 
 # Planning stage prompt - guides task breakdown and implementation planning
+# Includes a directive to scale complexity with request size.
 PLANNING_PROMPT = """Base Task:
 {base_task}
 
@@ -94,18 +96,6 @@ Key Facts:
 Key Snippets:
 {key_snippets}
 
-Fact Management:
-    Each fact is identified with [Fact ID: X].
-    Facts may be deleted if they become outdated, irrelevant, or duplicates. 
-    Use delete_key_fact with the specific Fact ID to remove unnecessary facts.
-
-Snippet Management:
-    Each snippet is identified with [Snippet ID: X].
-    Snippets include file path, line number, and source code.
-    Snippets may have optional descriptions explaining their significance.
-    Delete snippets with delete_key_snippet if they become outdated or irrelevant.
-    Use emit_key_snippet to store important code sections needed for reference.
-
 Fact Management:
     Each fact is identified with [Fact ID: X].
     Facts may be deleted if they become outdated, irrelevant, or duplicates. 
@@ -122,37 +112,32 @@ Guidelines:
 
     If you need additional input or assistance from the expert, first use emit_expert_context to provide all relevant context. Wait for the expert’s response before defining tasks in non-trivial scenarios.
 
+    Scale the complexity of your plan:
+        Individual tasks can include multiple steps, file edits, etc.
+          Therefore, use as few tasks as needed, but no fewer.
+          Keep tasks organized as semantic divisions of the overall work, rather than a series of steps.
+
     When planning the implementation:
-        Break the overall work into sub-tasks that are as detailed as possible.
+        Break the overall work into sub-tasks that are as detailed as necessary, but no more.
         Each sub-task should be clear and unambiguous, and should fully describe what needs to be done, including:
             Purpose and goals of the sub-task
             Steps required to complete it
             Any external interfaces it will integrate with
             Data models and structures it will use
             API contracts, endpoints, or protocols it requires or provides
-            Detailed testing strategies specific to the sub-task
-        Be explicit about inputs, outputs, error cases, and edge conditions.
-
-    For complex tasks, include:
-        Sample requests and responses (if APIs are involved)
-        Details on error handling and logging
-        Relevant data validation rules
-        Any performance, scalability, or security considerations
+            Testing strategies appropriate to the complexity of that sub-task
+            You may include pseudocode, but not full code.
 
     After finalizing the overall approach:
         Use emit_plan to store the high-level implementation plan.
-        For each sub-task, use emit_task to store a thorough, step-by-step description.
-            The description should be so detailed that it could be handed to another engineer who could implement it without further clarification.
-
-    Only stop after all necessary tasks are fully detailed and cover the entire scope of the original request.
-
-    Avoid unnecessary complexity, but do not omit critical details.
-
+        For each sub-task, use emit_task to store a step-by-step description.
+            The description should be only as detailed as warranted by the complexity of the request.
+    
     Do not implement anything yet.
-
-You are an autonomous agent, not a chatbot."""
+"""
 
 # Research summary prompt - guides generation of research summaries
+# Answers are restricted to the gathered research notes and key facts, and kept concise.
 SUMMARY_PROMPT = """
 Using only the information provided in the Research Notes and Key Facts below, write a concise and direct answer to the user's query.
 
@@ -181,13 +166,14 @@ Snippet Management:
     Use emit_key_snippet to store important code sections needed for reference.
 
 Instructions:
-- **Stay Within Provided Information**: Do not include any information not present in the Research Notes or Key Facts. Avoid assumptions or external knowledge.
-- **Handle Contradictions Appropriately**: If there are contradictions in the provided information, you may take further research steps to resolve the contradiction. If you cannot, note and explain the contradictions as best as you can.
-- **Maintain Focus and Brevity**: Keep your response succinct yet comprehensive and focused solely on the user's query without adding unnecessary details.
-- **Include technical details**: If it is a technical query or a query related to files on the filesystem, always take time to read those and include relevant snippets.
+- **Stay Within Provided Information**: Do not include any information not present in the Research Notes or Key Facts.
+- **Handle Contradictions Appropriately**: If contradictions exist, consider additional research or note the contradictions.
+- **Maintain Focus and Brevity**: Keep the response concise, focusing on the user's query.
+- **Include Technical Details If Relevant**: For technical queries, reference discovered files and snippets.
 """
 
 # Implementation stage prompt - guides specific task implementation
+# Instructs the agent to match the complexity of the solution to the complexity of the request.
 IMPLEMENTATION_PROMPT = """Base-level task (for reference only):
 {base_task}
 
@@ -204,55 +190,21 @@ Relevant Files:
 {related_files}
 
 Important Notes:
-- You must focus solely on the given task and implement it as described.
-- Do not implement other tasks or deviate from the defined scope.
-- Use the delete_key_fact tool to remove facts that become outdated, irrelevant, or duplicated.
-- Whenever referencing facts, use their assigned **[Fact ID: X]** format.
-- Aggressively manage code snippets throughout implementation:
-
-  **When to Add Snippets**
-  - Capture code with emit_key_snippet:
-    * Before modifying any existing code
-    * When discovering related code that impacts the task
-    * After implementing new code sections
-    * When finding code patterns that will be modified
-
-  **When to Remove Snippets**
-  - Use delete_key_snippet with [Snippet ID: X]:
-    * Immediately after modifying or replacing referenced code
-    * When the snippet becomes obsolete or irrelevant
-    * When newer versions of the code exist
-    * When the referenced code has been deleted
-
-  **Snippet Management Examples**
-  - Adding a snippet before modification:
-    emit_key_snippet with:
-      filepath: "path/to/file.py"
-      line_number: 10
-      snippet: "[code to be modified]"
-      description: "Original version before changes"
-  
-  - Removing an outdated snippet:
-    delete_key_snippet with [Snippet ID: X] after the code is modified
-
-  **Maintaining Snippet Quality**
-  - Only keep snippets relevant to current or future task understanding
-  - Regularly review snippets to ensure they match current codebase
-  - Prioritize snippet management but don't let it block implementation progress
-  - Use snippets to complement version control by highlighting key code sections
+- Focus solely on the given task and implement it as described.
+- Scale the complexity of your solution to the complexity of the request. For simple requests, keep it straightforward and minimal. For complex requests, maintain the previously planned depth.
+- Use delete_key_fact to remove facts that become outdated, irrelevant, or duplicated.
+- Use emit_key_snippet to manage code sections before and after modifications as needed.
+- Regularly remove outdated snippets with delete_key_snippet.
 
 Instructions:
 1. Review the provided base task, plan, and key facts.
 2. Implement only the specified task:
    {task}
 
-3. While implementing, follow these guidelines:
-   - Work incrementally, testing and validating as you go.
-   - Update or remove any key facts that no longer apply.
-   - Do not build features not explicitly required by the task.
-   - Only create or modify files directly related to this task.
+3. Work incrementally, validating as you go.
+4. Update or remove any key facts that no longer apply.
+5. Do not add features not explicitly required.
+6. Only create or modify files directly related to this task.
 
-4. Once the task is complete, ensure all updated files are emitted.
-
-No other activities (such as discussing purpose, future improvements, or unrelated steps) are allowed. Stay fully focused on completing the defined implementation task.
-"""
+Once the task is complete, ensure all updated files are emitted.
+"""
\ No newline at end of file
diff --git a/ra_aid/tools/shell.py b/ra_aid/tools/shell.py
index aba2afb..2dc8826 100644
--- a/ra_aid/tools/shell.py
+++ b/ra_aid/tools/shell.py
@@ -2,6 +2,8 @@ from typing import Dict, Union
 from langchain_core.tools import tool
 from rich.console import Console
 from rich.panel import Panel
+from rich.prompt import Confirm
+from ra_aid.tools.memory import _global_memory
 from ra_aid.proc.interactive import run_interactive_command
 from ra_aid.text.processing import truncate_output
 
@@ -39,7 +41,18 @@ def run_shell_command(command: str) -> Dict[str, Union[str, int, bool]]:
     """
     # Show just the command in a simple panel
     console.print(Panel(command, title="🐚 Shell", border_style="bright_yellow"))
-        
+
+    # Check if we need approval
+    cowboy_mode = _global_memory.get('config', {}).get('cowboy_mode', False)
+    
+    if not cowboy_mode:
+        if not Confirm.ask("Execute this command?", default=True):
+            return {
+                "output": "Command execution cancelled by user",
+                "return_code": 1,
+                "success": False
+            }
+    
     try:
         print()
         output, return_code = run_interactive_command(['/bin/bash', '-c', command])
diff --git a/tests/ra_aid/tools/test_shell.py b/tests/ra_aid/tools/test_shell.py
new file mode 100644
index 0000000..231b39b
--- /dev/null
+++ b/tests/ra_aid/tools/test_shell.py
@@ -0,0 +1,67 @@
+import pytest
+from unittest.mock import patch, MagicMock
+from ra_aid.tools.shell import run_shell_command
+from ra_aid.tools.memory import _global_memory
+
+@pytest.fixture
+def mock_console():  # Swaps ra_aid.tools.shell.console for a mock while the test runs.
+    with patch('ra_aid.tools.shell.console') as mock:
+        yield mock
+
+@pytest.fixture
+def mock_confirm():  # Swaps ra_aid.tools.shell.Confirm for a mock so no real prompt is shown.
+    with patch('ra_aid.tools.shell.Confirm') as mock:
+        yield mock
+
+@pytest.fixture
+def mock_run_interactive():  # Stubs ra_aid.tools.shell.run_interactive_command so no process is spawned.
+    with patch('ra_aid.tools.shell.run_interactive_command') as mock:
+        mock.return_value = (b"test output", 0)  # (output, return_code) pair
+        yield mock
+
+def test_shell_command_cowboy_mode(mock_console, mock_confirm, mock_run_interactive):
+    """Test shell command execution in cowboy mode (no approval)"""
+    _global_memory['config'] = {'cowboy_mode': True}
+    
+    result = run_shell_command("echo test")
+    
+    assert result['success'] is True
+    assert result['return_code'] == 0
+    assert "test output" in result['output']
+    mock_confirm.ask.assert_not_called()
+
+def test_shell_command_interactive_approved(mock_console, mock_confirm, mock_run_interactive):
+    """Test shell command execution with interactive approval"""
+    _global_memory['config'] = {'cowboy_mode': False}
+    mock_confirm.ask.return_value = True
+    
+    result = run_shell_command("echo test")
+    
+    assert result['success'] is True
+    assert result['return_code'] == 0
+    assert "test output" in result['output']
+    mock_confirm.ask.assert_called_once()
+
+def test_shell_command_interactive_rejected(mock_console, mock_confirm, mock_run_interactive):
+    """Test shell command rejection in interactive mode"""
+    _global_memory['config'] = {'cowboy_mode': False}
+    mock_confirm.ask.return_value = False
+    
+    result = run_shell_command("echo test")
+    
+    assert result['success'] is False
+    assert result['return_code'] == 1
+    assert "cancelled by user" in result['output']
+    mock_confirm.ask.assert_called_once()
+    mock_run_interactive.assert_not_called()
+
+def test_shell_command_execution_error(mock_console, mock_confirm, mock_run_interactive):
+    """Test handling of shell command execution errors"""
+    _global_memory['config'] = {'cowboy_mode': True}
+    mock_run_interactive.side_effect = Exception("Command failed")
+    
+    result = run_shell_command("invalid command")
+    
+    assert result['success'] is False
+    assert result['return_code'] == 1
+    assert "Command failed" in result['output']