Update prompts, shell tools and add shell tests

This commit is contained in:
AI Christianson 2024-12-11 11:25:34 -05:00
parent 3b9757061c
commit e1b04781d7
5 changed files with 133 additions and 87 deletions

View File

@ -24,6 +24,8 @@ RA.Aid (ReAct Aid) is a powerful AI-driven command-line tool that integrates `ai
⚠️ **IMPORTANT: USE AT YOUR OWN RISK** ⚠️
- This tool **can and will** automatically execute shell commands on your system
- Shell commands require interactive approval unless --cowboy-mode is enabled
- The --cowboy-mode flag disables command approval and should be used with extreme caution
- No warranty is provided, either express or implied
- Always review the actions the agent proposes before allowing them to proceed
@ -125,6 +127,7 @@ ra-aid -m "Explain the authentication flow" --research-only
- `-m, --message`: The task or query to be executed (required)
- `--research-only`: Only perform research without implementation
- `--cowboy-mode`: Skip interactive approval for shell commands
### Example Tasks
@ -143,6 +146,11 @@ ra-aid -m "Explain the authentication flow" --research-only
ra-aid -m "Refactor the database connection code to use connection pooling"
```
4. Non-Interactive Mode:
```bash
ra-aid -m "Update all deprecated API calls" --cowboy-mode
```
### Environment Variables
RA.Aid uses the following environment variables:

View File

@ -46,6 +46,11 @@ Examples:
action='store_true',
help='Only perform research without implementation'
)
parser.add_argument(
'--cowboy-mode',
action='store_true',
help='Skip interactive approval for shell commands'
)
return parser.parse_args()
# Create the base model
@ -240,7 +245,8 @@ def main():
"thread_id": "abc123"
},
"recursion_limit": 100,
"research_only": args.research_only
"research_only": args.research_only,
"cowboy_mode": args.cowboy_mode
}
# Store config in global memory for access by is_informational_query

View File

@ -3,9 +3,12 @@ Stage-specific prompts for the AI agent system.
Each prompt constant uses str.format() style template substitution for variable replacement.
The prompts guide the agent through different stages of task execution.
"""
# Research stage prompt - guides initial codebase analysis
These updated prompts include instructions to scale complexity:
- For simpler requests, keep the scope minimal and avoid unnecessary complexity.
- For more complex requests, still provide detailed planning and thorough steps.
"""
# Research stage prompt - guides initial codebase analysis
RESEARCH_PROMPT = """
Objective
@ -33,8 +36,8 @@ You must not:
Tools and Methodology
Use only non-recursive, targeted fuzzy find, ripgrep_search tool (which provides context), list_directory_tree tool, shell commands, etc. (use your imagination) to efficiently explore the project structure. For example:
After identifying files, you may read them to confirm their contents only if needed to understand what currently exists (for example, to confirm if a file is a documentation file or a configuration file).
Use only non-recursive, targeted fuzzy find, ripgrep_search tool (which provides context), list_directory_tree tool, shell commands, etc. (use your imagination) to efficiently explore the project structure.
After identifying files, you may read them to confirm their contents only if needed to understand what currently exists.
Be meticulous: If you find a directory, explore it thoroughly. If you find files of potential relevance, record them. Make sure you do not skip any directories you discover.
Prefer to use list_directory_tree and other tools over shell commands.
Do not produce huge outputs from your commands. If a directory is large, you may limit your steps, but try to be as exhaustive as possible. Incrementally gather details as needed.
@ -74,12 +77,11 @@ Decision on Implementation
If you see reasons that implementation changes will be required in the future, after documenting all findings, call request_implementation and specify why.
If no changes are needed, simply state that no changes are required.
Do not do any implementation or planning now. Just request it if needed.
If there is a top-level README.md or docs/ folder, always start with that.
"""
# Planning stage prompt - guides task breakdown and implementation planning
# Includes a directive to scale complexity with request size.
PLANNING_PROMPT = """Base Task:
{base_task}
@ -94,18 +96,6 @@ Key Facts:
Key Snippets:
{key_snippets}
Fact Management:
Each fact is identified with [Fact ID: X].
Facts may be deleted if they become outdated, irrelevant, or duplicates.
Use delete_key_fact with the specific Fact ID to remove unnecessary facts.
Snippet Management:
Each snippet is identified with [Snippet ID: X].
Snippets include file path, line number, and source code.
Snippets may have optional descriptions explaining their significance.
Delete snippets with delete_key_snippet if they become outdated or irrelevant.
Use emit_key_snippet to store important code sections needed for reference.
Fact Management:
Each fact is identified with [Fact ID: X].
Facts may be deleted if they become outdated, irrelevant, or duplicates.
@ -122,37 +112,32 @@ Guidelines:
If you need additional input or assistance from the expert, first use emit_expert_context to provide all relevant context. Wait for the experts response before defining tasks in non-trivial scenarios.
Scale the complexity of your plan:
Individual tasks can include multiple steps, file edits, etc.
Therefore, use as few tasks as needed, but no fewer.
Keep tasks organized as semantic divisions of the overall work, rather than a series of steps.
When planning the implementation:
Break the overall work into sub-tasks that are as detailed as possible.
Break the overall work into sub-tasks that are as detailed as necessary, but no more.
Each sub-task should be clear and unambiguous, and should fully describe what needs to be done, including:
Purpose and goals of the sub-task
Steps required to complete it
Any external interfaces it will integrate with
Data models and structures it will use
API contracts, endpoints, or protocols it requires or provides
Detailed testing strategies specific to the sub-task
Be explicit about inputs, outputs, error cases, and edge conditions.
For complex tasks, include:
Sample requests and responses (if APIs are involved)
Details on error handling and logging
Relevant data validation rules
Any performance, scalability, or security considerations
Testing strategies appropriate to the complexity of that sub-task
You may include pseudocode, but not full code.
After finalizing the overall approach:
Use emit_plan to store the high-level implementation plan.
For each sub-task, use emit_task to store a thorough, step-by-step description.
The description should be so detailed that it could be handed to another engineer who could implement it without further clarification.
Only stop after all necessary tasks are fully detailed and cover the entire scope of the original request.
Avoid unnecessary complexity, but do not omit critical details.
For each sub-task, use emit_task to store a step-by-step description.
The description should be only as detailed as warranted by the complexity of the request.
Do not implement anything yet.
You are an autonomous agent, not a chatbot."""
"""
# Research summary prompt - guides generation of research summaries
# Remains essentially the same, but with complexity scaling if needed.
SUMMARY_PROMPT = """
Using only the information provided in the Research Notes and Key Facts below, write a concise and direct answer to the user's query.
@ -181,13 +166,14 @@ Snippet Management:
Use emit_key_snippet to store important code sections needed for reference.
Instructions:
- **Stay Within Provided Information**: Do not include any information not present in the Research Notes or Key Facts. Avoid assumptions or external knowledge.
- **Handle Contradictions Appropriately**: If there are contradictions in the provided information, you may take further research steps to resolve the contradiction. If you cannot, note and explain the contradictions as best as you can.
- **Maintain Focus and Brevity**: Keep your response succinct yet comprehensive and focused solely on the user's query without adding unnecessary details.
- **Include technical details**: If it is a technical query or a query related to files on the filesystem, always take time to read those and include relevant snippets.
- **Stay Within Provided Information**: Do not include any information not present in the Research Notes or Key Facts.
- **Handle Contradictions Appropriately**: If contradictions exist, consider additional research or note the contradictions.
- **Maintain Focus and Brevity**: Keep the response concise, focusing on the user's query.
- **Include Technical Details If Relevant**: For technical queries, reference discovered files and snippets.
"""
# Implementation stage prompt - guides specific task implementation
# Added instruction to adjust complexity of implementation to match request.
IMPLEMENTATION_PROMPT = """Base-level task (for reference only):
{base_task}
@ -204,55 +190,21 @@ Relevant Files:
{related_files}
Important Notes:
- You must focus solely on the given task and implement it as described.
- Do not implement other tasks or deviate from the defined scope.
- Use the delete_key_fact tool to remove facts that become outdated, irrelevant, or duplicated.
- Whenever referencing facts, use their assigned **[Fact ID: X]** format.
- Aggressively manage code snippets throughout implementation:
**When to Add Snippets**
- Capture code with emit_key_snippet:
* Before modifying any existing code
* When discovering related code that impacts the task
* After implementing new code sections
* When finding code patterns that will be modified
**When to Remove Snippets**
- Use delete_key_snippet with [Snippet ID: X]:
* Immediately after modifying or replacing referenced code
* When the snippet becomes obsolete or irrelevant
* When newer versions of the code exist
* When the referenced code has been deleted
**Snippet Management Examples**
- Adding a snippet before modification:
emit_key_snippet with:
filepath: "path/to/file.py"
line_number: 10
snippet: "[code to be modified]"
description: "Original version before changes"
- Removing an outdated snippet:
delete_key_snippet with [Snippet ID: X] after the code is modified
**Maintaining Snippet Quality**
- Only keep snippets relevant to current or future task understanding
- Regularly review snippets to ensure they match current codebase
- Prioritize snippet management but don't let it block implementation progress
- Use snippets to complement version control by highlighting key code sections
- Focus solely on the given task and implement it as described.
- Scale the complexity of your solution to the complexity of the request. For simple requests, keep it straightforward and minimal. For complex requests, maintain the previously planned depth.
- Use delete_key_fact to remove facts that become outdated, irrelevant, or duplicated.
- Use emit_key_snippet to manage code sections before and after modifications as needed.
- Regularly remove outdated snippets with delete_key_snippet.
Instructions:
1. Review the provided base task, plan, and key facts.
2. Implement only the specified task:
{task}
3. While implementing, follow these guidelines:
- Work incrementally, testing and validating as you go.
- Update or remove any key facts that no longer apply.
- Do not build features not explicitly required by the task.
- Only create or modify files directly related to this task.
3. Work incrementally, validating as you go.
4. Update or remove any key facts that no longer apply.
5. Do not add features not explicitly required.
6. Only create or modify files directly related to this task.
4. Once the task is complete, ensure all updated files are emitted.
No other activities (such as discussing purpose, future improvements, or unrelated steps) are allowed. Stay fully focused on completing the defined implementation task.
"""
Once the task is complete, ensure all updated files are emitted.
"""

View File

@ -2,6 +2,8 @@ from typing import Dict, Union
from langchain_core.tools import tool
from rich.console import Console
from rich.panel import Panel
from rich.prompt import Confirm
from ra_aid.tools.memory import _global_memory
from ra_aid.proc.interactive import run_interactive_command
from ra_aid.text.processing import truncate_output
@ -39,7 +41,18 @@ def run_shell_command(command: str) -> Dict[str, Union[str, int, bool]]:
"""
# Show just the command in a simple panel
console.print(Panel(command, title="🐚 Shell", border_style="bright_yellow"))
# Check if we need approval
cowboy_mode = _global_memory.get('config', {}).get('cowboy_mode', False)
if not cowboy_mode:
if not Confirm.ask("Execute this command?", default=True):
return {
"output": "Command execution cancelled by user",
"return_code": 1,
"success": False
}
try:
print()
output, return_code = run_interactive_command(['/bin/bash', '-c', command])

View File

@ -0,0 +1,67 @@
import pytest
from unittest.mock import patch, MagicMock
from ra_aid.tools.shell import run_shell_command
from ra_aid.tools.memory import _global_memory
@pytest.fixture
def mock_console():
    """Swap the shell module's rich console for a mock during the test."""
    with patch('ra_aid.tools.shell.console') as patched_console:
        yield patched_console
@pytest.fixture
def mock_confirm():
    """Swap rich's Confirm in the shell module for a mock during the test."""
    with patch('ra_aid.tools.shell.Confirm') as patched_confirm:
        yield patched_confirm
@pytest.fixture
def mock_run_interactive():
    """Stub run_interactive_command to return canned output and exit code 0."""
    with patch('ra_aid.tools.shell.run_interactive_command') as stub:
        stub.return_value = (b"test output", 0)
        yield stub
def test_shell_command_cowboy_mode(mock_console, mock_confirm, mock_run_interactive):
    """Shell command runs without an approval prompt when cowboy_mode is on."""
    # Arrange: enable cowboy mode in the shared global config.
    _global_memory['config'] = {'cowboy_mode': True}
    try:
        result = run_shell_command("echo test")
        # Output from the mocked interactive runner is surfaced in the result.
        assert result['success'] is True
        assert result['return_code'] == 0
        assert "test output" in result['output']
        # In cowboy mode the user must never be prompted for approval.
        mock_confirm.ask.assert_not_called()
    finally:
        # Remove the config so shared global state does not leak into other tests.
        _global_memory.pop('config', None)
def test_shell_command_interactive_approved(mock_console, mock_confirm, mock_run_interactive):
    """Shell command executes after the user approves the interactive prompt."""
    # Arrange: interactive mode (cowboy off) with the user answering "yes".
    _global_memory['config'] = {'cowboy_mode': False}
    mock_confirm.ask.return_value = True
    try:
        result = run_shell_command("echo test")
        assert result['success'] is True
        assert result['return_code'] == 0
        assert "test output" in result['output']
        # Exactly one approval prompt should have been shown.
        mock_confirm.ask.assert_called_once()
    finally:
        # Remove the config so shared global state does not leak into other tests.
        _global_memory.pop('config', None)
def test_shell_command_interactive_rejected(mock_console, mock_confirm, mock_run_interactive):
    """Shell command is cancelled when the user rejects the interactive prompt."""
    # Arrange: interactive mode (cowboy off) with the user answering "no".
    _global_memory['config'] = {'cowboy_mode': False}
    mock_confirm.ask.return_value = False
    try:
        result = run_shell_command("echo test")
        assert result['success'] is False
        assert result['return_code'] == 1
        assert "cancelled by user" in result['output']
        mock_confirm.ask.assert_called_once()
        # A rejected command must never actually be executed.
        mock_run_interactive.assert_not_called()
    finally:
        # Remove the config so shared global state does not leak into other tests.
        _global_memory.pop('config', None)
def test_shell_command_execution_error(mock_console, mock_confirm, mock_run_interactive):
    """Exceptions raised while running the command yield a failed result."""
    # Arrange: cowboy mode so no prompt; make the runner raise.
    _global_memory['config'] = {'cowboy_mode': True}
    mock_run_interactive.side_effect = Exception("Command failed")
    try:
        result = run_shell_command("invalid command")
        # The tool is expected to catch the exception and report failure
        # rather than propagate it.
        assert result['success'] is False
        assert result['return_code'] == 1
        assert "Command failed" in result['output']
    finally:
        # Remove the config so shared global state does not leak into other tests.
        _global_memory.pop('config', None)