Update prompts, shell tools and add shell tests

2024-12-11 11:25:34 -05:00 · 2024-12-11 11:25:34 -05:00 · e1b04781d7
parent 3b9757061c
commit e1b04781d7
5 changed files with 133 additions and 87 deletions
--- a/README.md
+++ b/README.md
@ -24,6 +24,8 @@ RA.Aid (ReAct Aid) is a powerful AI-driven command-line tool that integrates `ai
 ⚠️ **IMPORTANT: USE AT YOUR OWN RISK** ⚠️
 - This tool **can and will** automatically execute shell commands on your system
 - Shell commands require interactive approval unless --cowboy-mode is enabled
 - The --cowboy-mode flag disables command approval and should be used with extreme caution
 - No warranty is provided, either express or implied
 - Always review the actions the agent proposes before allowing them to proceed
@ -125,6 +127,7 @@ ra-aid -m "Explain the authentication flow" --research-only
 - `-m, --message`: The task or query to be executed (required)
 - `--research-only`: Only perform research without implementation
 - `--cowboy-mode`: Skip interactive approval for shell commands
 ### Example Tasks
@ -143,6 +146,11 @@ ra-aid -m "Explain the authentication flow" --research-only
   ra-aid -m "Refactor the database connection code to use connection pooling"
   ```
 4. Non-Interactive Mode:
   ```bash
   ra-aid -m "Update all deprecated API calls" --cowboy-mode
   ```
 ### Environment Variables
 RA.Aid uses the following environment variables:
--- a/ra_aid/main.py
+++ b/ra_aid/main.py
@ -46,6 +46,11 @@ Examples:
        action='store_true',
        help='Only perform research without implementation'
    )
    parser.add_argument(
        '--cowboy-mode',
        action='store_true',
        help='Skip interactive approval for shell commands'
    )
    return parser.parse_args()
 # Create the base model
@ -240,7 +245,8 @@ def main():
            "thread_id": "abc123"
        },
        "recursion_limit": 100,
-        "research_only": args.research_only
+        "research_only": args.research_only,
        "cowboy_mode": args.cowboy_mode
    }
    # Store config in global memory for access by is_informational_query
--- a/ra_aid/prompts.py
+++ b/ra_aid/prompts.py
@ -3,9 +3,12 @@ Stage-specific prompts for the AI agent system.
 Each prompt constant uses str.format() style template substitution for variable replacement.
 The prompts guide the agent through different stages of task execution.
 """
-# Research stage prompt - guides initial codebase analysis
+These updated prompts include instructions to scale complexity:
 - For simpler requests, keep the scope minimal and avoid unnecessary complexity.
 - For more complex requests, still provide detailed planning and thorough steps.
 """
 # Research stage prompt - guides initial codebase analysis
 RESEARCH_PROMPT = """
 Objective
@ -33,8 +36,8 @@ You must not:
 Tools and Methodology
-    Use only non-recursive, targeted fuzzy find, ripgrep_search tool (which provides context), list_directory_tree tool, shell commands, etc. (use your imagination) to efficiently explore the project structure. For example:
+    Use only non-recursive, targeted fuzzy find, ripgrep_search tool (which provides context), list_directory_tree tool, shell commands, etc. (use your imagination) to efficiently explore the project structure.
-    After identifying files, you may read them to confirm their contents only if needed to understand what currently exists (for example, to confirm if a file is a documentation file or a configuration file).
+    After identifying files, you may read them to confirm their contents only if needed to understand what currently exists.
    Be meticulous: If you find a directory, explore it thoroughly. If you find files of potential relevance, record them. Make sure you do not skip any directories you discover.
    Prefer to use list_directory_tree and other tools over shell commands.
    Do not produce huge outputs from your commands. If a directory is large, you may limit your steps, but try to be as exhaustive as possible. Incrementally gather details as needed.
@ -74,12 +77,11 @@ Decision on Implementation
        If you see reasons that implementation changes will be required in the future, after documenting all findings, call request_implementation and specify why.
        If no changes are needed, simply state that no changes are required.
 Do not do any implementation or planning now. Just request it if needed.
 If there is a top-level README.md or docs/ folder, always start with that.
 """
 # Planning stage prompt - guides task breakdown and implementation planning
 # Includes a directive to scale complexity with request size.
 PLANNING_PROMPT = """Base Task:
 {base_task}
@ -94,18 +96,6 @@ Key Facts:
 Key Snippets:
 {key_snippets}
 Fact Management:
    Each fact is identified with [Fact ID: X].
    Facts may be deleted if they become outdated, irrelevant, or duplicates. 
    Use delete_key_fact with the specific Fact ID to remove unnecessary facts.
 Snippet Management:
    Each snippet is identified with [Snippet ID: X].
    Snippets include file path, line number, and source code.
    Snippets may have optional descriptions explaining their significance.
    Delete snippets with delete_key_snippet if they become outdated or irrelevant.
    Use emit_key_snippet to store important code sections needed for reference.
 Fact Management:
    Each fact is identified with [Fact ID: X].
    Facts may be deleted if they become outdated, irrelevant, or duplicates. 
@ -122,37 +112,32 @@ Guidelines:
    If you need additional input or assistance from the expert, first use emit_expert_context to provide all relevant context. Wait for the expert’s response before defining tasks in non-trivial scenarios.
    Scale the complexity of your plan:
        Individual tasks can include multiple steps, file edits, etc.
          Therefore, use as few tasks as needed, but no fewer.
          Keep tasks organized as semantic divisions of the overall work, rather than a series of steps.
    When planning the implementation:
-        Break the overall work into sub-tasks that are as detailed as possible.
+        Break the overall work into sub-tasks that are as detailed as necessary, but no more.
        Each sub-task should be clear and unambiguous, and should fully describe what needs to be done, including:
            Purpose and goals of the sub-task
            Steps required to complete it
            Any external interfaces it will integrate with
            Data models and structures it will use
            API contracts, endpoints, or protocols it requires or provides
-            Detailed testing strategies specific to the sub-task
+            Testing strategies appropriate to the complexity of that sub-task
-        Be explicit about inputs, outputs, error cases, and edge conditions.
+            You may include pseudocode, but not full code.
    For complex tasks, include:
        Sample requests and responses (if APIs are involved)
        Details on error handling and logging
        Relevant data validation rules
        Any performance, scalability, or security considerations
    After finalizing the overall approach:
        Use emit_plan to store the high-level implementation plan.
-        For each sub-task, use emit_task to store a thorough, step-by-step description.
+        For each sub-task, use emit_task to store a step-by-step description.
-            The description should be so detailed that it could be handed to another engineer who could implement it without further clarification.
+            The description should be only as detailed as warranted by the complexity of the request.
-
+    
    Only stop after all necessary tasks are fully detailed and cover the entire scope of the original request.
    Avoid unnecessary complexity, but do not omit critical details.
    Do not implement anything yet.
-
+"""
 You are an autonomous agent, not a chatbot."""
 # Research summary prompt - guides generation of research summaries
 # Remains essentially the same, but with complexity scaling if needed.
 SUMMARY_PROMPT = """
 Using only the information provided in the Research Notes and Key Facts below, write a concise and direct answer to the user's query.
@ -181,13 +166,14 @@ Snippet Management:
    Use emit_key_snippet to store important code sections needed for reference.
 Instructions:
- **Stay Within Provided Information**: Do not include any information not present in the Research Notes or Key Facts. Avoid assumptions or external knowledge.
+- **Stay Within Provided Information**: Do not include any information not present in the Research Notes or Key Facts.
- **Handle Contradictions Appropriately**: If there are contradictions in the provided information, you may take further research steps to resolve the contradiction. If you cannot, note and explain the contradictions as best as you can.
+- **Handle Contradictions Appropriately**: If contradictions exist, consider additional research or note the contradictions.
- **Maintain Focus and Brevity**: Keep your response succinct yet comprehensive and focused solely on the user's query without adding unnecessary details.
+- **Maintain Focus and Brevity**: Keep the response concise, focusing on the user's query.
- **Include technical details**: If it is a technical query or a query related to files on the filesystem, always take time to read those and include relevant snippets.
+- **Include Technical Details If Relevant**: For technical queries, reference discovered files and snippets.
 """
 # Implementation stage prompt - guides specific task implementation
 # Added instruction to adjust complexity of implementation to match request.
 IMPLEMENTATION_PROMPT = """Base-level task (for reference only):
 {base_task}
@ -204,55 +190,21 @@ Relevant Files:
 {related_files}
 Important Notes:
- You must focus solely on the given task and implement it as described.
+- Focus solely on the given task and implement it as described.
- Do not implement other tasks or deviate from the defined scope.
+- Scale the complexity of your solution to the complexity of the request. For simple requests, keep it straightforward and minimal. For complex requests, maintain the previously planned depth.
- Use the delete_key_fact tool to remove facts that become outdated, irrelevant, or duplicated.
+- Use delete_key_fact to remove facts that become outdated, irrelevant, or duplicated.
- Whenever referencing facts, use their assigned **[Fact ID: X]** format.
+- Use emit_key_snippet to manage code sections before and after modifications as needed.
- Aggressively manage code snippets throughout implementation:
+- Regularly remove outdated snippets with delete_key_snippet.
  **When to Add Snippets**
  - Capture code with emit_key_snippet:
    * Before modifying any existing code
    * When discovering related code that impacts the task
    * After implementing new code sections
    * When finding code patterns that will be modified
  **When to Remove Snippets**
  - Use delete_key_snippet with [Snippet ID: X]:
    * Immediately after modifying or replacing referenced code
    * When the snippet becomes obsolete or irrelevant
    * When newer versions of the code exist
    * When the referenced code has been deleted
  **Snippet Management Examples**
  - Adding a snippet before modification:
    emit_key_snippet with:
      filepath: "path/to/file.py"
      line_number: 10
      snippet: "[code to be modified]"
      description: "Original version before changes"
  - Removing an outdated snippet:
    delete_key_snippet with [Snippet ID: X] after the code is modified
  **Maintaining Snippet Quality**
  - Only keep snippets relevant to current or future task understanding
  - Regularly review snippets to ensure they match current codebase
  - Prioritize snippet management but don't let it block implementation progress
  - Use snippets to complement version control by highlighting key code sections
 Instructions:
 1. Review the provided base task, plan, and key facts.
 2. Implement only the specified task:
   {task}
-3. While implementing, follow these guidelines:
+3. Work incrementally, validating as you go.
-   - Work incrementally, testing and validating as you go.
+4. Update or remove any key facts that no longer apply.
-   - Update or remove any key facts that no longer apply.
+5. Do not add features not explicitly required.
-   - Do not build features not explicitly required by the task.
+6. Only create or modify files directly related to this task.
   - Only create or modify files directly related to this task.
-4. Once the task is complete, ensure all updated files are emitted.
+Once the task is complete, ensure all updated files are emitted.
-
+"""
 No other activities (such as discussing purpose, future improvements, or unrelated steps) are allowed. Stay fully focused on completing the defined implementation task.
 """
--- a/ra_aid/tools/shell.py
+++ b/ra_aid/tools/shell.py
@ -2,6 +2,8 @@ from typing import Dict, Union
 from langchain_core.tools import tool
 from rich.console import Console
 from rich.panel import Panel
 from rich.prompt import Confirm
 from ra_aid.tools.memory import _global_memory
 from ra_aid.proc.interactive import run_interactive_command
 from ra_aid.text.processing import truncate_output
@ -39,7 +41,18 @@ def run_shell_command(command: str) -> Dict[str, Union[str, int, bool]]:
    """
    # Show just the command in a simple panel
    console.print(Panel(command, title="🐚 Shell", border_style="bright_yellow"))
-        
+
    # Check if we need approval
    cowboy_mode = _global_memory.get('config', {}).get('cowboy_mode', False)
    if not cowboy_mode:
        if not Confirm.ask("Execute this command?", default=True):
            return {
                "output": "Command execution cancelled by user",
                "return_code": 1,
                "success": False
            }
    try:
        print()
        output, return_code = run_interactive_command(['/bin/bash', '-c', command])
--- a/tests/ra_aid/tools/test_shell.py
+++ b/tests/ra_aid/tools/test_shell.py
@ -0,0 +1,67 @@
 import pytest
 from unittest.mock import patch, MagicMock
 from ra_aid.tools.shell import run_shell_command
 from ra_aid.tools.memory import _global_memory
@pytest.fixture
 def mock_console():
    with patch('ra_aid.tools.shell.console') as mock:
        yield mock
@pytest.fixture
 def mock_confirm():
    with patch('ra_aid.tools.shell.Confirm') as mock:
        yield mock
@pytest.fixture
 def mock_run_interactive():
    with patch('ra_aid.tools.shell.run_interactive_command') as mock:
        mock.return_value = (b"test output", 0)
        yield mock
 def test_shell_command_cowboy_mode(mock_console, mock_confirm, mock_run_interactive):
    """Test shell command execution in cowboy mode (no approval).

    With ``cowboy_mode`` enabled the command must run without the
    confirmation prompt being shown.
    """
    # patch.dict restores _global_memory on exit, so the config set here
    # cannot leak into other tests that share the module-level dict.
    with patch.dict(_global_memory, {'config': {'cowboy_mode': True}}):
        result = run_shell_command("echo test")

    assert result['success'] is True
    assert result['return_code'] == 0
    assert "test output" in result['output']
    mock_confirm.ask.assert_not_called()
 def test_shell_command_interactive_approved(mock_console, mock_confirm, mock_run_interactive):
    """Test shell command execution with interactive approval.

    With ``cowboy_mode`` disabled and the user answering yes, the prompt
    is shown exactly once and the command result is reported as success.
    """
    mock_confirm.ask.return_value = True

    # patch.dict restores _global_memory on exit, so the config set here
    # cannot leak into other tests that share the module-level dict.
    with patch.dict(_global_memory, {'config': {'cowboy_mode': False}}):
        result = run_shell_command("echo test")

    assert result['success'] is True
    assert result['return_code'] == 0
    assert "test output" in result['output']
    mock_confirm.ask.assert_called_once()
 def test_shell_command_interactive_rejected(mock_console, mock_confirm, mock_run_interactive):
    """Test shell command rejection in interactive mode.

    With ``cowboy_mode`` disabled and the user answering no, the command
    must not be executed and a cancelled/failed result is returned.
    """
    mock_confirm.ask.return_value = False

    # patch.dict restores _global_memory on exit, so the config set here
    # cannot leak into other tests that share the module-level dict.
    with patch.dict(_global_memory, {'config': {'cowboy_mode': False}}):
        result = run_shell_command("echo test")

    assert result['success'] is False
    assert result['return_code'] == 1
    assert "cancelled by user" in result['output']
    mock_confirm.ask.assert_called_once()
    mock_run_interactive.assert_not_called()
 def test_shell_command_execution_error(mock_console, mock_confirm, mock_run_interactive):
    """Test handling of shell command execution errors.

    When the underlying command runner raises, the failure is expected to
    be reported in the result dict rather than propagated to the caller.
    """
    mock_run_interactive.side_effect = Exception("Command failed")

    # patch.dict restores _global_memory on exit, so the config set here
    # cannot leak into other tests that share the module-level dict.
    with patch.dict(_global_memory, {'config': {'cowboy_mode': True}}):
        result = run_shell_command("invalid command")

    assert result['success'] is False
    assert result['return_code'] == 1
    assert "Command failed" in result['output']