From e1b04781d79387c9b0d6678f4ff74c0ca2329e56 Mon Sep 17 00:00:00 2001 From: AI Christianson Date: Wed, 11 Dec 2024 11:25:34 -0500 Subject: [PATCH] Update prompts, shell tools and add shell tests --- README.md | 8 ++ ra_aid/__main__.py | 8 +- ra_aid/prompts.py | 122 ++++++++++--------------------- ra_aid/tools/shell.py | 15 +++- tests/ra_aid/tools/test_shell.py | 67 +++++++++++++++++ 5 files changed, 133 insertions(+), 87 deletions(-) create mode 100644 tests/ra_aid/tools/test_shell.py diff --git a/README.md b/README.md index 35c93af..c94ddc9 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,8 @@ RA.Aid (ReAct Aid) is a powerful AI-driven command-line tool that integrates `ai ⚠️ **IMPORTANT: USE AT YOUR OWN RISK** ⚠️ - This tool **can and will** automatically execute shell commands on your system +- Shell commands require interactive approval unless --cowboy-mode is enabled +- The --cowboy-mode flag disables command approval and should be used with extreme caution - No warranty is provided, either express or implied - Always review the actions the agent proposes before allowing them to proceed @@ -125,6 +127,7 @@ ra-aid -m "Explain the authentication flow" --research-only - `-m, --message`: The task or query to be executed (required) - `--research-only`: Only perform research without implementation +- `--cowboy-mode`: Skip interactive approval for shell commands ### Example Tasks @@ -143,6 +146,11 @@ ra-aid -m "Explain the authentication flow" --research-only ra-aid -m "Refactor the database connection code to use connection pooling" ``` +4. Non-Interactive Mode: + ```bash + ra-aid -m "Update all deprecated API calls" --cowboy-mode + ``` + ### Environment Variables RA.Aid uses the following environment variables: diff --git a/ra_aid/__main__.py b/ra_aid/__main__.py index 2effc47..9bbad77 100644 --- a/ra_aid/__main__.py +++ b/ra_aid/__main__.py @@ -46,6 +46,11 @@ Examples: action='store_true', help='Only perform research without implementation' ) + parser.add_argument( + '--cowboy-mode', + action='store_true', + help='Skip interactive approval for shell commands' + ) return parser.parse_args() # Create the base model @@ -240,7 +245,8 @@ def main(): "thread_id": "abc123" }, "recursion_limit": 100, - "research_only": args.research_only + "research_only": args.research_only, + "cowboy_mode": args.cowboy_mode } # Store config in global memory for access by is_informational_query diff --git a/ra_aid/prompts.py b/ra_aid/prompts.py index e7ac89d..dc969ab 100644 --- a/ra_aid/prompts.py +++ b/ra_aid/prompts.py @@ -3,9 +3,12 @@ Stage-specific prompts for the AI agent system. Each prompt constant uses str.format() style template substitution for variable replacement. The prompts guide the agent through different stages of task execution. -""" -# Research stage prompt - guides initial codebase analysis +These updated prompts include instructions to scale complexity: +- For simpler requests, keep the scope minimal and avoid unnecessary complexity. +- For more complex requests, still provide detailed planning and thorough steps. +""" + # Research stage prompt - guides initial codebase analysis RESEARCH_PROMPT = """ Objective @@ -33,8 +36,8 @@ You must not: Tools and Methodology - Use only non-recursive, targeted fuzzy find, ripgrep_search tool (which provides context), list_directory_tree tool, shell commands, etc. (use your imagination) to efficiently explore the project structure. For example: - After identifying files, you may read them to confirm their contents only if needed to understand what currently exists (for example, to confirm if a file is a documentation file or a configuration file). + Use only non-recursive, targeted fuzzy find, ripgrep_search tool (which provides context), list_directory_tree tool, shell commands, etc. (use your imagination) to efficiently explore the project structure. + After identifying files, you may read them to confirm their contents only if needed to understand what currently exists. Be meticulous: If you find a directory, explore it thoroughly. If you find files of potential relevance, record them. Make sure you do not skip any directories you discover. Prefer to use list_directory_tree and other tools over shell commands. Do not produce huge outputs from your commands. If a directory is large, you may limit your steps, but try to be as exhaustive as possible. Incrementally gather details as needed. @@ -74,12 +77,11 @@ Decision on Implementation If you see reasons that implementation changes will be required in the future, after documenting all findings, call request_implementation and specify why. If no changes are needed, simply state that no changes are required. -Do not do any implementation or planning now. Just request it if needed. - If there is a top-level README.md or docs/ folder, always start with that. """ # Planning stage prompt - guides task breakdown and implementation planning +# Includes a directive to scale complexity with request size. PLANNING_PROMPT = """Base Task: {base_task} @@ -94,18 +96,6 @@ Key Facts: Key Snippets: {key_snippets} -Fact Management: - Each fact is identified with [Fact ID: X]. - Facts may be deleted if they become outdated, irrelevant, or duplicates. - Use delete_key_fact with the specific Fact ID to remove unnecessary facts. - -Snippet Management: - Each snippet is identified with [Snippet ID: X]. - Snippets include file path, line number, and source code. - Snippets may have optional descriptions explaining their significance. - Delete snippets with delete_key_snippet if they become outdated or irrelevant. - Use emit_key_snippet to store important code sections needed for reference. - Fact Management: Each fact is identified with [Fact ID: X]. Facts may be deleted if they become outdated, irrelevant, or duplicates. @@ -122,37 +112,32 @@ Guidelines: If you need additional input or assistance from the expert, first use emit_expert_context to provide all relevant context. Wait for the expert’s response before defining tasks in non-trivial scenarios. + Scale the complexity of your plan: + Individual tasks can include multiple steps, file edits, etc. + Therefore, use as few tasks as needed, but no fewer. + Keep tasks organized as semantic divisions of the overall work, rather than a series of steps. + When planning the implementation: - Break the overall work into sub-tasks that are as detailed as possible. + Break the overall work into sub-tasks that are as detailed as necessary, but no more. Each sub-task should be clear and unambiguous, and should fully describe what needs to be done, including: Purpose and goals of the sub-task Steps required to complete it Any external interfaces it will integrate with Data models and structures it will use API contracts, endpoints, or protocols it requires or provides - Detailed testing strategies specific to the sub-task - Be explicit about inputs, outputs, error cases, and edge conditions. - - For complex tasks, include: - Sample requests and responses (if APIs are involved) - Details on error handling and logging - Relevant data validation rules - Any performance, scalability, or security considerations + Testing strategies appropriate to the complexity of that sub-task + You may include pseudocode, but not full code. After finalizing the overall approach: Use emit_plan to store the high-level implementation plan. - For each sub-task, use emit_task to store a thorough, step-by-step description. - The description should be so detailed that it could be handed to another engineer who could implement it without further clarification. - - Only stop after all necessary tasks are fully detailed and cover the entire scope of the original request. - - Avoid unnecessary complexity, but do not omit critical details. - + For each sub-task, use emit_task to store a step-by-step description. + The description should be only as detailed as warranted by the complexity of the request. + Do not implement anything yet. - -You are an autonomous agent, not a chatbot.""" +""" # Research summary prompt - guides generation of research summaries +# Remains essentially the same, but with complexity scaling if needed. SUMMARY_PROMPT = """ Using only the information provided in the Research Notes and Key Facts below, write a concise and direct answer to the user's query. @@ -181,13 +166,14 @@ Snippet Management: Use emit_key_snippet to store important code sections needed for reference. Instructions: -- **Stay Within Provided Information**: Do not include any information not present in the Research Notes or Key Facts. Avoid assumptions or external knowledge. -- **Handle Contradictions Appropriately**: If there are contradictions in the provided information, you may take further research steps to resolve the contradiction. If you cannot, note and explain the contradictions as best as you can. -- **Maintain Focus and Brevity**: Keep your response succinct yet comprehensive and focused solely on the user's query without adding unnecessary details. -- **Include technical details**: If it is a technical query or a query related to files on the filesystem, always take time to read those and include relevant snippets. +- **Stay Within Provided Information**: Do not include any information not present in the Research Notes or Key Facts. +- **Handle Contradictions Appropriately**: If contradictions exist, consider additional research or note the contradictions. +- **Maintain Focus and Brevity**: Keep the response concise, focusing on the user's query. +- **Include Technical Details If Relevant**: For technical queries, reference discovered files and snippets. """ # Implementation stage prompt - guides specific task implementation +# Added instruction to adjust complexity of implementation to match request. IMPLEMENTATION_PROMPT = """Base-level task (for reference only): {base_task} @@ -204,55 +190,21 @@ Relevant Files: {related_files} Important Notes: -- You must focus solely on the given task and implement it as described. -- Do not implement other tasks or deviate from the defined scope. -- Use the delete_key_fact tool to remove facts that become outdated, irrelevant, or duplicated. -- Whenever referencing facts, use their assigned **[Fact ID: X]** format. -- Aggressively manage code snippets throughout implementation: - - **When to Add Snippets** - - Capture code with emit_key_snippet: - * Before modifying any existing code - * When discovering related code that impacts the task - * After implementing new code sections - * When finding code patterns that will be modified - - **When to Remove Snippets** - - Use delete_key_snippet with [Snippet ID: X]: - * Immediately after modifying or replacing referenced code - * When the snippet becomes obsolete or irrelevant - * When newer versions of the code exist - * When the referenced code has been deleted - - **Snippet Management Examples** - - Adding a snippet before modification: - emit_key_snippet with: - filepath: "path/to/file.py" - line_number: 10 - snippet: "[code to be modified]" - description: "Original version before changes" - - - Removing an outdated snippet: - delete_key_snippet with [Snippet ID: X] after the code is modified - - **Maintaining Snippet Quality** - - Only keep snippets relevant to current or future task understanding - - Regularly review snippets to ensure they match current codebase - - Prioritize snippet management but don't let it block implementation progress - - Use snippets to complement version control by highlighting key code sections +- Focus solely on the given task and implement it as described. +- Scale the complexity of your solution to the complexity of the request. For simple requests, keep it straightforward and minimal. For complex requests, maintain the previously planned depth. +- Use delete_key_fact to remove facts that become outdated, irrelevant, or duplicated. +- Use emit_key_snippet to manage code sections before and after modifications as needed. +- Regularly remove outdated snippets with delete_key_snippet. Instructions: 1. Review the provided base task, plan, and key facts. 2. Implement only the specified task: {task} -3. While implementing, follow these guidelines: - - Work incrementally, testing and validating as you go. - - Update or remove any key facts that no longer apply. - - Do not build features not explicitly required by the task. - - Only create or modify files directly related to this task. +3. Work incrementally, validating as you go. +4. Update or remove any key facts that no longer apply. +5. Do not add features not explicitly required. +6. Only create or modify files directly related to this task. -4. Once the task is complete, ensure all updated files are emitted. - -No other activities (such as discussing purpose, future improvements, or unrelated steps) are allowed. Stay fully focused on completing the defined implementation task. -""" +Once the task is complete, ensure all updated files are emitted. +""" \ No newline at end of file diff --git a/ra_aid/tools/shell.py b/ra_aid/tools/shell.py index aba2afb..2dc8826 100644 --- a/ra_aid/tools/shell.py +++ b/ra_aid/tools/shell.py @@ -2,6 +2,8 @@ from typing import Dict, Union from langchain_core.tools import tool from rich.console import Console from rich.panel import Panel +from rich.prompt import Confirm +from ra_aid.tools.memory import _global_memory from ra_aid.proc.interactive import run_interactive_command from ra_aid.text.processing import truncate_output @@ -39,7 +41,18 @@ def run_shell_command(command: str) -> Dict[str, Union[str, int, bool]]: """ # Show just the command in a simple panel console.print(Panel(command, title="🐚 Shell", border_style="bright_yellow")) - + + # Check if we need approval + cowboy_mode = _global_memory.get('config', {}).get('cowboy_mode', False) + + if not cowboy_mode: + if not Confirm.ask("Execute this command?", default=True): + return { + "output": "Command execution cancelled by user", + "return_code": 1, + "success": False + } + try: print() output, return_code = run_interactive_command(['/bin/bash', '-c', command]) diff --git a/tests/ra_aid/tools/test_shell.py b/tests/ra_aid/tools/test_shell.py new file mode 100644 index 0000000..231b39b --- /dev/null +++ b/tests/ra_aid/tools/test_shell.py @@ -0,0 +1,67 @@ +import pytest +from unittest.mock import patch, MagicMock +from ra_aid.tools.shell import run_shell_command +from ra_aid.tools.memory import _global_memory + +@pytest.fixture +def mock_console(): + with patch('ra_aid.tools.shell.console') as mock: + yield mock + +@pytest.fixture +def mock_confirm(): + with patch('ra_aid.tools.shell.Confirm') as mock: + yield mock + +@pytest.fixture +def mock_run_interactive(): + with patch('ra_aid.tools.shell.run_interactive_command') as mock: + mock.return_value = (b"test output", 0) + yield mock + +def test_shell_command_cowboy_mode(mock_console, mock_confirm, mock_run_interactive): + """Test shell command execution in cowboy mode (no approval)""" + _global_memory['config'] = {'cowboy_mode': True} + + result = run_shell_command("echo test") + + assert result['success'] is True + assert result['return_code'] == 0 + assert "test output" in result['output'] + mock_confirm.ask.assert_not_called() + +def test_shell_command_interactive_approved(mock_console, mock_confirm, mock_run_interactive): + """Test shell command execution with interactive approval""" + _global_memory['config'] = {'cowboy_mode': False} + mock_confirm.ask.return_value = True + + result = run_shell_command("echo test") + + assert result['success'] is True + assert result['return_code'] == 0 + assert "test output" in result['output'] + mock_confirm.ask.assert_called_once() + +def test_shell_command_interactive_rejected(mock_console, mock_confirm, mock_run_interactive): + """Test shell command rejection in interactive mode""" + _global_memory['config'] = {'cowboy_mode': False} + mock_confirm.ask.return_value = False + + result = run_shell_command("echo test") + + assert result['success'] is False + assert result['return_code'] == 1 + assert "cancelled by user" in result['output'] + mock_confirm.ask.assert_called_once() + mock_run_interactive.assert_not_called() + +def test_shell_command_execution_error(mock_console, mock_confirm, mock_run_interactive): + """Test handling of shell command execution errors""" + _global_memory['config'] = {'cowboy_mode': True} + mock_run_interactive.side_effect = Exception("Command failed") + + result = run_shell_command("invalid command") + + assert result['success'] is False + assert result['return_code'] == 1 + assert "Command failed" in result['output']