Update prompts, shell tools and add shell tests

This commit is contained in:
AI Christianson 2024-12-11 11:25:34 -05:00
parent 3b9757061c
commit e1b04781d7
5 changed files with 133 additions and 87 deletions

View File

@ -24,6 +24,8 @@ RA.Aid (ReAct Aid) is a powerful AI-driven command-line tool that integrates `ai
⚠️ **IMPORTANT: USE AT YOUR OWN RISK** ⚠️
- This tool **can and will** automatically execute shell commands on your system
- Shell commands require interactive approval unless --cowboy-mode is enabled
- The --cowboy-mode flag disables command approval and should be used with extreme caution
- No warranty is provided, either express or implied
- Always review the actions the agent proposes before allowing them to proceed
@ -125,6 +127,7 @@ ra-aid -m "Explain the authentication flow" --research-only
- `-m, --message`: The task or query to be executed (required)
- `--research-only`: Only perform research without implementation
- `--cowboy-mode`: Skip interactive approval for shell commands
### Example Tasks
@ -143,6 +146,11 @@ ra-aid -m "Explain the authentication flow" --research-only
ra-aid -m "Refactor the database connection code to use connection pooling"
```
4. Non-Interactive Mode:
```bash
ra-aid -m "Update all deprecated API calls" --cowboy-mode
```
### Environment Variables
RA.Aid uses the following environment variables:

View File

@ -46,6 +46,11 @@ Examples:
action='store_true',
help='Only perform research without implementation'
)
parser.add_argument(
'--cowboy-mode',
action='store_true',
help='Skip interactive approval for shell commands'
)
return parser.parse_args()
# Create the base model
@ -240,7 +245,8 @@ def main():
"thread_id": "abc123"
},
"recursion_limit": 100,
"research_only": args.research_only
"research_only": args.research_only,
"cowboy_mode": args.cowboy_mode
}
# Store config in global memory for access by is_informational_query

View File

@ -3,9 +3,12 @@ Stage-specific prompts for the AI agent system.
Each prompt constant uses str.format() style template substitution for variable replacement.
The prompts guide the agent through different stages of task execution.
"""
# Research stage prompt - guides initial codebase analysis
These updated prompts include instructions to scale complexity:
- For simpler requests, keep the scope minimal and avoid unnecessary complexity.
- For more complex requests, still provide detailed planning and thorough steps.
"""
# Research stage prompt - guides initial codebase analysis
RESEARCH_PROMPT = """
Objective
@ -33,8 +36,8 @@ You must not:
Tools and Methodology
Use only non-recursive, targeted fuzzy find, ripgrep_search tool (which provides context), list_directory_tree tool, shell commands, etc. (use your imagination) to efficiently explore the project structure. For example:
After identifying files, you may read them to confirm their contents only if needed to understand what currently exists (for example, to confirm if a file is a documentation file or a configuration file).
Use only non-recursive, targeted fuzzy find, ripgrep_search tool (which provides context), list_directory_tree tool, shell commands, etc. (use your imagination) to efficiently explore the project structure.
After identifying files, you may read them to confirm their contents only if needed to understand what currently exists.
Be meticulous: If you find a directory, explore it thoroughly. If you find files of potential relevance, record them. Make sure you do not skip any directories you discover.
Prefer to use list_directory_tree and other tools over shell commands.
Do not produce huge outputs from your commands. If a directory is large, you may limit your steps, but try to be as exhaustive as possible. Incrementally gather details as needed.
@ -74,12 +77,11 @@ Decision on Implementation
If you see reasons that implementation changes will be required in the future, after documenting all findings, call request_implementation and specify why.
If no changes are needed, simply state that no changes are required.
Do not do any implementation or planning now. Just request it if needed.
If there is a top-level README.md or docs/ folder, always start with that.
"""
# Planning stage prompt - guides task breakdown and implementation planning
# Includes a directive to scale complexity with request size.
PLANNING_PROMPT = """Base Task:
{base_task}
@ -94,18 +96,6 @@ Key Facts:
Key Snippets:
{key_snippets}
Fact Management:
Each fact is identified with [Fact ID: X].
Facts may be deleted if they become outdated, irrelevant, or duplicates.
Use delete_key_fact with the specific Fact ID to remove unnecessary facts.
Snippet Management:
Each snippet is identified with [Snippet ID: X].
Snippets include file path, line number, and source code.
Snippets may have optional descriptions explaining their significance.
Delete snippets with delete_key_snippet if they become outdated or irrelevant.
Use emit_key_snippet to store important code sections needed for reference.
Fact Management:
Each fact is identified with [Fact ID: X].
Facts may be deleted if they become outdated, irrelevant, or duplicates.
@ -122,37 +112,32 @@ Guidelines:
If you need additional input or assistance from the expert, first use emit_expert_context to provide all relevant context. Wait for the experts response before defining tasks in non-trivial scenarios.
Scale the complexity of your plan:
Individual tasks can include multiple steps, file edits, etc.
Therefore, use as few tasks as needed, but no fewer.
Keep tasks organized as semantic divisions of the overall work, rather than a series of steps.
When planning the implementation:
Break the overall work into sub-tasks that are as detailed as possible.
Break the overall work into sub-tasks that are as detailed as necessary, but no more.
Each sub-task should be clear and unambiguous, and should fully describe what needs to be done, including:
Purpose and goals of the sub-task
Steps required to complete it
Any external interfaces it will integrate with
Data models and structures it will use
API contracts, endpoints, or protocols it requires or provides
Detailed testing strategies specific to the sub-task
Be explicit about inputs, outputs, error cases, and edge conditions.
For complex tasks, include:
Sample requests and responses (if APIs are involved)
Details on error handling and logging
Relevant data validation rules
Any performance, scalability, or security considerations
Testing strategies appropriate to the complexity of that sub-task
You may include pseudocode, but not full code.
After finalizing the overall approach:
Use emit_plan to store the high-level implementation plan.
For each sub-task, use emit_task to store a thorough, step-by-step description.
The description should be so detailed that it could be handed to another engineer who could implement it without further clarification.
Only stop after all necessary tasks are fully detailed and cover the entire scope of the original request.
Avoid unnecessary complexity, but do not omit critical details.
For each sub-task, use emit_task to store a step-by-step description.
The description should be only as detailed as warranted by the complexity of the request.
Do not implement anything yet.
You are an autonomous agent, not a chatbot."""
"""
# Research summary prompt - guides generation of research summaries
# Remains essentially the same, but with complexity scaling if needed.
SUMMARY_PROMPT = """
Using only the information provided in the Research Notes and Key Facts below, write a concise and direct answer to the user's query.
@ -181,13 +166,14 @@ Snippet Management:
Use emit_key_snippet to store important code sections needed for reference.
Instructions:
- **Stay Within Provided Information**: Do not include any information not present in the Research Notes or Key Facts. Avoid assumptions or external knowledge.
- **Handle Contradictions Appropriately**: If there are contradictions in the provided information, you may take further research steps to resolve the contradiction. If you cannot, note and explain the contradictions as best as you can.
- **Maintain Focus and Brevity**: Keep your response succinct yet comprehensive and focused solely on the user's query without adding unnecessary details.
- **Include technical details**: If it is a technical query or a query related to files on the filesystem, always take time to read those and include relevant snippets.
- **Stay Within Provided Information**: Do not include any information not present in the Research Notes or Key Facts.
- **Handle Contradictions Appropriately**: If contradictions exist, consider additional research or note the contradictions.
- **Maintain Focus and Brevity**: Keep the response concise, focusing on the user's query.
- **Include Technical Details If Relevant**: For technical queries, reference discovered files and snippets.
"""
# Implementation stage prompt - guides specific task implementation
# Added instruction to adjust complexity of implementation to match request.
IMPLEMENTATION_PROMPT = """Base-level task (for reference only):
{base_task}
@ -204,55 +190,21 @@ Relevant Files:
{related_files}
Important Notes:
- You must focus solely on the given task and implement it as described.
- Do not implement other tasks or deviate from the defined scope.
- Use the delete_key_fact tool to remove facts that become outdated, irrelevant, or duplicated.
- Whenever referencing facts, use their assigned **[Fact ID: X]** format.
- Aggressively manage code snippets throughout implementation:
**When to Add Snippets**
- Capture code with emit_key_snippet:
* Before modifying any existing code
* When discovering related code that impacts the task
* After implementing new code sections
* When finding code patterns that will be modified
**When to Remove Snippets**
- Use delete_key_snippet with [Snippet ID: X]:
* Immediately after modifying or replacing referenced code
* When the snippet becomes obsolete or irrelevant
* When newer versions of the code exist
* When the referenced code has been deleted
**Snippet Management Examples**
- Adding a snippet before modification:
emit_key_snippet with:
filepath: "path/to/file.py"
line_number: 10
snippet: "[code to be modified]"
description: "Original version before changes"
- Removing an outdated snippet:
delete_key_snippet with [Snippet ID: X] after the code is modified
**Maintaining Snippet Quality**
- Only keep snippets relevant to current or future task understanding
- Regularly review snippets to ensure they match current codebase
- Prioritize snippet management but don't let it block implementation progress
- Use snippets to complement version control by highlighting key code sections
- Focus solely on the given task and implement it as described.
- Scale the complexity of your solution to the complexity of the request. For simple requests, keep it straightforward and minimal. For complex requests, maintain the previously planned depth.
- Use delete_key_fact to remove facts that become outdated, irrelevant, or duplicated.
- Use emit_key_snippet to manage code sections before and after modifications as needed.
- Regularly remove outdated snippets with delete_key_snippet.
Instructions:
1. Review the provided base task, plan, and key facts.
2. Implement only the specified task:
{task}
3. While implementing, follow these guidelines:
- Work incrementally, testing and validating as you go.
- Update or remove any key facts that no longer apply.
- Do not build features not explicitly required by the task.
- Only create or modify files directly related to this task.
3. Work incrementally, validating as you go.
4. Update or remove any key facts that no longer apply.
5. Do not add features not explicitly required.
6. Only create or modify files directly related to this task.
4. Once the task is complete, ensure all updated files are emitted.
No other activities (such as discussing purpose, future improvements, or unrelated steps) are allowed. Stay fully focused on completing the defined implementation task.
"""
Once the task is complete, ensure all updated files are emitted.
"""

View File

@ -2,6 +2,8 @@ from typing import Dict, Union
from langchain_core.tools import tool
from rich.console import Console
from rich.panel import Panel
from rich.prompt import Confirm
from ra_aid.tools.memory import _global_memory
from ra_aid.proc.interactive import run_interactive_command
from ra_aid.text.processing import truncate_output
@ -39,7 +41,18 @@ def run_shell_command(command: str) -> Dict[str, Union[str, int, bool]]:
"""
# Show just the command in a simple panel
console.print(Panel(command, title="🐚 Shell", border_style="bright_yellow"))
# Check if we need approval
cowboy_mode = _global_memory.get('config', {}).get('cowboy_mode', False)
if not cowboy_mode:
if not Confirm.ask("Execute this command?", default=True):
return {
"output": "Command execution cancelled by user",
"return_code": 1,
"success": False
}
try:
print()
output, return_code = run_interactive_command(['/bin/bash', '-c', command])

View File

@ -0,0 +1,67 @@
import pytest
from unittest.mock import patch, MagicMock
from ra_aid.tools.shell import run_shell_command
from ra_aid.tools.memory import _global_memory
@pytest.fixture
def mock_console():
    """Swap the shell module's rich console for a mock during the test."""
    with patch('ra_aid.tools.shell.console') as patched_console:
        yield patched_console
@pytest.fixture
def mock_confirm():
    """Swap rich's Confirm in the shell module for a mock during the test."""
    with patch('ra_aid.tools.shell.Confirm') as patched_confirm:
        yield patched_confirm
@pytest.fixture
def mock_run_interactive():
    """Stub run_interactive_command to return canned output and exit code 0."""
    with patch('ra_aid.tools.shell.run_interactive_command') as stub:
        stub.return_value = (b"test output", 0)
        yield stub
def test_shell_command_cowboy_mode(mock_console, mock_confirm, mock_run_interactive):
    """Shell command runs without an approval prompt when cowboy_mode is on."""
    # Arrange: enable cowboy mode in the shared global config.
    _global_memory['config'] = {'cowboy_mode': True}
    try:
        result = run_shell_command("echo test")
        # Output from the mocked interactive runner is surfaced in the result.
        assert result['success'] is True
        assert result['return_code'] == 0
        assert "test output" in result['output']
        # In cowboy mode the user must never be prompted for approval.
        mock_confirm.ask.assert_not_called()
    finally:
        # Remove the config so shared global state does not leak into other tests.
        _global_memory.pop('config', None)
def test_shell_command_interactive_approved(mock_console, mock_confirm, mock_run_interactive):
    """Shell command executes after the user approves the interactive prompt."""
    # Arrange: interactive mode (cowboy off) with the user answering "yes".
    _global_memory['config'] = {'cowboy_mode': False}
    mock_confirm.ask.return_value = True
    try:
        result = run_shell_command("echo test")
        assert result['success'] is True
        assert result['return_code'] == 0
        assert "test output" in result['output']
        # Exactly one approval prompt should have been shown.
        mock_confirm.ask.assert_called_once()
    finally:
        # Remove the config so shared global state does not leak into other tests.
        _global_memory.pop('config', None)
def test_shell_command_interactive_rejected(mock_console, mock_confirm, mock_run_interactive):
    """Shell command is cancelled when the user rejects the interactive prompt."""
    # Arrange: interactive mode (cowboy off) with the user answering "no".
    _global_memory['config'] = {'cowboy_mode': False}
    mock_confirm.ask.return_value = False
    try:
        result = run_shell_command("echo test")
        assert result['success'] is False
        assert result['return_code'] == 1
        assert "cancelled by user" in result['output']
        mock_confirm.ask.assert_called_once()
        # A rejected command must never actually be executed.
        mock_run_interactive.assert_not_called()
    finally:
        # Remove the config so shared global state does not leak into other tests.
        _global_memory.pop('config', None)
def test_shell_command_execution_error(mock_console, mock_confirm, mock_run_interactive):
    """Exceptions raised while running the command yield a failed result."""
    # Arrange: cowboy mode so no prompt; make the runner raise.
    _global_memory['config'] = {'cowboy_mode': True}
    mock_run_interactive.side_effect = Exception("Command failed")
    try:
        result = run_shell_command("invalid command")
        # The tool is expected to catch the exception and report failure
        # rather than propagate it.
        assert result['success'] is False
        assert result['return_code'] == 1
        assert "Command failed" in result['output']
    finally:
        # Remove the config so shared global state does not leak into other tests.
        _global_memory.pop('config', None)