Update prompts, shell tools and add shell tests

This commit is contained in:
AI Christianson 2024-12-11 11:25:34 -05:00
parent 3b9757061c
commit e1b04781d7
5 changed files with 133 additions and 87 deletions

View File

@ -24,6 +24,8 @@ RA.Aid (ReAct Aid) is a powerful AI-driven command-line tool that integrates `ai
⚠️ **IMPORTANT: USE AT YOUR OWN RISK** ⚠️ ⚠️ **IMPORTANT: USE AT YOUR OWN RISK** ⚠️
- This tool **can and will** automatically execute shell commands on your system - This tool **can and will** automatically execute shell commands on your system
- Shell commands require interactive approval unless --cowboy-mode is enabled
- The --cowboy-mode flag disables command approval and should be used with extreme caution
- No warranty is provided, either express or implied - No warranty is provided, either express or implied
- Always review the actions the agent proposes before allowing them to proceed - Always review the actions the agent proposes before allowing them to proceed
@ -125,6 +127,7 @@ ra-aid -m "Explain the authentication flow" --research-only
- `-m, --message`: The task or query to be executed (required) - `-m, --message`: The task or query to be executed (required)
- `--research-only`: Only perform research without implementation - `--research-only`: Only perform research without implementation
- `--cowboy-mode`: Skip interactive approval for shell commands
### Example Tasks ### Example Tasks
@ -143,6 +146,11 @@ ra-aid -m "Explain the authentication flow" --research-only
ra-aid -m "Refactor the database connection code to use connection pooling" ra-aid -m "Refactor the database connection code to use connection pooling"
``` ```
4. Non-Interactive Mode:
```bash
ra-aid -m "Update all deprecated API calls" --cowboy-mode
```
### Environment Variables ### Environment Variables
RA.Aid uses the following environment variables: RA.Aid uses the following environment variables:

View File

@ -46,6 +46,11 @@ Examples:
action='store_true', action='store_true',
help='Only perform research without implementation' help='Only perform research without implementation'
) )
parser.add_argument(
'--cowboy-mode',
action='store_true',
help='Skip interactive approval for shell commands'
)
return parser.parse_args() return parser.parse_args()
# Create the base model # Create the base model
@ -240,7 +245,8 @@ def main():
"thread_id": "abc123" "thread_id": "abc123"
}, },
"recursion_limit": 100, "recursion_limit": 100,
"research_only": args.research_only "research_only": args.research_only,
"cowboy_mode": args.cowboy_mode
} }
# Store config in global memory for access by is_informational_query # Store config in global memory for access by is_informational_query

View File

@ -3,9 +3,12 @@ Stage-specific prompts for the AI agent system.
Each prompt constant uses str.format() style template substitution for variable replacement. Each prompt constant uses str.format() style template substitution for variable replacement.
The prompts guide the agent through different stages of task execution. The prompts guide the agent through different stages of task execution.
These updated prompts include instructions to scale complexity:
- For simpler requests, keep the scope minimal and avoid unnecessary complexity.
- For more complex requests, still provide detailed planning and thorough steps.
""" """
# Research stage prompt - guides initial codebase analysis
# Research stage prompt - guides initial codebase analysis # Research stage prompt - guides initial codebase analysis
RESEARCH_PROMPT = """ RESEARCH_PROMPT = """
Objective Objective
@ -33,8 +36,8 @@ You must not:
Tools and Methodology Tools and Methodology
Use only non-recursive, targeted fuzzy find, ripgrep_search tool (which provides context), list_directory_tree tool, shell commands, etc. (use your imagination) to efficiently explore the project structure. For example: Use only non-recursive, targeted fuzzy find, ripgrep_search tool (which provides context), list_directory_tree tool, shell commands, etc. (use your imagination) to efficiently explore the project structure.
After identifying files, you may read them to confirm their contents only if needed to understand what currently exists (for example, to confirm if a file is a documentation file or a configuration file). After identifying files, you may read them to confirm their contents only if needed to understand what currently exists.
Be meticulous: If you find a directory, explore it thoroughly. If you find files of potential relevance, record them. Make sure you do not skip any directories you discover. Be meticulous: If you find a directory, explore it thoroughly. If you find files of potential relevance, record them. Make sure you do not skip any directories you discover.
Prefer to use list_directory_tree and other tools over shell commands. Prefer to use list_directory_tree and other tools over shell commands.
Do not produce huge outputs from your commands. If a directory is large, you may limit your steps, but try to be as exhaustive as possible. Incrementally gather details as needed. Do not produce huge outputs from your commands. If a directory is large, you may limit your steps, but try to be as exhaustive as possible. Incrementally gather details as needed.
@ -74,12 +77,11 @@ Decision on Implementation
If you see reasons that implementation changes will be required in the future, after documenting all findings, call request_implementation and specify why. If you see reasons that implementation changes will be required in the future, after documenting all findings, call request_implementation and specify why.
If no changes are needed, simply state that no changes are required. If no changes are needed, simply state that no changes are required.
Do not do any implementation or planning now. Just request it if needed.
If there is a top-level README.md or docs/ folder, always start with that. If there is a top-level README.md or docs/ folder, always start with that.
""" """
# Planning stage prompt - guides task breakdown and implementation planning # Planning stage prompt - guides task breakdown and implementation planning
# Includes a directive to scale complexity with request size.
PLANNING_PROMPT = """Base Task: PLANNING_PROMPT = """Base Task:
{base_task} {base_task}
@ -94,18 +96,6 @@ Key Facts:
Key Snippets: Key Snippets:
{key_snippets} {key_snippets}
Fact Management:
Each fact is identified with [Fact ID: X].
Facts may be deleted if they become outdated, irrelevant, or duplicates.
Use delete_key_fact with the specific Fact ID to remove unnecessary facts.
Snippet Management:
Each snippet is identified with [Snippet ID: X].
Snippets include file path, line number, and source code.
Snippets may have optional descriptions explaining their significance.
Delete snippets with delete_key_snippet if they become outdated or irrelevant.
Use emit_key_snippet to store important code sections needed for reference.
Fact Management: Fact Management:
Each fact is identified with [Fact ID: X]. Each fact is identified with [Fact ID: X].
Facts may be deleted if they become outdated, irrelevant, or duplicates. Facts may be deleted if they become outdated, irrelevant, or duplicates.
@ -122,37 +112,32 @@ Guidelines:
If you need additional input or assistance from the expert, first use emit_expert_context to provide all relevant context. Wait for the experts response before defining tasks in non-trivial scenarios. If you need additional input or assistance from the expert, first use emit_expert_context to provide all relevant context. Wait for the experts response before defining tasks in non-trivial scenarios.
Scale the complexity of your plan:
Individual tasks can include multiple steps, file edits, etc.
Therefore, use as few tasks as needed, but no fewer.
Keep tasks organized as semantic divisions of the overall work, rather than a series of steps.
When planning the implementation: When planning the implementation:
Break the overall work into sub-tasks that are as detailed as possible. Break the overall work into sub-tasks that are as detailed as necessary, but no more.
Each sub-task should be clear and unambiguous, and should fully describe what needs to be done, including: Each sub-task should be clear and unambiguous, and should fully describe what needs to be done, including:
Purpose and goals of the sub-task Purpose and goals of the sub-task
Steps required to complete it Steps required to complete it
Any external interfaces it will integrate with Any external interfaces it will integrate with
Data models and structures it will use Data models and structures it will use
API contracts, endpoints, or protocols it requires or provides API contracts, endpoints, or protocols it requires or provides
Detailed testing strategies specific to the sub-task Testing strategies appropriate to the complexity of that sub-task
Be explicit about inputs, outputs, error cases, and edge conditions. You may include pseudocode, but not full code.
For complex tasks, include:
Sample requests and responses (if APIs are involved)
Details on error handling and logging
Relevant data validation rules
Any performance, scalability, or security considerations
After finalizing the overall approach: After finalizing the overall approach:
Use emit_plan to store the high-level implementation plan. Use emit_plan to store the high-level implementation plan.
For each sub-task, use emit_task to store a thorough, step-by-step description. For each sub-task, use emit_task to store a step-by-step description.
The description should be so detailed that it could be handed to another engineer who could implement it without further clarification. The description should be only as detailed as warranted by the complexity of the request.
Only stop after all necessary tasks are fully detailed and cover the entire scope of the original request.
Avoid unnecessary complexity, but do not omit critical details.
Do not implement anything yet. Do not implement anything yet.
"""
You are an autonomous agent, not a chatbot."""
# Research summary prompt - guides generation of research summaries # Research summary prompt - guides generation of research summaries
# Asks for a concise answer drawn only from the provided Research Notes and Key Facts.
SUMMARY_PROMPT = """ SUMMARY_PROMPT = """
Using only the information provided in the Research Notes and Key Facts below, write a concise and direct answer to the user's query. Using only the information provided in the Research Notes and Key Facts below, write a concise and direct answer to the user's query.
@ -181,13 +166,14 @@ Snippet Management:
Use emit_key_snippet to store important code sections needed for reference. Use emit_key_snippet to store important code sections needed for reference.
Instructions: Instructions:
- **Stay Within Provided Information**: Do not include any information not present in the Research Notes or Key Facts. Avoid assumptions or external knowledge. - **Stay Within Provided Information**: Do not include any information not present in the Research Notes or Key Facts.
- **Handle Contradictions Appropriately**: If there are contradictions in the provided information, you may take further research steps to resolve the contradiction. If you cannot, note and explain the contradictions as best as you can. - **Handle Contradictions Appropriately**: If contradictions exist, consider additional research or note the contradictions.
- **Maintain Focus and Brevity**: Keep your response succinct yet comprehensive and focused solely on the user's query without adding unnecessary details. - **Maintain Focus and Brevity**: Keep the response concise, focusing on the user's query.
- **Include technical details**: If it is a technical query or a query related to files on the filesystem, always take time to read those and include relevant snippets. - **Include Technical Details If Relevant**: For technical queries, reference discovered files and snippets.
""" """
# Implementation stage prompt - guides specific task implementation # Implementation stage prompt - guides specific task implementation
# Directs the agent to scale the complexity of its solution to match the request.
IMPLEMENTATION_PROMPT = """Base-level task (for reference only): IMPLEMENTATION_PROMPT = """Base-level task (for reference only):
{base_task} {base_task}
@ -204,55 +190,21 @@ Relevant Files:
{related_files} {related_files}
Important Notes: Important Notes:
- You must focus solely on the given task and implement it as described. - Focus solely on the given task and implement it as described.
- Do not implement other tasks or deviate from the defined scope. - Scale the complexity of your solution to the complexity of the request. For simple requests, keep it straightforward and minimal. For complex requests, maintain the previously planned depth.
- Use the delete_key_fact tool to remove facts that become outdated, irrelevant, or duplicated. - Use delete_key_fact to remove facts that become outdated, irrelevant, or duplicated.
- Whenever referencing facts, use their assigned **[Fact ID: X]** format. - Use emit_key_snippet to manage code sections before and after modifications as needed.
- Aggressively manage code snippets throughout implementation: - Regularly remove outdated snippets with delete_key_snippet.
**When to Add Snippets**
- Capture code with emit_key_snippet:
* Before modifying any existing code
* When discovering related code that impacts the task
* After implementing new code sections
* When finding code patterns that will be modified
**When to Remove Snippets**
- Use delete_key_snippet with [Snippet ID: X]:
* Immediately after modifying or replacing referenced code
* When the snippet becomes obsolete or irrelevant
* When newer versions of the code exist
* When the referenced code has been deleted
**Snippet Management Examples**
- Adding a snippet before modification:
emit_key_snippet with:
filepath: "path/to/file.py"
line_number: 10
snippet: "[code to be modified]"
description: "Original version before changes"
- Removing an outdated snippet:
delete_key_snippet with [Snippet ID: X] after the code is modified
**Maintaining Snippet Quality**
- Only keep snippets relevant to current or future task understanding
- Regularly review snippets to ensure they match current codebase
- Prioritize snippet management but don't let it block implementation progress
- Use snippets to complement version control by highlighting key code sections
Instructions: Instructions:
1. Review the provided base task, plan, and key facts. 1. Review the provided base task, plan, and key facts.
2. Implement only the specified task: 2. Implement only the specified task:
{task} {task}
3. While implementing, follow these guidelines: 3. Work incrementally, validating as you go.
- Work incrementally, testing and validating as you go. 4. Update or remove any key facts that no longer apply.
- Update or remove any key facts that no longer apply. 5. Do not add features not explicitly required.
- Do not build features not explicitly required by the task. 6. Only create or modify files directly related to this task.
- Only create or modify files directly related to this task.
4. Once the task is complete, ensure all updated files are emitted. Once the task is complete, ensure all updated files are emitted.
No other activities (such as discussing purpose, future improvements, or unrelated steps) are allowed. Stay fully focused on completing the defined implementation task.
""" """

View File

@ -2,6 +2,8 @@ from typing import Dict, Union
from langchain_core.tools import tool from langchain_core.tools import tool
from rich.console import Console from rich.console import Console
from rich.panel import Panel from rich.panel import Panel
from rich.prompt import Confirm
from ra_aid.tools.memory import _global_memory
from ra_aid.proc.interactive import run_interactive_command from ra_aid.proc.interactive import run_interactive_command
from ra_aid.text.processing import truncate_output from ra_aid.text.processing import truncate_output
@ -40,6 +42,17 @@ def run_shell_command(command: str) -> Dict[str, Union[str, int, bool]]:
# Show just the command in a simple panel # Show just the command in a simple panel
console.print(Panel(command, title="🐚 Shell", border_style="bright_yellow")) console.print(Panel(command, title="🐚 Shell", border_style="bright_yellow"))
# Check if we need approval
cowboy_mode = _global_memory.get('config', {}).get('cowboy_mode', False)
if not cowboy_mode:
if not Confirm.ask("Execute this command?", default=True):
return {
"output": "Command execution cancelled by user",
"return_code": 1,
"success": False
}
try: try:
print() print()
output, return_code = run_interactive_command(['/bin/bash', '-c', command]) output, return_code = run_interactive_command(['/bin/bash', '-c', command])

View File

@ -0,0 +1,67 @@
import pytest
from unittest.mock import patch, MagicMock
from ra_aid.tools.shell import run_shell_command
from ra_aid.tools.memory import _global_memory
@pytest.fixture
def mock_console():
    """Replace the shell module's ``console`` with a mock for one test."""
    console_patcher = patch('ra_aid.tools.shell.console')
    fake_console = console_patcher.start()
    try:
        yield fake_console
    finally:
        # Always undo the patch, even if the test body raised.
        console_patcher.stop()
@pytest.fixture
def mock_confirm():
    """Replace the shell module's ``Confirm`` prompt class with a mock."""
    confirm_patcher = patch('ra_aid.tools.shell.Confirm')
    fake_confirm = confirm_patcher.start()
    try:
        yield fake_confirm
    finally:
        # Always undo the patch, even if the test body raised.
        confirm_patcher.stop()
@pytest.fixture
def mock_run_interactive():
    """Stub out ``run_interactive_command`` in the shell module.

    The stub returns ``(b"test output", 0)``, i.e. raw output bytes paired
    with a zero return code, unless a test overrides it.
    """
    canned_result = (b"test output", 0)
    with patch(
        'ra_aid.tools.shell.run_interactive_command',
        return_value=canned_result,
    ) as fake_runner:
        yield fake_runner
def test_shell_command_cowboy_mode(mock_console, mock_confirm, mock_run_interactive):
    """Test shell command execution in cowboy mode (no approval).

    With ``cowboy_mode`` enabled the command must run without ever asking
    the user for confirmation.
    """
    # patch.dict restores _global_memory on exit, so the config set here
    # cannot leak into other tests that share the module-level state.
    with patch.dict(_global_memory, {'config': {'cowboy_mode': True}}):
        result = run_shell_command("echo test")

    assert result['success'] is True
    assert result['return_code'] == 0
    assert "test output" in result['output']
    mock_confirm.ask.assert_not_called()
def test_shell_command_interactive_approved(mock_console, mock_confirm, mock_run_interactive):
    """Test shell command execution with interactive approval.

    With ``cowboy_mode`` disabled the user is prompted exactly once; when
    the prompt is answered with ``True`` the command runs and succeeds.
    """
    mock_confirm.ask.return_value = True

    # patch.dict restores _global_memory on exit, so the config set here
    # cannot leak into other tests that share the module-level state.
    with patch.dict(_global_memory, {'config': {'cowboy_mode': False}}):
        result = run_shell_command("echo test")

    assert result['success'] is True
    assert result['return_code'] == 0
    assert "test output" in result['output']
    mock_confirm.ask.assert_called_once()
def test_shell_command_interactive_rejected(mock_console, mock_confirm, mock_run_interactive):
    """Test shell command rejection in interactive mode.

    With ``cowboy_mode`` disabled and the prompt answered with ``False``,
    the command must not be executed and a failure result mentioning the
    cancellation is returned.
    """
    mock_confirm.ask.return_value = False

    # patch.dict restores _global_memory on exit, so the config set here
    # cannot leak into other tests that share the module-level state.
    with patch.dict(_global_memory, {'config': {'cowboy_mode': False}}):
        result = run_shell_command("echo test")

    assert result['success'] is False
    assert result['return_code'] == 1
    assert "cancelled by user" in result['output']
    mock_confirm.ask.assert_called_once()
    mock_run_interactive.assert_not_called()
def test_shell_command_execution_error(mock_console, mock_confirm, mock_run_interactive):
    """Test handling of shell command execution errors.

    When the underlying runner raises, the tool is expected to report a
    failure result carrying the exception message rather than propagate
    the exception.
    """
    mock_run_interactive.side_effect = Exception("Command failed")

    # patch.dict restores _global_memory on exit, so the config set here
    # cannot leak into other tests that share the module-level state.
    with patch.dict(_global_memory, {'config': {'cowboy_mode': True}}):
        result = run_shell_command("invalid command")

    assert result['success'] is False
    assert result['return_code'] == 1
    assert "Command failed" in result['output']