Update prompts, shell tools and add shell tests
This commit is contained in:
parent
3b9757061c
commit
e1b04781d7
|
|
@ -24,6 +24,8 @@ RA.Aid (ReAct Aid) is a powerful AI-driven command-line tool that integrates `ai
|
|||
|
||||
⚠️ **IMPORTANT: USE AT YOUR OWN RISK** ⚠️
|
||||
- This tool **can and will** automatically execute shell commands on your system
|
||||
- Shell commands require interactive approval unless `--cowboy-mode` is enabled
|
||||
- The `--cowboy-mode` flag disables command approval and should be used with extreme caution
|
||||
- No warranty is provided, either express or implied
|
||||
- Always review the actions the agent proposes before allowing them to proceed
|
||||
|
||||
|
|
@ -125,6 +127,7 @@ ra-aid -m "Explain the authentication flow" --research-only
|
|||
|
||||
- `-m, --message`: The task or query to be executed (required)
|
||||
- `--research-only`: Only perform research without implementation
|
||||
- `--cowboy-mode`: Skip interactive approval for shell commands
|
||||
|
||||
### Example Tasks
|
||||
|
||||
|
|
@ -143,6 +146,11 @@ ra-aid -m "Explain the authentication flow" --research-only
|
|||
ra-aid -m "Refactor the database connection code to use connection pooling"
|
||||
```
|
||||
|
||||
4. Non-Interactive Mode:
|
||||
```bash
|
||||
ra-aid -m "Update all deprecated API calls" --cowboy-mode
|
||||
```
|
||||
|
||||
### Environment Variables
|
||||
|
||||
RA.Aid uses the following environment variables:
|
||||
|
|
|
|||
|
|
@ -46,6 +46,11 @@ Examples:
|
|||
action='store_true',
|
||||
help='Only perform research without implementation'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--cowboy-mode',
|
||||
action='store_true',
|
||||
help='Skip interactive approval for shell commands'
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
# Create the base model
|
||||
|
|
@ -240,7 +245,8 @@ def main():
|
|||
"thread_id": "abc123"
|
||||
},
|
||||
"recursion_limit": 100,
|
||||
"research_only": args.research_only
|
||||
"research_only": args.research_only,
|
||||
"cowboy_mode": args.cowboy_mode
|
||||
}
|
||||
|
||||
# Store config in global memory for access by is_informational_query
|
||||
|
|
|
|||
|
|
@ -3,9 +3,12 @@ Stage-specific prompts for the AI agent system.
|
|||
|
||||
Each prompt constant uses str.format() style template substitution for variable replacement.
|
||||
The prompts guide the agent through different stages of task execution.
|
||||
"""
|
||||
|
||||
# Research stage prompt - guides initial codebase analysis
|
||||
These updated prompts include instructions to scale complexity:
|
||||
- For simpler requests, keep the scope minimal and avoid unnecessary complexity.
|
||||
- For more complex requests, still provide detailed planning and thorough steps.
|
||||
"""
|
||||
|
||||
# Research stage prompt - guides initial codebase analysis
|
||||
RESEARCH_PROMPT = """
|
||||
Objective
|
||||
|
|
@ -33,8 +36,8 @@ You must not:
|
|||
|
||||
Tools and Methodology
|
||||
|
||||
Use only non-recursive, targeted fuzzy find, ripgrep_search tool (which provides context), list_directory_tree tool, shell commands, etc. (use your imagination) to efficiently explore the project structure. For example:
|
||||
After identifying files, you may read them to confirm their contents only if needed to understand what currently exists (for example, to confirm if a file is a documentation file or a configuration file).
|
||||
Use only non-recursive, targeted fuzzy find, ripgrep_search tool (which provides context), list_directory_tree tool, shell commands, etc. (use your imagination) to efficiently explore the project structure.
|
||||
After identifying files, you may read them to confirm their contents only if needed to understand what currently exists.
|
||||
Be meticulous: If you find a directory, explore it thoroughly. If you find files of potential relevance, record them. Make sure you do not skip any directories you discover.
|
||||
Prefer to use list_directory_tree and other tools over shell commands.
|
||||
Do not produce huge outputs from your commands. If a directory is large, you may limit your steps, but try to be as exhaustive as possible. Incrementally gather details as needed.
|
||||
|
|
@ -74,12 +77,11 @@ Decision on Implementation
|
|||
If you see reasons that implementation changes will be required in the future, after documenting all findings, call request_implementation and specify why.
|
||||
If no changes are needed, simply state that no changes are required.
|
||||
|
||||
Do not do any implementation or planning now. Just request it if needed.
|
||||
|
||||
If there is a top-level README.md or docs/ folder, always start with that.
|
||||
"""
|
||||
|
||||
# Planning stage prompt - guides task breakdown and implementation planning
|
||||
# Includes a directive to scale complexity with request size.
|
||||
PLANNING_PROMPT = """Base Task:
|
||||
{base_task}
|
||||
|
||||
|
|
@ -94,18 +96,6 @@ Key Facts:
|
|||
Key Snippets:
|
||||
{key_snippets}
|
||||
|
||||
Fact Management:
|
||||
Each fact is identified with [Fact ID: X].
|
||||
Facts may be deleted if they become outdated, irrelevant, or duplicates.
|
||||
Use delete_key_fact with the specific Fact ID to remove unnecessary facts.
|
||||
|
||||
Snippet Management:
|
||||
Each snippet is identified with [Snippet ID: X].
|
||||
Snippets include file path, line number, and source code.
|
||||
Snippets may have optional descriptions explaining their significance.
|
||||
Delete snippets with delete_key_snippet if they become outdated or irrelevant.
|
||||
Use emit_key_snippet to store important code sections needed for reference.
|
||||
|
||||
Fact Management:
|
||||
Each fact is identified with [Fact ID: X].
|
||||
Facts may be deleted if they become outdated, irrelevant, or duplicates.
|
||||
|
|
@ -122,37 +112,32 @@ Guidelines:
|
|||
|
||||
If you need additional input or assistance from the expert, first use emit_expert_context to provide all relevant context. Wait for the expert’s response before defining tasks in non-trivial scenarios.
|
||||
|
||||
Scale the complexity of your plan:
|
||||
Individual tasks can include multiple steps, file edits, etc.
|
||||
Therefore, use as few tasks as needed, but no fewer.
|
||||
Keep tasks organized as semantic divisions of the overall work, rather than a series of steps.
|
||||
|
||||
When planning the implementation:
|
||||
Break the overall work into sub-tasks that are as detailed as possible.
|
||||
Break the overall work into sub-tasks that are as detailed as necessary, but no more.
|
||||
Each sub-task should be clear and unambiguous, and should fully describe what needs to be done, including:
|
||||
Purpose and goals of the sub-task
|
||||
Steps required to complete it
|
||||
Any external interfaces it will integrate with
|
||||
Data models and structures it will use
|
||||
API contracts, endpoints, or protocols it requires or provides
|
||||
Detailed testing strategies specific to the sub-task
|
||||
Be explicit about inputs, outputs, error cases, and edge conditions.
|
||||
|
||||
For complex tasks, include:
|
||||
Sample requests and responses (if APIs are involved)
|
||||
Details on error handling and logging
|
||||
Relevant data validation rules
|
||||
Any performance, scalability, or security considerations
|
||||
Testing strategies appropriate to the complexity of that sub-task
|
||||
You may include pseudocode, but not full code.
|
||||
|
||||
After finalizing the overall approach:
|
||||
Use emit_plan to store the high-level implementation plan.
|
||||
For each sub-task, use emit_task to store a thorough, step-by-step description.
|
||||
The description should be so detailed that it could be handed to another engineer who could implement it without further clarification.
|
||||
|
||||
Only stop after all necessary tasks are fully detailed and cover the entire scope of the original request.
|
||||
|
||||
Avoid unnecessary complexity, but do not omit critical details.
|
||||
|
||||
For each sub-task, use emit_task to store a step-by-step description.
|
||||
The description should be only as detailed as warranted by the complexity of the request.
|
||||
|
||||
Do not implement anything yet.
|
||||
|
||||
You are an autonomous agent, not a chatbot."""
|
||||
"""
|
||||
|
||||
# Research summary prompt - guides generation of research summaries
|
||||
# Remains essentially the same, but with complexity scaling if needed.
|
||||
SUMMARY_PROMPT = """
|
||||
Using only the information provided in the Research Notes and Key Facts below, write a concise and direct answer to the user's query.
|
||||
|
||||
|
|
@ -181,13 +166,14 @@ Snippet Management:
|
|||
Use emit_key_snippet to store important code sections needed for reference.
|
||||
|
||||
Instructions:
|
||||
- **Stay Within Provided Information**: Do not include any information not present in the Research Notes or Key Facts. Avoid assumptions or external knowledge.
|
||||
- **Handle Contradictions Appropriately**: If there are contradictions in the provided information, you may take further research steps to resolve the contradiction. If you cannot, note and explain the contradictions as best as you can.
|
||||
- **Maintain Focus and Brevity**: Keep your response succinct yet comprehensive and focused solely on the user's query without adding unnecessary details.
|
||||
- **Include technical details**: If it is a technical query or a query related to files on the filesystem, always take time to read those and include relevant snippets.
|
||||
- **Stay Within Provided Information**: Do not include any information not present in the Research Notes or Key Facts.
|
||||
- **Handle Contradictions Appropriately**: If contradictions exist, consider additional research or note the contradictions.
|
||||
- **Maintain Focus and Brevity**: Keep the response concise, focusing on the user's query.
|
||||
- **Include Technical Details If Relevant**: For technical queries, reference discovered files and snippets.
|
||||
"""
|
||||
|
||||
# Implementation stage prompt - guides specific task implementation
|
||||
# Added instruction to adjust complexity of implementation to match request.
|
||||
IMPLEMENTATION_PROMPT = """Base-level task (for reference only):
|
||||
{base_task}
|
||||
|
||||
|
|
@ -204,55 +190,21 @@ Relevant Files:
|
|||
{related_files}
|
||||
|
||||
Important Notes:
|
||||
- You must focus solely on the given task and implement it as described.
|
||||
- Do not implement other tasks or deviate from the defined scope.
|
||||
- Use the delete_key_fact tool to remove facts that become outdated, irrelevant, or duplicated.
|
||||
- Whenever referencing facts, use their assigned **[Fact ID: X]** format.
|
||||
- Aggressively manage code snippets throughout implementation:
|
||||
|
||||
**When to Add Snippets**
|
||||
- Capture code with emit_key_snippet:
|
||||
* Before modifying any existing code
|
||||
* When discovering related code that impacts the task
|
||||
* After implementing new code sections
|
||||
* When finding code patterns that will be modified
|
||||
|
||||
**When to Remove Snippets**
|
||||
- Use delete_key_snippet with [Snippet ID: X]:
|
||||
* Immediately after modifying or replacing referenced code
|
||||
* When the snippet becomes obsolete or irrelevant
|
||||
* When newer versions of the code exist
|
||||
* When the referenced code has been deleted
|
||||
|
||||
**Snippet Management Examples**
|
||||
- Adding a snippet before modification:
|
||||
emit_key_snippet with:
|
||||
filepath: "path/to/file.py"
|
||||
line_number: 10
|
||||
snippet: "[code to be modified]"
|
||||
description: "Original version before changes"
|
||||
|
||||
- Removing an outdated snippet:
|
||||
delete_key_snippet with [Snippet ID: X] after the code is modified
|
||||
|
||||
**Maintaining Snippet Quality**
|
||||
- Only keep snippets relevant to current or future task understanding
|
||||
- Regularly review snippets to ensure they match current codebase
|
||||
- Prioritize snippet management but don't let it block implementation progress
|
||||
- Use snippets to complement version control by highlighting key code sections
|
||||
- Focus solely on the given task and implement it as described.
|
||||
- Scale the complexity of your solution to the complexity of the request. For simple requests, keep it straightforward and minimal. For complex requests, maintain the previously planned depth.
|
||||
- Use delete_key_fact to remove facts that become outdated, irrelevant, or duplicated.
|
||||
- Use emit_key_snippet to manage code sections before and after modifications as needed.
|
||||
- Regularly remove outdated snippets with delete_key_snippet.
|
||||
|
||||
Instructions:
|
||||
1. Review the provided base task, plan, and key facts.
|
||||
2. Implement only the specified task:
|
||||
{task}
|
||||
|
||||
3. While implementing, follow these guidelines:
|
||||
- Work incrementally, testing and validating as you go.
|
||||
- Update or remove any key facts that no longer apply.
|
||||
- Do not build features not explicitly required by the task.
|
||||
- Only create or modify files directly related to this task.
|
||||
3. Work incrementally, validating as you go.
|
||||
4. Update or remove any key facts that no longer apply.
|
||||
5. Do not add features not explicitly required.
|
||||
6. Only create or modify files directly related to this task.
|
||||
|
||||
4. Once the task is complete, ensure all updated files are emitted.
|
||||
|
||||
No other activities (such as discussing purpose, future improvements, or unrelated steps) are allowed. Stay fully focused on completing the defined implementation task.
|
||||
"""
|
||||
Once the task is complete, ensure all updated files are emitted.
|
||||
"""
|
||||
|
|
@ -2,6 +2,8 @@ from typing import Dict, Union
|
|||
from langchain_core.tools import tool
|
||||
from rich.console import Console
|
||||
from rich.panel import Panel
|
||||
from rich.prompt import Confirm
|
||||
from ra_aid.tools.memory import _global_memory
|
||||
from ra_aid.proc.interactive import run_interactive_command
|
||||
from ra_aid.text.processing import truncate_output
|
||||
|
||||
|
|
@ -39,7 +41,18 @@ def run_shell_command(command: str) -> Dict[str, Union[str, int, bool]]:
|
|||
"""
|
||||
# Show just the command in a simple panel
|
||||
console.print(Panel(command, title="🐚 Shell", border_style="bright_yellow"))
|
||||
|
||||
|
||||
# Check if we need approval
|
||||
cowboy_mode = _global_memory.get('config', {}).get('cowboy_mode', False)
|
||||
|
||||
if not cowboy_mode:
|
||||
if not Confirm.ask("Execute this command?", default=True):
|
||||
return {
|
||||
"output": "Command execution cancelled by user",
|
||||
"return_code": 1,
|
||||
"success": False
|
||||
}
|
||||
|
||||
try:
|
||||
print()
|
||||
output, return_code = run_interactive_command(['/bin/bash', '-c', command])
|
||||
|
|
|
|||
|
|
@ -0,0 +1,67 @@
|
|||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
from ra_aid.tools.shell import run_shell_command
|
||||
from ra_aid.tools.memory import _global_memory
|
||||
|
||||
@pytest.fixture
def mock_console():
    """Swap ``ra_aid.tools.shell.console`` for a mock while the test runs.

    Yields:
        The mock object standing in for the console.
    """
    console_patcher = patch('ra_aid.tools.shell.console')
    fake_console = console_patcher.start()
    try:
        yield fake_console
    finally:
        # Undo the patch even if the test body raised.
        console_patcher.stop()
|
||||
|
||||
@pytest.fixture
def mock_confirm():
    """Swap ``ra_aid.tools.shell.Confirm`` for a mock while the test runs.

    Tests set ``ask.return_value`` on the yielded mock to simulate the
    user's answer to the approval prompt.

    Yields:
        The mock object standing in for ``Confirm``.
    """
    confirm_patcher = patch('ra_aid.tools.shell.Confirm')
    fake_confirm = confirm_patcher.start()
    try:
        yield fake_confirm
    finally:
        # Undo the patch even if the test body raised.
        confirm_patcher.stop()
|
||||
|
||||
@pytest.fixture
def mock_run_interactive():
    """Swap ``ra_aid.tools.shell.run_interactive_command`` for a mock.

    By default the mock reports a successful run: it returns the pair
    ``(b"test output", 0)``. Tests may override ``side_effect`` to
    simulate a failure.

    Yields:
        The mock object standing in for ``run_interactive_command``.
    """
    # Extra keyword arguments to patch() configure the created mock.
    runner_patcher = patch(
        'ra_aid.tools.shell.run_interactive_command',
        return_value=(b"test output", 0),
    )
    fake_runner = runner_patcher.start()
    try:
        yield fake_runner
    finally:
        # Undo the patch even if the test body raised.
        runner_patcher.stop()
|
||||
|
||||
def test_shell_command_cowboy_mode(mock_console, mock_confirm, mock_run_interactive):
    """Test shell command execution in cowboy mode (no approval)."""
    # patch.dict restores _global_memory when the block exits, so the config
    # set here cannot leak into other tests that share the module-level store.
    # NOTE(review): assumes _global_memory is a dict-like mapping — confirm.
    with patch.dict(_global_memory, {'config': {'cowboy_mode': True}}):
        result = run_shell_command("echo test")

    assert result['success'] is True
    assert result['return_code'] == 0
    assert "test output" in result['output']
    # Cowboy mode must bypass the interactive confirmation entirely.
    mock_confirm.ask.assert_not_called()
|
||||
|
||||
def test_shell_command_interactive_approved(mock_console, mock_confirm, mock_run_interactive):
    """Test shell command execution with interactive approval."""
    # Simulate the user answering "yes" to the confirmation prompt.
    mock_confirm.ask.return_value = True

    # patch.dict restores _global_memory when the block exits, so the config
    # set here cannot leak into other tests that share the module-level store.
    # NOTE(review): assumes _global_memory is a dict-like mapping — confirm.
    with patch.dict(_global_memory, {'config': {'cowboy_mode': False}}):
        result = run_shell_command("echo test")

    assert result['success'] is True
    assert result['return_code'] == 0
    assert "test output" in result['output']
    # The user must have been asked exactly once before execution.
    mock_confirm.ask.assert_called_once()
|
||||
|
||||
def test_shell_command_interactive_rejected(mock_console, mock_confirm, mock_run_interactive):
    """Test shell command rejection in interactive mode."""
    # Simulate the user answering "no" to the confirmation prompt.
    mock_confirm.ask.return_value = False

    # patch.dict restores _global_memory when the block exits, so the config
    # set here cannot leak into other tests that share the module-level store.
    # NOTE(review): assumes _global_memory is a dict-like mapping — confirm.
    with patch.dict(_global_memory, {'config': {'cowboy_mode': False}}):
        result = run_shell_command("echo test")

    assert result['success'] is False
    assert result['return_code'] == 1
    assert "cancelled by user" in result['output']
    mock_confirm.ask.assert_called_once()
    # A rejected command must never reach the command runner.
    mock_run_interactive.assert_not_called()
|
||||
|
||||
def test_shell_command_execution_error(mock_console, mock_confirm, mock_run_interactive):
    """Test handling of shell command execution errors."""
    # Make the underlying runner raise instead of returning output.
    mock_run_interactive.side_effect = Exception("Command failed")

    # patch.dict restores _global_memory when the block exits, so the config
    # set here cannot leak into other tests that share the module-level store.
    # NOTE(review): assumes _global_memory is a dict-like mapping — confirm.
    with patch.dict(_global_memory, {'config': {'cowboy_mode': True}}):
        result = run_shell_command("invalid command")

    # The exception is expected to be reported in the result, not raised.
    assert result['success'] is False
    assert result['return_code'] == 1
    assert "Command failed" in result['output']
|
||||
Loading…
Reference in New Issue