Update prompts, shell tools and add shell tests
This commit is contained in:
parent
3b9757061c
commit
e1b04781d7
|
|
@ -24,6 +24,8 @@ RA.Aid (ReAct Aid) is a powerful AI-driven command-line tool that integrates `ai
|
||||||
|
|
||||||
⚠️ **IMPORTANT: USE AT YOUR OWN RISK** ⚠️
|
⚠️ **IMPORTANT: USE AT YOUR OWN RISK** ⚠️
|
||||||
- This tool **can and will** automatically execute shell commands on your system
|
- This tool **can and will** automatically execute shell commands on your system
|
||||||
|
- Shell commands require interactive approval unless --cowboy-mode is enabled
|
||||||
|
- The --cowboy-mode flag disables command approval and should be used with extreme caution
|
||||||
- No warranty is provided, either express or implied
|
- No warranty is provided, either express or implied
|
||||||
- Always review the actions the agent proposes before allowing them to proceed
|
- Always review the actions the agent proposes before allowing them to proceed
|
||||||
|
|
||||||
|
|
@ -125,6 +127,7 @@ ra-aid -m "Explain the authentication flow" --research-only
|
||||||
|
|
||||||
- `-m, --message`: The task or query to be executed (required)
|
- `-m, --message`: The task or query to be executed (required)
|
||||||
- `--research-only`: Only perform research without implementation
|
- `--research-only`: Only perform research without implementation
|
||||||
|
- `--cowboy-mode`: Skip interactive approval for shell commands
|
||||||
|
|
||||||
### Example Tasks
|
### Example Tasks
|
||||||
|
|
||||||
|
|
@ -143,6 +146,11 @@ ra-aid -m "Explain the authentication flow" --research-only
|
||||||
ra-aid -m "Refactor the database connection code to use connection pooling"
|
ra-aid -m "Refactor the database connection code to use connection pooling"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
4. Non-Interactive Mode:
|
||||||
|
```bash
|
||||||
|
ra-aid -m "Update all deprecated API calls" --cowboy-mode
|
||||||
|
```
|
||||||
|
|
||||||
### Environment Variables
|
### Environment Variables
|
||||||
|
|
||||||
RA.Aid uses the following environment variables:
|
RA.Aid uses the following environment variables:
|
||||||
|
|
|
||||||
|
|
@ -46,6 +46,11 @@ Examples:
|
||||||
action='store_true',
|
action='store_true',
|
||||||
help='Only perform research without implementation'
|
help='Only perform research without implementation'
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'--cowboy-mode',
|
||||||
|
action='store_true',
|
||||||
|
help='Skip interactive approval for shell commands'
|
||||||
|
)
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
# Create the base model
|
# Create the base model
|
||||||
|
|
@ -240,7 +245,8 @@ def main():
|
||||||
"thread_id": "abc123"
|
"thread_id": "abc123"
|
||||||
},
|
},
|
||||||
"recursion_limit": 100,
|
"recursion_limit": 100,
|
||||||
"research_only": args.research_only
|
"research_only": args.research_only,
|
||||||
|
"cowboy_mode": args.cowboy_mode
|
||||||
}
|
}
|
||||||
|
|
||||||
# Store config in global memory for access by is_informational_query
|
# Store config in global memory for access by is_informational_query
|
||||||
|
|
|
||||||
|
|
@ -3,9 +3,12 @@ Stage-specific prompts for the AI agent system.
|
||||||
|
|
||||||
Each prompt constant uses str.format() style template substitution for variable replacement.
|
Each prompt constant uses str.format() style template substitution for variable replacement.
|
||||||
The prompts guide the agent through different stages of task execution.
|
The prompts guide the agent through different stages of task execution.
|
||||||
"""
|
|
||||||
|
|
||||||
# Research stage prompt - guides initial codebase analysis
|
These updated prompts include instructions to scale complexity:
|
||||||
|
- For simpler requests, keep the scope minimal and avoid unnecessary complexity.
|
||||||
|
- For more complex requests, still provide detailed planning and thorough steps.
|
||||||
|
"""
|
||||||
|
|
||||||
# Research stage prompt - guides initial codebase analysis
|
# Research stage prompt - guides initial codebase analysis
|
||||||
RESEARCH_PROMPT = """
|
RESEARCH_PROMPT = """
|
||||||
Objective
|
Objective
|
||||||
|
|
@ -33,8 +36,8 @@ You must not:
|
||||||
|
|
||||||
Tools and Methodology
|
Tools and Methodology
|
||||||
|
|
||||||
Use only non-recursive, targeted fuzzy find, ripgrep_search tool (which provides context), list_directory_tree tool, shell commands, etc. (use your imagination) to efficiently explore the project structure. For example:
|
Use only non-recursive, targeted fuzzy find, ripgrep_search tool (which provides context), list_directory_tree tool, shell commands, etc. (use your imagination) to efficiently explore the project structure.
|
||||||
After identifying files, you may read them to confirm their contents only if needed to understand what currently exists (for example, to confirm if a file is a documentation file or a configuration file).
|
After identifying files, you may read them to confirm their contents only if needed to understand what currently exists.
|
||||||
Be meticulous: If you find a directory, explore it thoroughly. If you find files of potential relevance, record them. Make sure you do not skip any directories you discover.
|
Be meticulous: If you find a directory, explore it thoroughly. If you find files of potential relevance, record them. Make sure you do not skip any directories you discover.
|
||||||
Prefer to use list_directory_tree and other tools over shell commands.
|
Prefer to use list_directory_tree and other tools over shell commands.
|
||||||
Do not produce huge outputs from your commands. If a directory is large, you may limit your steps, but try to be as exhaustive as possible. Incrementally gather details as needed.
|
Do not produce huge outputs from your commands. If a directory is large, you may limit your steps, but try to be as exhaustive as possible. Incrementally gather details as needed.
|
||||||
|
|
@ -74,12 +77,11 @@ Decision on Implementation
|
||||||
If you see reasons that implementation changes will be required in the future, after documenting all findings, call request_implementation and specify why.
|
If you see reasons that implementation changes will be required in the future, after documenting all findings, call request_implementation and specify why.
|
||||||
If no changes are needed, simply state that no changes are required.
|
If no changes are needed, simply state that no changes are required.
|
||||||
|
|
||||||
Do not do any implementation or planning now. Just request it if needed.
|
|
||||||
|
|
||||||
If there is a top-level README.md or docs/ folder, always start with that.
|
If there is a top-level README.md or docs/ folder, always start with that.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Planning stage prompt - guides task breakdown and implementation planning
|
# Planning stage prompt - guides task breakdown and implementation planning
|
||||||
|
# Includes a directive to scale complexity with request size.
|
||||||
PLANNING_PROMPT = """Base Task:
|
PLANNING_PROMPT = """Base Task:
|
||||||
{base_task}
|
{base_task}
|
||||||
|
|
||||||
|
|
@ -94,18 +96,6 @@ Key Facts:
|
||||||
Key Snippets:
|
Key Snippets:
|
||||||
{key_snippets}
|
{key_snippets}
|
||||||
|
|
||||||
Fact Management:
|
|
||||||
Each fact is identified with [Fact ID: X].
|
|
||||||
Facts may be deleted if they become outdated, irrelevant, or duplicates.
|
|
||||||
Use delete_key_fact with the specific Fact ID to remove unnecessary facts.
|
|
||||||
|
|
||||||
Snippet Management:
|
|
||||||
Each snippet is identified with [Snippet ID: X].
|
|
||||||
Snippets include file path, line number, and source code.
|
|
||||||
Snippets may have optional descriptions explaining their significance.
|
|
||||||
Delete snippets with delete_key_snippet if they become outdated or irrelevant.
|
|
||||||
Use emit_key_snippet to store important code sections needed for reference.
|
|
||||||
|
|
||||||
Fact Management:
|
Fact Management:
|
||||||
Each fact is identified with [Fact ID: X].
|
Each fact is identified with [Fact ID: X].
|
||||||
Facts may be deleted if they become outdated, irrelevant, or duplicates.
|
Facts may be deleted if they become outdated, irrelevant, or duplicates.
|
||||||
|
|
@ -122,37 +112,32 @@ Guidelines:
|
||||||
|
|
||||||
If you need additional input or assistance from the expert, first use emit_expert_context to provide all relevant context. Wait for the expert’s response before defining tasks in non-trivial scenarios.
|
If you need additional input or assistance from the expert, first use emit_expert_context to provide all relevant context. Wait for the expert’s response before defining tasks in non-trivial scenarios.
|
||||||
|
|
||||||
|
Scale the complexity of your plan:
|
||||||
|
Individual tasks can include multiple steps, file edits, etc.
|
||||||
|
Therefore, use as few tasks as needed, but no fewer.
|
||||||
|
Keep tasks organized as semantic divisions of the overall work, rather than a series of steps.
|
||||||
|
|
||||||
When planning the implementation:
|
When planning the implementation:
|
||||||
Break the overall work into sub-tasks that are as detailed as possible.
|
Break the overall work into sub-tasks that are as detailed as necessary, but no more.
|
||||||
Each sub-task should be clear and unambiguous, and should fully describe what needs to be done, including:
|
Each sub-task should be clear and unambiguous, and should fully describe what needs to be done, including:
|
||||||
Purpose and goals of the sub-task
|
Purpose and goals of the sub-task
|
||||||
Steps required to complete it
|
Steps required to complete it
|
||||||
Any external interfaces it will integrate with
|
Any external interfaces it will integrate with
|
||||||
Data models and structures it will use
|
Data models and structures it will use
|
||||||
API contracts, endpoints, or protocols it requires or provides
|
API contracts, endpoints, or protocols it requires or provides
|
||||||
Detailed testing strategies specific to the sub-task
|
Testing strategies appropriate to the complexity of that sub-task
|
||||||
Be explicit about inputs, outputs, error cases, and edge conditions.
|
You may include pseudocode, but not full code.
|
||||||
|
|
||||||
For complex tasks, include:
|
|
||||||
Sample requests and responses (if APIs are involved)
|
|
||||||
Details on error handling and logging
|
|
||||||
Relevant data validation rules
|
|
||||||
Any performance, scalability, or security considerations
|
|
||||||
|
|
||||||
After finalizing the overall approach:
|
After finalizing the overall approach:
|
||||||
Use emit_plan to store the high-level implementation plan.
|
Use emit_plan to store the high-level implementation plan.
|
||||||
For each sub-task, use emit_task to store a thorough, step-by-step description.
|
For each sub-task, use emit_task to store a step-by-step description.
|
||||||
The description should be so detailed that it could be handed to another engineer who could implement it without further clarification.
|
The description should be only as detailed as warranted by the complexity of the request.
|
||||||
|
|
||||||
Only stop after all necessary tasks are fully detailed and cover the entire scope of the original request.
|
|
||||||
|
|
||||||
Avoid unnecessary complexity, but do not omit critical details.
|
|
||||||
|
|
||||||
Do not implement anything yet.
|
Do not implement anything yet.
|
||||||
|
"""
|
||||||
You are an autonomous agent, not a chatbot."""
|
|
||||||
|
|
||||||
# Research summary prompt - guides generation of research summaries
|
# Research summary prompt - guides generation of research summaries
|
||||||
|
# Remains essentially the same, but with complexity scaling if needed.
|
||||||
SUMMARY_PROMPT = """
|
SUMMARY_PROMPT = """
|
||||||
Using only the information provided in the Research Notes and Key Facts below, write a concise and direct answer to the user's query.
|
Using only the information provided in the Research Notes and Key Facts below, write a concise and direct answer to the user's query.
|
||||||
|
|
||||||
|
|
@ -181,13 +166,14 @@ Snippet Management:
|
||||||
Use emit_key_snippet to store important code sections needed for reference.
|
Use emit_key_snippet to store important code sections needed for reference.
|
||||||
|
|
||||||
Instructions:
|
Instructions:
|
||||||
- **Stay Within Provided Information**: Do not include any information not present in the Research Notes or Key Facts. Avoid assumptions or external knowledge.
|
- **Stay Within Provided Information**: Do not include any information not present in the Research Notes or Key Facts.
|
||||||
- **Handle Contradictions Appropriately**: If there are contradictions in the provided information, you may take further research steps to resolve the contradiction. If you cannot, note and explain the contradictions as best as you can.
|
- **Handle Contradictions Appropriately**: If contradictions exist, consider additional research or note the contradictions.
|
||||||
- **Maintain Focus and Brevity**: Keep your response succinct yet comprehensive and focused solely on the user's query without adding unnecessary details.
|
- **Maintain Focus and Brevity**: Keep the response concise, focusing on the user's query.
|
||||||
- **Include technical details**: If it is a technical query or a query related to files on the filesystem, always take time to read those and include relevant snippets.
|
- **Include Technical Details If Relevant**: For technical queries, reference discovered files and snippets.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Implementation stage prompt - guides specific task implementation
|
# Implementation stage prompt - guides specific task implementation
|
||||||
|
# Added instruction to adjust complexity of implementation to match request.
|
||||||
IMPLEMENTATION_PROMPT = """Base-level task (for reference only):
|
IMPLEMENTATION_PROMPT = """Base-level task (for reference only):
|
||||||
{base_task}
|
{base_task}
|
||||||
|
|
||||||
|
|
@ -204,55 +190,21 @@ Relevant Files:
|
||||||
{related_files}
|
{related_files}
|
||||||
|
|
||||||
Important Notes:
|
Important Notes:
|
||||||
- You must focus solely on the given task and implement it as described.
|
- Focus solely on the given task and implement it as described.
|
||||||
- Do not implement other tasks or deviate from the defined scope.
|
- Scale the complexity of your solution to the complexity of the request. For simple requests, keep it straightforward and minimal. For complex requests, maintain the previously planned depth.
|
||||||
- Use the delete_key_fact tool to remove facts that become outdated, irrelevant, or duplicated.
|
- Use delete_key_fact to remove facts that become outdated, irrelevant, or duplicated.
|
||||||
- Whenever referencing facts, use their assigned **[Fact ID: X]** format.
|
- Use emit_key_snippet to manage code sections before and after modifications as needed.
|
||||||
- Aggressively manage code snippets throughout implementation:
|
- Regularly remove outdated snippets with delete_key_snippet.
|
||||||
|
|
||||||
**When to Add Snippets**
|
|
||||||
- Capture code with emit_key_snippet:
|
|
||||||
* Before modifying any existing code
|
|
||||||
* When discovering related code that impacts the task
|
|
||||||
* After implementing new code sections
|
|
||||||
* When finding code patterns that will be modified
|
|
||||||
|
|
||||||
**When to Remove Snippets**
|
|
||||||
- Use delete_key_snippet with [Snippet ID: X]:
|
|
||||||
* Immediately after modifying or replacing referenced code
|
|
||||||
* When the snippet becomes obsolete or irrelevant
|
|
||||||
* When newer versions of the code exist
|
|
||||||
* When the referenced code has been deleted
|
|
||||||
|
|
||||||
**Snippet Management Examples**
|
|
||||||
- Adding a snippet before modification:
|
|
||||||
emit_key_snippet with:
|
|
||||||
filepath: "path/to/file.py"
|
|
||||||
line_number: 10
|
|
||||||
snippet: "[code to be modified]"
|
|
||||||
description: "Original version before changes"
|
|
||||||
|
|
||||||
- Removing an outdated snippet:
|
|
||||||
delete_key_snippet with [Snippet ID: X] after the code is modified
|
|
||||||
|
|
||||||
**Maintaining Snippet Quality**
|
|
||||||
- Only keep snippets relevant to current or future task understanding
|
|
||||||
- Regularly review snippets to ensure they match current codebase
|
|
||||||
- Prioritize snippet management but don't let it block implementation progress
|
|
||||||
- Use snippets to complement version control by highlighting key code sections
|
|
||||||
|
|
||||||
Instructions:
|
Instructions:
|
||||||
1. Review the provided base task, plan, and key facts.
|
1. Review the provided base task, plan, and key facts.
|
||||||
2. Implement only the specified task:
|
2. Implement only the specified task:
|
||||||
{task}
|
{task}
|
||||||
|
|
||||||
3. While implementing, follow these guidelines:
|
3. Work incrementally, validating as you go.
|
||||||
- Work incrementally, testing and validating as you go.
|
4. Update or remove any key facts that no longer apply.
|
||||||
- Update or remove any key facts that no longer apply.
|
5. Do not add features not explicitly required.
|
||||||
- Do not build features not explicitly required by the task.
|
6. Only create or modify files directly related to this task.
|
||||||
- Only create or modify files directly related to this task.
|
|
||||||
|
|
||||||
4. Once the task is complete, ensure all updated files are emitted.
|
Once the task is complete, ensure all updated files are emitted.
|
||||||
|
"""
|
||||||
No other activities (such as discussing purpose, future improvements, or unrelated steps) are allowed. Stay fully focused on completing the defined implementation task.
|
|
||||||
"""
|
|
||||||
|
|
@ -2,6 +2,8 @@ from typing import Dict, Union
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
from rich.console import Console
|
from rich.console import Console
|
||||||
from rich.panel import Panel
|
from rich.panel import Panel
|
||||||
|
from rich.prompt import Confirm
|
||||||
|
from ra_aid.tools.memory import _global_memory
|
||||||
from ra_aid.proc.interactive import run_interactive_command
|
from ra_aid.proc.interactive import run_interactive_command
|
||||||
from ra_aid.text.processing import truncate_output
|
from ra_aid.text.processing import truncate_output
|
||||||
|
|
||||||
|
|
@ -39,7 +41,18 @@ def run_shell_command(command: str) -> Dict[str, Union[str, int, bool]]:
|
||||||
"""
|
"""
|
||||||
# Show just the command in a simple panel
|
# Show just the command in a simple panel
|
||||||
console.print(Panel(command, title="🐚 Shell", border_style="bright_yellow"))
|
console.print(Panel(command, title="🐚 Shell", border_style="bright_yellow"))
|
||||||
|
|
||||||
|
# Check if we need approval
|
||||||
|
cowboy_mode = _global_memory.get('config', {}).get('cowboy_mode', False)
|
||||||
|
|
||||||
|
if not cowboy_mode:
|
||||||
|
if not Confirm.ask("Execute this command?", default=True):
|
||||||
|
return {
|
||||||
|
"output": "Command execution cancelled by user",
|
||||||
|
"return_code": 1,
|
||||||
|
"success": False
|
||||||
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
print()
|
print()
|
||||||
output, return_code = run_interactive_command(['/bin/bash', '-c', command])
|
output, return_code = run_interactive_command(['/bin/bash', '-c', command])
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,67 @@
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
from ra_aid.tools.shell import run_shell_command
|
||||||
|
from ra_aid.tools.memory import _global_memory
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def mock_console():
|
||||||
|
with patch('ra_aid.tools.shell.console') as mock:
|
||||||
|
yield mock
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def mock_confirm():
|
||||||
|
with patch('ra_aid.tools.shell.Confirm') as mock:
|
||||||
|
yield mock
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def mock_run_interactive():
|
||||||
|
with patch('ra_aid.tools.shell.run_interactive_command') as mock:
|
||||||
|
mock.return_value = (b"test output", 0)
|
||||||
|
yield mock
|
||||||
|
|
||||||
|
def test_shell_command_cowboy_mode(mock_console, mock_confirm, mock_run_interactive):
|
||||||
|
"""Test shell command execution in cowboy mode (no approval)"""
|
||||||
|
_global_memory['config'] = {'cowboy_mode': True}
|
||||||
|
|
||||||
|
result = run_shell_command("echo test")
|
||||||
|
|
||||||
|
assert result['success'] is True
|
||||||
|
assert result['return_code'] == 0
|
||||||
|
assert "test output" in result['output']
|
||||||
|
mock_confirm.ask.assert_not_called()
|
||||||
|
|
||||||
|
def test_shell_command_interactive_approved(mock_console, mock_confirm, mock_run_interactive):
|
||||||
|
"""Test shell command execution with interactive approval"""
|
||||||
|
_global_memory['config'] = {'cowboy_mode': False}
|
||||||
|
mock_confirm.ask.return_value = True
|
||||||
|
|
||||||
|
result = run_shell_command("echo test")
|
||||||
|
|
||||||
|
assert result['success'] is True
|
||||||
|
assert result['return_code'] == 0
|
||||||
|
assert "test output" in result['output']
|
||||||
|
mock_confirm.ask.assert_called_once()
|
||||||
|
|
||||||
|
def test_shell_command_interactive_rejected(mock_console, mock_confirm, mock_run_interactive):
|
||||||
|
"""Test shell command rejection in interactive mode"""
|
||||||
|
_global_memory['config'] = {'cowboy_mode': False}
|
||||||
|
mock_confirm.ask.return_value = False
|
||||||
|
|
||||||
|
result = run_shell_command("echo test")
|
||||||
|
|
||||||
|
assert result['success'] is False
|
||||||
|
assert result['return_code'] == 1
|
||||||
|
assert "cancelled by user" in result['output']
|
||||||
|
mock_confirm.ask.assert_called_once()
|
||||||
|
mock_run_interactive.assert_not_called()
|
||||||
|
|
||||||
|
def test_shell_command_execution_error(mock_console, mock_confirm, mock_run_interactive):
|
||||||
|
"""Test handling of shell command execution errors"""
|
||||||
|
_global_memory['config'] = {'cowboy_mode': True}
|
||||||
|
mock_run_interactive.side_effect = Exception("Command failed")
|
||||||
|
|
||||||
|
result = run_shell_command("invalid command")
|
||||||
|
|
||||||
|
assert result['success'] is False
|
||||||
|
assert result['return_code'] == 1
|
||||||
|
assert "Command failed" in result['output']
|
||||||
Loading…
Reference in New Issue