diff --git a/.python-version b/.python-version
new file mode 100644
index 0000000..56bb660
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.12.7
diff --git a/README.md b/README.md
index 12898fc..28fa0ca 100644
--- a/README.md
+++ b/README.md
@@ -179,6 +179,9 @@ ra-aid -m "Add new feature" --verbose
- `--temperature`: LLM temperature (0.0-2.0) to control randomness in responses
- `--disable-limit-tokens`: Disable token limiting for Anthropic Claude react agents
- `--recursion-limit`: Maximum recursion depth for agent operations (default: 100)
+- `--test-cmd`: Custom command to run tests. If set, the user will be asked whether to run the test command
+- `--auto-test`: Automatically run tests after each code change
+- `--max-test-cmd-retries`: Maximum number of test command retry attempts (default: 3)
- `--version`: Show program version number and exit
### Example Tasks
diff --git a/ra_aid/__main__.py b/ra_aid/__main__.py
index a360b2d..a5bcee7 100644
--- a/ra_aid/__main__.py
+++ b/ra_aid/__main__.py
@@ -5,7 +5,7 @@ from datetime import datetime
from rich.panel import Panel
from rich.console import Console
from langgraph.checkpoint.memory import MemorySaver
-from ra_aid.config import DEFAULT_RECURSION_LIMIT
+from ra_aid.config import DEFAULT_MAX_TEST_CMD_RETRIES, DEFAULT_RECURSION_LIMIT
from ra_aid.env import validate_environment
from ra_aid.project_info import get_project_info, format_project_info
from ra_aid.tools.memory import _global_memory
@@ -148,7 +148,22 @@ Examples:
parser.add_argument(
"--aider-config", type=str, help="Specify the aider config file path"
)
-
+ parser.add_argument(
+ "--test-cmd",
+ type=str,
+ help="Test command to run before completing tasks (e.g. 'pytest tests/')"
+ )
+ parser.add_argument(
+ "--auto-test",
+ action="store_true",
+ help="Automatically run tests before completing tasks"
+ )
+ parser.add_argument(
+ "--max-test-cmd-retries",
+ type=int,
+ default=DEFAULT_MAX_TEST_CMD_RETRIES,
+        help="Maximum number of retries for the test command (default: 3)",
+ )
if args is None:
args = sys.argv[1:]
parsed_args = parser.parse_args(args)
@@ -192,6 +207,10 @@ Examples:
# Validate recursion limit is positive
if parsed_args.recursion_limit <= 0:
parser.error("Recursion limit must be positive")
+
+    # If --auto-test is provided, require --test-cmd as well
+ if parsed_args.auto_test and not parsed_args.test_cmd:
+ parser.error("Test command is required when using --auto-test")
return parsed_args
@@ -344,6 +363,9 @@ def main():
"web_research_enabled": web_research_enabled,
"aider_config": args.aider_config,
"limit_tokens": args.disable_limit_tokens,
+ "auto_test": args.auto_test,
+ "test_cmd": args.test_cmd,
+ "max_test_cmd_retries": args.max_test_cmd_retries,
}
# Store config in global memory for access by is_informational_query
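For reference, a minimal standalone sketch of the new flag wiring and validation. It is illustrative only: the parser below is a stand-in for ra_aid's real one, and DEFAULT_MAX_TEST_CMD_RETRIES is inlined rather than imported from ra_aid.config.

```python
# Standalone sketch mirroring the argparse additions above; not the actual ra_aid parser.
import argparse

DEFAULT_MAX_TEST_CMD_RETRIES = 3  # same value the diff adds to ra_aid/config.py


def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(prog="ra-aid")
    parser.add_argument(
        "--test-cmd",
        type=str,
        help="Test command to run before completing tasks (e.g. 'pytest tests/')",
    )
    parser.add_argument(
        "--auto-test",
        action="store_true",
        help="Automatically run tests before completing tasks",
    )
    parser.add_argument(
        "--max-test-cmd-retries",
        type=int,
        default=DEFAULT_MAX_TEST_CMD_RETRIES,
        help="Maximum number of retries for the test command (default: 3)",
    )
    return parser


if __name__ == "__main__":
    parser = build_parser()
    args = parser.parse_args(["--test-cmd", "pytest tests/", "--auto-test"])
    # --auto-test without --test-cmd is rejected, mirroring the validation above.
    if args.auto_test and not args.test_cmd:
        parser.error("Test command is required when using --auto-test")
    print(args.test_cmd, args.auto_test, args.max_test_cmd_retries)
```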
diff --git a/ra_aid/agent_utils.py b/ra_aid/agent_utils.py
index f462778..7cba70b 100644
--- a/ra_aid/agent_utils.py
+++ b/ra_aid/agent_utils.py
@@ -12,7 +12,7 @@ import signal
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt.chat_agent_executor import AgentState
-from ra_aid.config import DEFAULT_RECURSION_LIMIT
+from ra_aid.config import DEFAULT_MAX_TEST_CMD_RETRIES, DEFAULT_RECURSION_LIMIT
from ra_aid.models_tokens import DEFAULT_TOKEN_LIMIT, models_tokens
from ra_aid.agents.ciayn_agent import CiaynAgent
import threading
@@ -55,6 +55,8 @@ from ra_aid.prompts import (
from langchain_core.messages import HumanMessage
from anthropic import APIError, APITimeoutError, RateLimitError, InternalServerError
+from ra_aid.tools.human import ask_human
+from ra_aid.tools.shell import run_shell_command
from rich.console import Console
from rich.markdown import Markdown
from rich.panel import Panel
@@ -64,6 +66,7 @@ from ra_aid.tools.memory import (
get_memory_value,
get_related_files,
)
+from ra_aid.tools.handle_user_defined_test_cmd_execution import execute_test_command
console = Console()
@@ -719,6 +722,10 @@ def run_agent_with_retry(agent, prompt: str, config: dict) -> Optional[str]:
max_retries = 20
base_delay = 1
+ test_attempts = 0
+ max_test_retries = config.get("max_test_cmd_retries", DEFAULT_MAX_TEST_CMD_RETRIES)
+ auto_test = config.get("auto_test", False)
+ original_prompt = prompt
with InterruptibleSection():
try:
@@ -745,6 +752,19 @@ def run_agent_with_retry(agent, prompt: str, config: dict) -> Optional[str]:
_global_memory["task_completed"] = False
_global_memory["completion_message"] = ""
break
+
+ # Execute test command if configured
+ should_break, prompt, auto_test, test_attempts = execute_test_command(
+ config,
+ original_prompt,
+ test_attempts,
+ auto_test
+ )
+ if should_break:
+ break
+ if prompt != original_prompt:
+ continue
+
logger.debug("Agent run completed successfully")
return "Agent run completed successfully"
except (KeyboardInterrupt, AgentInterrupt):
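The four-tuple returned by execute_test_command drives the loop above: break when tests pass (or the user opts out, or retries are exhausted), continue when the prompt has been extended with failure output, and fall through otherwise. A simplified sketch of that contract, where run_once is a hypothetical stand-in for a single agent pass:

```python
# Simplified sketch of the loop contract used in run_agent_with_retry; run_once is a
# hypothetical placeholder for one agent.stream(...) pass over the current prompt.
from ra_aid.tools.handle_user_defined_test_cmd_execution import execute_test_command


def run_until_tests_pass(run_once, config: dict, original_prompt: str) -> str:
    prompt = original_prompt
    test_attempts = 0
    auto_test = config.get("auto_test", False)
    while True:
        run_once(prompt)  # one agent pass
        should_break, prompt, auto_test, test_attempts = execute_test_command(
            config, original_prompt, test_attempts, auto_test
        )
        if should_break:
            break  # tests passed, user declined, or max retries reached
        if prompt != original_prompt:
            continue  # failure output appended to the prompt; let the agent retry
        break  # nothing changed; treat the run as complete (mirrors the fall-through)
    return "Agent run completed successfully"
```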
diff --git a/ra_aid/config.py b/ra_aid/config.py
index 2a2a187..6df55f4 100644
--- a/ra_aid/config.py
+++ b/ra_aid/config.py
@@ -1,3 +1,4 @@
"""Configuration utilities."""
DEFAULT_RECURSION_LIMIT = 100
+DEFAULT_MAX_TEST_CMD_RETRIES = 3
\ No newline at end of file
diff --git a/ra_aid/llm.py b/ra_aid/llm.py
index 8009a23..5e22fa1 100644
--- a/ra_aid/llm.py
+++ b/ra_aid/llm.py
@@ -197,4 +197,4 @@ def initialize_expert_llm(
provider: str = "openai", model_name: str = "o1"
) -> BaseChatModel:
"""Initialize an expert language model client based on the specified provider and model."""
- return create_llm_client(provider, model_name, temperature=None, is_expert=True)
+ return create_llm_client(provider, model_name, temperature=None, is_expert=True)
\ No newline at end of file
diff --git a/ra_aid/tools/handle_user_defined_test_cmd_execution.py b/ra_aid/tools/handle_user_defined_test_cmd_execution.py
new file mode 100644
index 0000000..993ffd7
--- /dev/null
+++ b/ra_aid/tools/handle_user_defined_test_cmd_execution.py
@@ -0,0 +1,167 @@
+"""Utilities for executing and managing user-defined test commands."""
+
+from typing import Dict, Any, Tuple
+from dataclasses import dataclass
+from rich.console import Console
+from rich.markdown import Markdown
+from rich.panel import Panel
+from ra_aid.tools.human import ask_human
+from ra_aid.tools.shell import run_shell_command
+from ra_aid.logging_config import get_logger
+
+console = Console()
+logger = get_logger(__name__)
+
+@dataclass
+class TestState:
+ """State for test execution."""
+ prompt: str
+ test_attempts: int
+ auto_test: bool
+ should_break: bool = False
+
+def display_test_failure(attempts: int, max_retries: int) -> None:
+ """Display test failure message.
+
+ Args:
+ attempts: Current number of attempts
+ max_retries: Maximum allowed retries
+ """
+ console.print(
+ Panel(
+            Markdown(f"Test failed. Attempt {attempts} of {max_retries}. Retrying with the failure output appended to the prompt."),
+ title="🔎 User Defined Test",
+ border_style="red bold"
+ )
+ )
+
+def handle_test_failure(state: TestState, original_prompt: str, test_result: Dict[str, Any]) -> TestState:
+ """Handle test command failure.
+
+ Args:
+ state: Current test state
+ original_prompt: Original prompt text
+ test_result: Test command result
+
+ Returns:
+ Updated test state
+ """
+ state.prompt = f"{original_prompt}. Previous attempt failed with: {test_result['output']}"
+    display_test_failure(state.test_attempts, 5)  # Hardcoded display value; the configured max_test_cmd_retries is not available here
+ state.should_break = False
+ return state
+
+def run_test_command(cmd: str, state: TestState, original_prompt: str) -> TestState:
+ """Run test command and handle result.
+
+ Args:
+ cmd: Test command to execute
+ state: Current test state
+ original_prompt: Original prompt text
+
+ Returns:
+ Updated test state
+ """
+ try:
+ test_result = run_shell_command(cmd)
+ state.test_attempts += 1
+
+ if not test_result["success"]:
+ return handle_test_failure(state, original_prompt, test_result)
+
+ state.should_break = True
+ return state
+
+ except Exception as e:
+ logger.warning(f"Test command execution failed: {str(e)}")
+ state.test_attempts += 1
+ state.should_break = True
+ return state
+
+def handle_user_response(response: str, state: TestState, cmd: str, original_prompt: str) -> TestState:
+ """Handle user's response to test prompt.
+
+ Args:
+ response: User's response (y/n/a)
+ state: Current test state
+ cmd: Test command
+ original_prompt: Original prompt text
+
+ Returns:
+ Updated test state
+ """
+ response = response.strip().lower()
+
+ if response == "n":
+ state.should_break = True
+ return state
+
+ if response == "a":
+ state.auto_test = True
+ return run_test_command(cmd, state, original_prompt)
+
+ if response == "y":
+ return run_test_command(cmd, state, original_prompt)
+
+ return state
+
+def check_max_retries(attempts: int, max_retries: int) -> bool:
+ """Check if max retries reached.
+
+ Args:
+ attempts: Current number of attempts
+ max_retries: Maximum allowed retries
+
+ Returns:
+ True if max retries reached
+ """
+ if attempts >= max_retries:
+ logger.warning("Max test retries reached")
+ return True
+ return False
+
+def execute_test_command(
+ config: Dict[str, Any],
+ original_prompt: str,
+ test_attempts: int = 0,
+ auto_test: bool = False,
+) -> Tuple[bool, str, bool, int]:
+ """Execute a test command and handle retries.
+
+ Args:
+ config: Configuration dictionary containing test settings
+ original_prompt: The original prompt to append errors to
+ test_attempts: Current number of test attempts
+ auto_test: Whether auto-test mode is enabled
+
+ Returns:
+ Tuple containing:
+ - bool: Whether to break the retry loop
+ - str: Updated prompt
+ - bool: Updated auto_test flag
+ - int: Updated test_attempts count
+ """
+ state = TestState(
+ prompt=original_prompt,
+ test_attempts=test_attempts,
+ auto_test=auto_test
+ )
+
+ if not config.get("test_cmd"):
+ state.should_break = True
+ return state.should_break, state.prompt, state.auto_test, state.test_attempts
+
+    max_retries = config.get("max_test_cmd_retries", 3)  # fallback matches DEFAULT_MAX_TEST_CMD_RETRIES
+ cmd = config["test_cmd"]
+
+ if not auto_test:
+ print()
+ response = ask_human.invoke({"question": "Would you like to run the test command? (y=yes, n=no, a=enable auto-test)"})
+ state = handle_user_response(response, state, cmd, original_prompt)
+ else:
+ if check_max_retries(test_attempts, max_retries):
+ state.should_break = True
+ else:
+ state = run_test_command(cmd, state, original_prompt)
+
+ return state.should_break, state.prompt, state.auto_test, state.test_attempts
\ No newline at end of file
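To make the return tuple concrete, here is an illustrative direct call in auto-test mode. The "pytest -q" command is an assumption for this example, and run_shell_command really executes whatever command is configured.

```python
# Illustrative direct call; "pytest -q" is an assumed test command for this example.
from ra_aid.tools.handle_user_defined_test_cmd_execution import execute_test_command

config = {"test_cmd": "pytest -q", "max_test_cmd_retries": 3}
should_break, prompt, auto_test, attempts = execute_test_command(
    config,
    original_prompt="Add new feature",
    test_attempts=0,
    auto_test=True,
)
# Passing tests -> (True,  "Add new feature", True, 1)
# Failing tests -> (False, "Add new feature. Previous attempt failed with: <output>", True, 1)
```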
diff --git a/ra_aid/tools/programmer.py b/ra_aid/tools/programmer.py
index 488cdec..438d27f 100644
--- a/ra_aid/tools/programmer.py
+++ b/ra_aid/tools/programmer.py
@@ -45,6 +45,8 @@ Returns: { "output": stdout+stderr, "return_code": 0 if success, "success": True
"--no-auto-commits",
"--dark-mode",
"--no-suggest-shell-commands",
+ "--no-show-release-notes",
+ "--no-check-update",
]
# Add config file if specified
diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644
index 0000000..862f852
--- /dev/null
+++ b/scripts/__init__.py
@@ -0,0 +1 @@
+"""Scripts package."""
diff --git a/tests/ra_aid/tools/test_execution.py b/tests/ra_aid/tools/test_execution.py
new file mode 100644
index 0000000..316ebcd
--- /dev/null
+++ b/tests/ra_aid/tools/test_execution.py
@@ -0,0 +1,218 @@
+"""Tests for test execution utilities."""
+
+import pytest
+from unittest.mock import Mock, patch
+from ra_aid.tools.handle_user_defined_test_cmd_execution import execute_test_command
+
+# Test cases for execute_test_command
+test_cases = [
+ # Format: (name, config, original_prompt, test_attempts, auto_test,
+ # mock_responses, expected_result)
+
+ # Case 1: No test command configured
+ (
+ "no_test_command",
+ {"other_config": "value"},
+ "original prompt",
+ 0,
+ False,
+ {},
+ (True, "original prompt", False, 0)
+ ),
+
+ # Case 2: User declines to run test
+ (
+ "user_declines_test",
+ {"test_cmd": "pytest"},
+ "original prompt",
+ 0,
+ False,
+ {"ask_human_response": "n"},
+ (True, "original prompt", False, 0)
+ ),
+
+ # Case 3: User enables auto-test
+ (
+ "user_enables_auto_test",
+ {"test_cmd": "pytest"},
+ "original prompt",
+ 0,
+ False,
+ {
+ "ask_human_response": "a",
+ "shell_cmd_result": {"success": True, "output": "All tests passed"}
+ },
+ (True, "original prompt", True, 1)
+ ),
+
+ # Case 4: Auto-test success
+ (
+ "auto_test_success",
+ {"test_cmd": "pytest"},
+ "original prompt",
+ 0,
+ True,
+ {"shell_cmd_result": {"success": True, "output": "All tests passed"}},
+ (True, "original prompt", True, 1)
+ ),
+
+ # Case 5: Auto-test failure with retry
+ (
+ "auto_test_failure_retry",
+ {"test_cmd": "pytest"},
+ "original prompt",
+ 0,
+ True,
+ {"shell_cmd_result": {"success": False, "output": "Test failed"}},
+ (False, "original prompt. Previous attempt failed with: Test failed", True, 1)
+ ),
+
+ # Case 6: Max retries reached
+ (
+ "max_retries_reached",
+ {"test_cmd": "pytest", "max_test_cmd_retries": 3},
+ "original prompt",
+ 3,
+ True,
+ {},
+ (True, "original prompt", True, 3)
+ ),
+
+ # Case 7: User runs test manually
+ (
+ "manual_test_success",
+ {"test_cmd": "pytest"},
+ "original prompt",
+ 0,
+ False,
+ {
+ "ask_human_response": "y",
+ "shell_cmd_result": {"success": True, "output": "All tests passed"}
+ },
+ (True, "original prompt", False, 1)
+ ),
+
+ # Case 8: Manual test failure
+ (
+ "manual_test_failure",
+ {"test_cmd": "pytest"},
+ "original prompt",
+ 0,
+ False,
+ {
+ "ask_human_response": "y",
+ "shell_cmd_result": {"success": False, "output": "Test failed"}
+ },
+ (False, "original prompt. Previous attempt failed with: Test failed", False, 1)
+ ),
+
+ # Case 9: Manual test error
+ (
+ "manual_test_error",
+ {"test_cmd": "pytest"},
+ "original prompt",
+ 0,
+ False,
+ {
+ "ask_human_response": "y",
+ "shell_cmd_result_error": Exception("Command failed")
+ },
+ (True, "original prompt", False, 1)
+ ),
+
+ # Case 10: Auto-test error
+ (
+ "auto_test_error",
+ {"test_cmd": "pytest"},
+ "original prompt",
+ 0,
+ True,
+ {
+ "shell_cmd_result_error": Exception("Command failed")
+ },
+ (True, "original prompt", True, 1)
+ ),
+]
+
+@pytest.mark.parametrize(
+ "name,config,original_prompt,test_attempts,auto_test,mock_responses,expected",
+ test_cases,
+ ids=[case[0] for case in test_cases]
+)
+def test_execute_test_command(
+ name: str,
+ config: dict,
+ original_prompt: str,
+ test_attempts: int,
+ auto_test: bool,
+ mock_responses: dict,
+ expected: tuple,
+) -> None:
+ """Test execute_test_command with different scenarios.
+
+ Args:
+ name: Test case name
+ config: Test configuration
+ original_prompt: Original prompt text
+ test_attempts: Number of test attempts
+ auto_test: Auto-test flag
+ mock_responses: Mock response data
+ expected: Expected result tuple
+ """
+ with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.ask_human") as mock_ask_human, \
+ patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_shell_command") as mock_run_cmd, \
+ patch("ra_aid.tools.handle_user_defined_test_cmd_execution.console") as mock_console, \
+ patch("ra_aid.tools.handle_user_defined_test_cmd_execution.logger") as mock_logger:
+
+ # Configure mocks based on mock_responses
+ if "ask_human_response" in mock_responses:
+ mock_ask_human.invoke.return_value = mock_responses["ask_human_response"]
+
+ if "shell_cmd_result_error" in mock_responses:
+ mock_run_cmd.side_effect = mock_responses["shell_cmd_result_error"]
+ elif "shell_cmd_result" in mock_responses:
+ mock_run_cmd.return_value = mock_responses["shell_cmd_result"]
+
+ # Execute test command
+ result = execute_test_command(
+ config,
+ original_prompt,
+ test_attempts,
+ auto_test
+ )
+
+ # Verify result matches expected
+ assert result == expected, f"Test case '{name}' failed"
+
+ # Verify mock interactions
+ if config.get("test_cmd") and not auto_test:
+ mock_ask_human.invoke.assert_called_once()
+
+ if auto_test and test_attempts < config.get("max_test_cmd_retries", 5):
+ if config.get("test_cmd"):
+ mock_run_cmd.assert_called_once_with(config["test_cmd"])
+
+ # Verify logging for max retries
+ if test_attempts >= config.get("max_test_cmd_retries", 5):
+ mock_logger.warning.assert_called_once_with("Max test retries reached")
+
+def test_execute_test_command_error_handling() -> None:
+ """Test error handling in execute_test_command."""
+ config = {"test_cmd": "pytest"}
+
+ with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_shell_command") as mock_run_cmd, \
+ patch("ra_aid.tools.handle_user_defined_test_cmd_execution.logger") as mock_logger:
+
+ # Simulate run_shell_command raising an exception
+ mock_run_cmd.side_effect = Exception("Command failed")
+
+ result = execute_test_command(
+ config,
+ "original prompt",
+ 0,
+ True
+ )
+
+ # Should handle error and continue
+ assert result == (True, "original prompt", True, 1)
+ mock_logger.warning.assert_called_once()
\ No newline at end of file
diff --git a/tests/ra_aid/tools/test_handle_user_defined_test_cmd_execution.py b/tests/ra_aid/tools/test_handle_user_defined_test_cmd_execution.py
new file mode 100644
index 0000000..4f436b5
--- /dev/null
+++ b/tests/ra_aid/tools/test_handle_user_defined_test_cmd_execution.py
@@ -0,0 +1,109 @@
+"""Tests for user-defined test command execution utilities."""
+
+import pytest
+from unittest.mock import patch, Mock
+from ra_aid.tools.handle_user_defined_test_cmd_execution import (
+ TestState,
+ execute_test_command,
+ handle_test_failure,
+ run_test_command,
+ handle_user_response,
+ check_max_retries
+)
+
+@pytest.fixture
+def test_state():
+ """Create a test state fixture."""
+ return TestState(
+ prompt="test prompt",
+ test_attempts=0,
+ auto_test=False
+ )
+
+def test_check_max_retries():
+ """Test max retries check."""
+ assert not check_max_retries(2, 3)
+ assert check_max_retries(3, 3)
+ assert check_max_retries(4, 3)
+
+def test_handle_test_failure(test_state):
+ """Test handling of test failures."""
+ test_result = {"output": "error message"}
+ with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.display_test_failure"):
+ state = handle_test_failure(test_state, "original", test_result)
+ assert not state.should_break
+ assert "error message" in state.prompt
+
+def test_run_test_command_success(test_state):
+ """Test successful test command execution."""
+ with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_shell_command") as mock_run:
+ mock_run.return_value = {"success": True, "output": ""}
+ state = run_test_command("test", test_state, "original")
+ assert state.should_break
+ assert state.test_attempts == 1
+
+def test_run_test_command_failure(test_state):
+ """Test failed test command execution."""
+ with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_shell_command") as mock_run:
+ mock_run.return_value = {"success": False, "output": "error"}
+ state = run_test_command("test", test_state, "original")
+ assert not state.should_break
+ assert state.test_attempts == 1
+ assert "error" in state.prompt
+
+def test_run_test_command_error(test_state):
+ """Test test command execution error."""
+ with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_shell_command") as mock_run:
+ mock_run.side_effect = Exception("Command failed")
+ state = run_test_command("test", test_state, "original")
+ assert state.should_break
+ assert state.test_attempts == 1
+
+def test_handle_user_response_no(test_state):
+ """Test handling of 'no' response."""
+ state = handle_user_response("n", test_state, "test", "original")
+ assert state.should_break
+ assert not state.auto_test
+
+def test_handle_user_response_auto(test_state):
+ """Test handling of 'auto' response."""
+ with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_test_command") as mock_run:
+ mock_state = TestState("prompt", 1, True, True)
+ mock_run.return_value = mock_state
+ state = handle_user_response("a", test_state, "test", "original")
+ assert state.auto_test
+ mock_run.assert_called_once()
+
+def test_handle_user_response_yes(test_state):
+ """Test handling of 'yes' response."""
+ with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_test_command") as mock_run:
+ mock_state = TestState("prompt", 1, False, True)
+ mock_run.return_value = mock_state
+ state = handle_user_response("y", test_state, "test", "original")
+ assert not state.auto_test
+ mock_run.assert_called_once()
+
+def test_execute_test_command_no_cmd():
+ """Test execution with no test command."""
+ result = execute_test_command({}, "prompt")
+ assert result == (True, "prompt", False, 0)
+
+def test_execute_test_command_manual():
+ """Test manual test execution."""
+ config = {"test_cmd": "test"}
+ with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.ask_human") as mock_ask, \
+ patch("ra_aid.tools.handle_user_defined_test_cmd_execution.handle_user_response") as mock_handle:
+ mock_ask.invoke.return_value = "y"
+ mock_state = TestState("new prompt", 1, False, True)
+ mock_handle.return_value = mock_state
+ result = execute_test_command(config, "prompt")
+ assert result == (True, "new prompt", False, 1)
+
+def test_execute_test_command_auto():
+ """Test auto test execution."""
+ config = {"test_cmd": "test", "max_test_cmd_retries": 3}
+ with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_test_command") as mock_run:
+ mock_state = TestState("new prompt", 1, True, True)
+ mock_run.return_value = mock_state
+ result = execute_test_command(config, "prompt", auto_test=True)
+ assert result == (True, "new prompt", True, 1)
\ No newline at end of file