diff --git a/.python-version b/.python-version
new file mode 100644
index 0000000..56bb660
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.12.7
diff --git a/README.md b/README.md
index 12898fc..28fa0ca 100644
--- a/README.md
+++ b/README.md
@@ -179,6 +179,9 @@ ra-aid -m "Add new feature" --verbose
 - `--temperature`: LLM temperature (0.0-2.0) to control randomness in responses
 - `--disable-limit-tokens`: Disable token limiting for Anthropic Claude react agents
 - `--recursion-limit`: Maximum recursion depth for agent operations (default: 100)
+- `--test-cmd`: Custom command used to run tests. If set, the user will be asked whether to run the test command
+- `--auto-test`: Automatically run tests after each code change
+- `--max-test-cmd-retries`: Maximum number of test command retry attempts (default: 3)
 - `--version`: Show program version number and exit
 
 ### Example Tasks
diff --git a/ra_aid/__main__.py b/ra_aid/__main__.py
index a360b2d..a5bcee7 100644
--- a/ra_aid/__main__.py
+++ b/ra_aid/__main__.py
@@ -5,7 +5,7 @@ from datetime import datetime
 from rich.panel import Panel
 from rich.console import Console
 from langgraph.checkpoint.memory import MemorySaver
-from ra_aid.config import DEFAULT_RECURSION_LIMIT
+from ra_aid.config import DEFAULT_MAX_TEST_CMD_RETRIES, DEFAULT_RECURSION_LIMIT
 from ra_aid.env import validate_environment
 from ra_aid.project_info import get_project_info, format_project_info
 from ra_aid.tools.memory import _global_memory
@@ -148,7 +148,22 @@ Examples:
     parser.add_argument(
         "--aider-config", type=str, help="Specify the aider config file path"
     )
-
+    parser.add_argument(
+        "--test-cmd",
+        type=str,
+        help="Test command to run before completing tasks (e.g. 'pytest tests/')"
+    )
+    parser.add_argument(
+        "--auto-test",
+        action="store_true",
+        help="Automatically run tests before completing tasks"
+    )
+    parser.add_argument(
+        "--max-test-cmd-retries",
+        type=int,
+        default=DEFAULT_MAX_TEST_CMD_RETRIES,
+        help="Maximum number of retries for the test command (default: 3)",
+    )
     if args is None:
         args = sys.argv[1:]
     parsed_args = parser.parse_args(args)
@@ -192,6 +207,10 @@ Examples:
     # Validate recursion limit is positive
     if parsed_args.recursion_limit <= 0:
         parser.error("Recursion limit must be positive")
+
+    # If --auto-test is enabled, validate that --test-cmd is also provided
+    if parsed_args.auto_test and not parsed_args.test_cmd:
+        parser.error("Test command is required when using --auto-test")
 
     return parsed_args
 
@@ -344,6 +363,9 @@ def main():
         "web_research_enabled": web_research_enabled,
         "aider_config": args.aider_config,
         "limit_tokens": args.disable_limit_tokens,
+        "auto_test": args.auto_test,
+        "test_cmd": args.test_cmd,
+        "max_test_cmd_retries": args.max_test_cmd_retries,
     }
 
     # Store config in global memory for access by is_informational_query
diff --git a/ra_aid/agent_utils.py b/ra_aid/agent_utils.py
index f462778..7cba70b 100644
--- a/ra_aid/agent_utils.py
+++ b/ra_aid/agent_utils.py
@@ -12,7 +12,7 @@ import signal
 from langgraph.checkpoint.memory import MemorySaver
 from langgraph.prebuilt.chat_agent_executor import AgentState
 
-from ra_aid.config import DEFAULT_RECURSION_LIMIT
+from ra_aid.config import DEFAULT_MAX_TEST_CMD_RETRIES, DEFAULT_RECURSION_LIMIT
 from ra_aid.models_tokens import DEFAULT_TOKEN_LIMIT, models_tokens
 from ra_aid.agents.ciayn_agent import CiaynAgent
 import threading
@@ -55,6 +55,8 @@ from ra_aid.prompts import (
 
 from langchain_core.messages import HumanMessage
 from anthropic import APIError, APITimeoutError, RateLimitError, InternalServerError
+from ra_aid.tools.human import ask_human +from ra_aid.tools.shell import run_shell_command from rich.console import Console from rich.markdown import Markdown from rich.panel import Panel @@ -64,6 +66,7 @@ from ra_aid.tools.memory import ( get_memory_value, get_related_files, ) +from ra_aid.tools.handle_user_defined_test_cmd_execution import execute_test_command console = Console() @@ -719,6 +722,10 @@ def run_agent_with_retry(agent, prompt: str, config: dict) -> Optional[str]: max_retries = 20 base_delay = 1 + test_attempts = 0 + max_test_retries = config.get("max_test_cmd_retries", DEFAULT_MAX_TEST_CMD_RETRIES) + auto_test = config.get("auto_test", False) + original_prompt = prompt with InterruptibleSection(): try: @@ -745,6 +752,19 @@ def run_agent_with_retry(agent, prompt: str, config: dict) -> Optional[str]: _global_memory["task_completed"] = False _global_memory["completion_message"] = "" break + + # Execute test command if configured + should_break, prompt, auto_test, test_attempts = execute_test_command( + config, + original_prompt, + test_attempts, + auto_test + ) + if should_break: + break + if prompt != original_prompt: + continue + logger.debug("Agent run completed successfully") return "Agent run completed successfully" except (KeyboardInterrupt, AgentInterrupt): diff --git a/ra_aid/config.py b/ra_aid/config.py index 2a2a187..6df55f4 100644 --- a/ra_aid/config.py +++ b/ra_aid/config.py @@ -1,3 +1,4 @@ """Configuration utilities.""" DEFAULT_RECURSION_LIMIT = 100 +DEFAULT_MAX_TEST_CMD_RETRIES = 3 \ No newline at end of file diff --git a/ra_aid/llm.py b/ra_aid/llm.py index 8009a23..5e22fa1 100644 --- a/ra_aid/llm.py +++ b/ra_aid/llm.py @@ -197,4 +197,4 @@ def initialize_expert_llm( provider: str = "openai", model_name: str = "o1" ) -> BaseChatModel: """Initialize an expert language model client based on the specified provider and model.""" - return create_llm_client(provider, model_name, temperature=None, is_expert=True) + return create_llm_client(provider, model_name, temperature=None, is_expert=True) \ No newline at end of file diff --git a/ra_aid/tools/handle_user_defined_test_cmd_execution.py b/ra_aid/tools/handle_user_defined_test_cmd_execution.py new file mode 100644 index 0000000..993ffd7 --- /dev/null +++ b/ra_aid/tools/handle_user_defined_test_cmd_execution.py @@ -0,0 +1,167 @@ +"""Utilities for executing and managing user-defined test commands.""" + +from typing import Dict, Any, Tuple, Optional +from dataclasses import dataclass +from rich.console import Console +from rich.markdown import Markdown +from rich.panel import Panel +from ra_aid.tools.human import ask_human +from ra_aid.tools.shell import run_shell_command +from ra_aid.logging_config import get_logger + +console = Console() +logger = get_logger(__name__) + +@dataclass +class TestState: + """State for test execution.""" + prompt: str + test_attempts: int + auto_test: bool + should_break: bool = False + +def display_test_failure(attempts: int, max_retries: int) -> None: + """Display test failure message. + + Args: + attempts: Current number of attempts + max_retries: Maximum allowed retries + """ + console.print( + Panel( + Markdown(f"Test failed. Attempt number {attempts} of {max_retries}. Retrying and informing of failure output"), + title="🔎 User Defined Test", + border_style="red bold" + ) + ) + +def handle_test_failure(state: TestState, original_prompt: str, test_result: Dict[str, Any]) -> TestState: + """Handle test command failure. 
+ + Args: + state: Current test state + original_prompt: Original prompt text + test_result: Test command result + + Returns: + Updated test state + """ + state.prompt = f"{original_prompt}. Previous attempt failed with: {test_result['output']}" + display_test_failure(state.test_attempts, 5) # Default max retries + state.should_break = False + return state + +def run_test_command(cmd: str, state: TestState, original_prompt: str) -> TestState: + """Run test command and handle result. + + Args: + cmd: Test command to execute + state: Current test state + original_prompt: Original prompt text + + Returns: + Updated test state + """ + try: + test_result = run_shell_command(cmd) + state.test_attempts += 1 + + if not test_result["success"]: + return handle_test_failure(state, original_prompt, test_result) + + state.should_break = True + return state + + except Exception as e: + logger.warning(f"Test command execution failed: {str(e)}") + state.test_attempts += 1 + state.should_break = True + return state + +def handle_user_response(response: str, state: TestState, cmd: str, original_prompt: str) -> TestState: + """Handle user's response to test prompt. + + Args: + response: User's response (y/n/a) + state: Current test state + cmd: Test command + original_prompt: Original prompt text + + Returns: + Updated test state + """ + response = response.strip().lower() + + if response == "n": + state.should_break = True + return state + + if response == "a": + state.auto_test = True + return run_test_command(cmd, state, original_prompt) + + if response == "y": + return run_test_command(cmd, state, original_prompt) + + return state + +def check_max_retries(attempts: int, max_retries: int) -> bool: + """Check if max retries reached. + + Args: + attempts: Current number of attempts + max_retries: Maximum allowed retries + + Returns: + True if max retries reached + """ + if attempts >= max_retries: + logger.warning("Max test retries reached") + return True + return False + +def execute_test_command( + config: Dict[str, Any], + original_prompt: str, + test_attempts: int = 0, + auto_test: bool = False, +) -> Tuple[bool, str, bool, int]: + """Execute a test command and handle retries. + + Args: + config: Configuration dictionary containing test settings + original_prompt: The original prompt to append errors to + test_attempts: Current number of test attempts + auto_test: Whether auto-test mode is enabled + + Returns: + Tuple containing: + - bool: Whether to break the retry loop + - str: Updated prompt + - bool: Updated auto_test flag + - int: Updated test_attempts count + """ + state = TestState( + prompt=original_prompt, + test_attempts=test_attempts, + auto_test=auto_test + ) + + if not config.get("test_cmd"): + state.should_break = True + return state.should_break, state.prompt, state.auto_test, state.test_attempts + + max_retries = config.get("max_test_cmd_retries", 5) + cmd = config["test_cmd"] + + if not auto_test: + print() + response = ask_human.invoke({"question": "Would you like to run the test command? 
(y=yes, n=no, a=enable auto-test)"}) + state = handle_user_response(response, state, cmd, original_prompt) + else: + if check_max_retries(test_attempts, max_retries): + state.should_break = True + else: + state = run_test_command(cmd, state, original_prompt) + + return state.should_break, state.prompt, state.auto_test, state.test_attempts \ No newline at end of file diff --git a/ra_aid/tools/programmer.py b/ra_aid/tools/programmer.py index 488cdec..438d27f 100644 --- a/ra_aid/tools/programmer.py +++ b/ra_aid/tools/programmer.py @@ -45,6 +45,8 @@ Returns: { "output": stdout+stderr, "return_code": 0 if success, "success": True "--no-auto-commits", "--dark-mode", "--no-suggest-shell-commands", + "--no-show-release-notes", + "--no-check-update", ] # Add config file if specified diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..862f852 --- /dev/null +++ b/scripts/__init__.py @@ -0,0 +1 @@ +"""Scripts package.""" diff --git a/tests/ra_aid/tools/test_execution.py b/tests/ra_aid/tools/test_execution.py new file mode 100644 index 0000000..316ebcd --- /dev/null +++ b/tests/ra_aid/tools/test_execution.py @@ -0,0 +1,218 @@ +"""Tests for test execution utilities.""" + +import pytest +from unittest.mock import Mock, patch +from ra_aid.tools.handle_user_defined_test_cmd_execution import execute_test_command + +# Test cases for execute_test_command +test_cases = [ + # Format: (name, config, original_prompt, test_attempts, auto_test, + # mock_responses, expected_result) + + # Case 1: No test command configured + ( + "no_test_command", + {"other_config": "value"}, + "original prompt", + 0, + False, + {}, + (True, "original prompt", False, 0) + ), + + # Case 2: User declines to run test + ( + "user_declines_test", + {"test_cmd": "pytest"}, + "original prompt", + 0, + False, + {"ask_human_response": "n"}, + (True, "original prompt", False, 0) + ), + + # Case 3: User enables auto-test + ( + "user_enables_auto_test", + {"test_cmd": "pytest"}, + "original prompt", + 0, + False, + { + "ask_human_response": "a", + "shell_cmd_result": {"success": True, "output": "All tests passed"} + }, + (True, "original prompt", True, 1) + ), + + # Case 4: Auto-test success + ( + "auto_test_success", + {"test_cmd": "pytest"}, + "original prompt", + 0, + True, + {"shell_cmd_result": {"success": True, "output": "All tests passed"}}, + (True, "original prompt", True, 1) + ), + + # Case 5: Auto-test failure with retry + ( + "auto_test_failure_retry", + {"test_cmd": "pytest"}, + "original prompt", + 0, + True, + {"shell_cmd_result": {"success": False, "output": "Test failed"}}, + (False, "original prompt. Previous attempt failed with: Test failed", True, 1) + ), + + # Case 6: Max retries reached + ( + "max_retries_reached", + {"test_cmd": "pytest", "max_test_cmd_retries": 3}, + "original prompt", + 3, + True, + {}, + (True, "original prompt", True, 3) + ), + + # Case 7: User runs test manually + ( + "manual_test_success", + {"test_cmd": "pytest"}, + "original prompt", + 0, + False, + { + "ask_human_response": "y", + "shell_cmd_result": {"success": True, "output": "All tests passed"} + }, + (True, "original prompt", False, 1) + ), + + # Case 8: Manual test failure + ( + "manual_test_failure", + {"test_cmd": "pytest"}, + "original prompt", + 0, + False, + { + "ask_human_response": "y", + "shell_cmd_result": {"success": False, "output": "Test failed"} + }, + (False, "original prompt. 
Previous attempt failed with: Test failed", False, 1) + ), + + # Case 9: Manual test error + ( + "manual_test_error", + {"test_cmd": "pytest"}, + "original prompt", + 0, + False, + { + "ask_human_response": "y", + "shell_cmd_result_error": Exception("Command failed") + }, + (True, "original prompt", False, 1) + ), + + # Case 10: Auto-test error + ( + "auto_test_error", + {"test_cmd": "pytest"}, + "original prompt", + 0, + True, + { + "shell_cmd_result_error": Exception("Command failed") + }, + (True, "original prompt", True, 1) + ), +] + +@pytest.mark.parametrize( + "name,config,original_prompt,test_attempts,auto_test,mock_responses,expected", + test_cases, + ids=[case[0] for case in test_cases] +) +def test_execute_test_command( + name: str, + config: dict, + original_prompt: str, + test_attempts: int, + auto_test: bool, + mock_responses: dict, + expected: tuple, +) -> None: + """Test execute_test_command with different scenarios. + + Args: + name: Test case name + config: Test configuration + original_prompt: Original prompt text + test_attempts: Number of test attempts + auto_test: Auto-test flag + mock_responses: Mock response data + expected: Expected result tuple + """ + with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.ask_human") as mock_ask_human, \ + patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_shell_command") as mock_run_cmd, \ + patch("ra_aid.tools.handle_user_defined_test_cmd_execution.console") as mock_console, \ + patch("ra_aid.tools.handle_user_defined_test_cmd_execution.logger") as mock_logger: + + # Configure mocks based on mock_responses + if "ask_human_response" in mock_responses: + mock_ask_human.invoke.return_value = mock_responses["ask_human_response"] + + if "shell_cmd_result_error" in mock_responses: + mock_run_cmd.side_effect = mock_responses["shell_cmd_result_error"] + elif "shell_cmd_result" in mock_responses: + mock_run_cmd.return_value = mock_responses["shell_cmd_result"] + + # Execute test command + result = execute_test_command( + config, + original_prompt, + test_attempts, + auto_test + ) + + # Verify result matches expected + assert result == expected, f"Test case '{name}' failed" + + # Verify mock interactions + if config.get("test_cmd") and not auto_test: + mock_ask_human.invoke.assert_called_once() + + if auto_test and test_attempts < config.get("max_test_cmd_retries", 5): + if config.get("test_cmd"): + mock_run_cmd.assert_called_once_with(config["test_cmd"]) + + # Verify logging for max retries + if test_attempts >= config.get("max_test_cmd_retries", 5): + mock_logger.warning.assert_called_once_with("Max test retries reached") + +def test_execute_test_command_error_handling() -> None: + """Test error handling in execute_test_command.""" + config = {"test_cmd": "pytest"} + + with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_shell_command") as mock_run_cmd, \ + patch("ra_aid.tools.handle_user_defined_test_cmd_execution.logger") as mock_logger: + + # Simulate run_shell_command raising an exception + mock_run_cmd.side_effect = Exception("Command failed") + + result = execute_test_command( + config, + "original prompt", + 0, + True + ) + + # Should handle error and continue + assert result == (True, "original prompt", True, 1) + mock_logger.warning.assert_called_once() \ No newline at end of file diff --git a/tests/ra_aid/tools/test_handle_user_defined_test_cmd_execution.py b/tests/ra_aid/tools/test_handle_user_defined_test_cmd_execution.py new file mode 100644 index 0000000..4f436b5 --- /dev/null +++ 
b/tests/ra_aid/tools/test_handle_user_defined_test_cmd_execution.py @@ -0,0 +1,109 @@ +"""Tests for user-defined test command execution utilities.""" + +import pytest +from unittest.mock import patch, Mock +from ra_aid.tools.handle_user_defined_test_cmd_execution import ( + TestState, + execute_test_command, + handle_test_failure, + run_test_command, + handle_user_response, + check_max_retries +) + +@pytest.fixture +def test_state(): + """Create a test state fixture.""" + return TestState( + prompt="test prompt", + test_attempts=0, + auto_test=False + ) + +def test_check_max_retries(): + """Test max retries check.""" + assert not check_max_retries(2, 3) + assert check_max_retries(3, 3) + assert check_max_retries(4, 3) + +def test_handle_test_failure(test_state): + """Test handling of test failures.""" + test_result = {"output": "error message"} + with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.display_test_failure"): + state = handle_test_failure(test_state, "original", test_result) + assert not state.should_break + assert "error message" in state.prompt + +def test_run_test_command_success(test_state): + """Test successful test command execution.""" + with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_shell_command") as mock_run: + mock_run.return_value = {"success": True, "output": ""} + state = run_test_command("test", test_state, "original") + assert state.should_break + assert state.test_attempts == 1 + +def test_run_test_command_failure(test_state): + """Test failed test command execution.""" + with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_shell_command") as mock_run: + mock_run.return_value = {"success": False, "output": "error"} + state = run_test_command("test", test_state, "original") + assert not state.should_break + assert state.test_attempts == 1 + assert "error" in state.prompt + +def test_run_test_command_error(test_state): + """Test test command execution error.""" + with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_shell_command") as mock_run: + mock_run.side_effect = Exception("Command failed") + state = run_test_command("test", test_state, "original") + assert state.should_break + assert state.test_attempts == 1 + +def test_handle_user_response_no(test_state): + """Test handling of 'no' response.""" + state = handle_user_response("n", test_state, "test", "original") + assert state.should_break + assert not state.auto_test + +def test_handle_user_response_auto(test_state): + """Test handling of 'auto' response.""" + with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_test_command") as mock_run: + mock_state = TestState("prompt", 1, True, True) + mock_run.return_value = mock_state + state = handle_user_response("a", test_state, "test", "original") + assert state.auto_test + mock_run.assert_called_once() + +def test_handle_user_response_yes(test_state): + """Test handling of 'yes' response.""" + with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_test_command") as mock_run: + mock_state = TestState("prompt", 1, False, True) + mock_run.return_value = mock_state + state = handle_user_response("y", test_state, "test", "original") + assert not state.auto_test + mock_run.assert_called_once() + +def test_execute_test_command_no_cmd(): + """Test execution with no test command.""" + result = execute_test_command({}, "prompt") + assert result == (True, "prompt", False, 0) + +def test_execute_test_command_manual(): + """Test manual test execution.""" + config = {"test_cmd": 
"test"} + with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.ask_human") as mock_ask, \ + patch("ra_aid.tools.handle_user_defined_test_cmd_execution.handle_user_response") as mock_handle: + mock_ask.invoke.return_value = "y" + mock_state = TestState("new prompt", 1, False, True) + mock_handle.return_value = mock_state + result = execute_test_command(config, "prompt") + assert result == (True, "new prompt", False, 1) + +def test_execute_test_command_auto(): + """Test auto test execution.""" + config = {"test_cmd": "test", "max_test_cmd_retries": 3} + with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_test_command") as mock_run: + mock_state = TestState("new prompt", 1, True, True) + mock_run.return_value = mock_state + result = execute_test_command(config, "prompt", auto_test=True) + assert result == (True, "new prompt", True, 1) \ No newline at end of file