Jose Leon 2025-01-25 03:02:07 +00:00
parent 9533ff1957
commit 3896236fa1
11 changed files with 548 additions and 4 deletions

.python-version Normal file

@@ -0,0 +1 @@
3.12.7


@@ -179,6 +179,9 @@ ra-aid -m "Add new feature" --verbose
- `--temperature`: LLM temperature (0.0-2.0) to control randomness in responses
- `--disable-limit-tokens`: Disable token limiting for Anthropic Claude react agents
- `--recursion-limit`: Maximum recursion depth for agent operations (default: 100)
- `--test-cmd`: Custom command used to run tests. If set, the user will be asked whether to run it before tasks are marked complete (see the sketch after this list)
- `--auto-test`: Automatically run tests after each code change
- `--max-test-cmd-retries`: Maximum number of test command retry attempts (default: 3)
- `--version`: Show program version number and exit
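
A minimal, standalone sketch of how these three options fit together (plain `argparse`, not the actual ra-aid parser; the flag names and the default of 3 mirror the list above, everything else is illustrative):

```python
import argparse

# Standalone sketch of the three test-related flags described above.
# It mirrors the options list only; it is not the real ra-aid parser.
parser = argparse.ArgumentParser(prog="ra-aid")
parser.add_argument("--test-cmd", type=str,
                    help="Custom command to run tests (e.g. 'pytest tests/')")
parser.add_argument("--auto-test", action="store_true",
                    help="Automatically run tests after each code change")
parser.add_argument("--max-test-cmd-retries", type=int, default=3,
                    help="Maximum number of test command retry attempts")

args = parser.parse_args(["--test-cmd", "pytest tests/", "--auto-test"])

# --auto-test is only meaningful together with --test-cmd.
if args.auto_test and not args.test_cmd:
    parser.error("Test command is required when using --auto-test")

print(args.test_cmd, args.auto_test, args.max_test_cmd_retries)
# -> pytest tests/ True 3
```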
### Example Tasks


@@ -5,7 +5,7 @@ from datetime import datetime
from rich.panel import Panel
from rich.console import Console
from langgraph.checkpoint.memory import MemorySaver
from ra_aid.config import DEFAULT_RECURSION_LIMIT
from ra_aid.config import DEFAULT_MAX_TEST_CMD_RETRIES, DEFAULT_RECURSION_LIMIT
from ra_aid.env import validate_environment
from ra_aid.project_info import get_project_info, format_project_info
from ra_aid.tools.memory import _global_memory
@@ -148,7 +148,22 @@ Examples:
parser.add_argument(
"--aider-config", type=str, help="Specify the aider config file path"
)
parser.add_argument(
"--test-cmd",
type=str,
help="Test command to run before completing tasks (e.g. 'pytest tests/')"
)
parser.add_argument(
"--auto-test",
action="store_true",
help="Automatically run tests before completing tasks"
)
parser.add_argument(
"--max-test-cmd-retries",
type=int,
default=DEFAULT_MAX_TEST_CMD_RETRIES,
help="Maximum number of retries for the test command (default: 10)",
)
if args is None:
args = sys.argv[1:]
parsed_args = parser.parse_args(args)
@@ -192,6 +207,10 @@ Examples:
# Validate recursion limit is positive
if parsed_args.recursion_limit <= 0:
parser.error("Recursion limit must be positive")
# If --auto-test is enabled, require --test-cmd as well
if parsed_args.auto_test and not parsed_args.test_cmd:
parser.error("Test command is required when using --auto-test")
return parsed_args
@@ -344,6 +363,9 @@ def main():
"web_research_enabled": web_research_enabled,
"aider_config": args.aider_config,
"limit_tokens": args.disable_limit_tokens,
"auto_test": args.auto_test,
"test_cmd": args.test_cmd,
"max_test_cmd_retries": args.max_test_cmd_retries,
}
# Store config in global memory for access by is_informational_query


@@ -12,7 +12,7 @@ import signal
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt.chat_agent_executor import AgentState
from ra_aid.config import DEFAULT_RECURSION_LIMIT
from ra_aid.config import DEFAULT_MAX_TEST_CMD_RETRIES, DEFAULT_RECURSION_LIMIT
from ra_aid.models_tokens import DEFAULT_TOKEN_LIMIT, models_tokens
from ra_aid.agents.ciayn_agent import CiaynAgent
import threading
@@ -55,6 +55,8 @@ from ra_aid.prompts import (
from langchain_core.messages import HumanMessage
from anthropic import APIError, APITimeoutError, RateLimitError, InternalServerError
from ra_aid.tools.human import ask_human
from ra_aid.tools.shell import run_shell_command
from rich.console import Console
from rich.markdown import Markdown
from rich.panel import Panel
@@ -64,6 +66,7 @@ from ra_aid.tools.memory import (
get_memory_value,
get_related_files,
)
from ra_aid.tools.handle_user_defined_test_cmd_execution import execute_test_command
console = Console()
@@ -719,6 +722,10 @@ def run_agent_with_retry(agent, prompt: str, config: dict) -> Optional[str]:
max_retries = 20
base_delay = 1
test_attempts = 0
max_test_retries = config.get("max_test_cmd_retries", DEFAULT_MAX_TEST_CMD_RETRIES)
auto_test = config.get("auto_test", False)
original_prompt = prompt
with InterruptibleSection():
try:
@@ -745,6 +752,19 @@ def run_agent_with_retry(agent, prompt: str, config: dict) -> Optional[str]:
_global_memory["task_completed"] = False
_global_memory["completion_message"] = ""
break
# Execute test command if configured
should_break, prompt, auto_test, test_attempts = execute_test_command(
config,
original_prompt,
test_attempts,
auto_test
)
if should_break:
break
if prompt != original_prompt:
continue
logger.debug("Agent run completed successfully")
return "Agent run completed successfully"
except (KeyboardInterrupt, AgentInterrupt):

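To make the control flow above easier to follow, here is a simplified sketch of how the caller interprets the four-tuple returned by `execute_test_command` (`run_one_iteration` is a hypothetical stand-in for streaming the agent; interrupt and API-retry handling from the real loop above is omitted):

```python
from ra_aid.tools.handle_user_defined_test_cmd_execution import execute_test_command

def run_until_tests_pass(run_one_iteration, original_prompt: str, config: dict) -> str:
    """Simplified sketch of the retry loop above; run_one_iteration is a
    hypothetical stand-in for the agent stream."""
    prompt = original_prompt
    test_attempts = 0
    auto_test = config.get("auto_test", False)

    while True:
        run_one_iteration(prompt)

        # The returned tuple tells the caller what to do next.
        should_break, prompt, auto_test, test_attempts = execute_test_command(
            config, original_prompt, test_attempts, auto_test
        )
        if should_break:
            # Tests passed, the user declined, or the retry budget is exhausted.
            return "Agent run completed successfully"
        # Otherwise the prompt now embeds the failure output, so iterate again.
```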

@@ -1,3 +1,4 @@
"""Configuration utilities."""
DEFAULT_RECURSION_LIMIT = 100
DEFAULT_MAX_TEST_CMD_RETRIES = 3


@@ -197,4 +197,4 @@ def initialize_expert_llm(
provider: str = "openai", model_name: str = "o1"
) -> BaseChatModel:
"""Initialize an expert language model client based on the specified provider and model."""
return create_llm_client(provider, model_name, temperature=None, is_expert=True)
return create_llm_client(provider, model_name, temperature=None, is_expert=True)


@@ -0,0 +1,167 @@
"""Utilities for executing and managing user-defined test commands."""
from typing import Dict, Any, Tuple, Optional
from dataclasses import dataclass
from rich.console import Console
from rich.markdown import Markdown
from rich.panel import Panel
from ra_aid.tools.human import ask_human
from ra_aid.tools.shell import run_shell_command
from ra_aid.logging_config import get_logger
from ra_aid.config import DEFAULT_MAX_TEST_CMD_RETRIES
console = Console()
logger = get_logger(__name__)
@dataclass
class TestState:
"""State for test execution."""
prompt: str
test_attempts: int
auto_test: bool
should_break: bool = False
def display_test_failure(attempts: int, max_retries: int) -> None:
"""Display test failure message.
Args:
attempts: Current number of attempts
max_retries: Maximum allowed retries
"""
console.print(
Panel(
Markdown(f"Test failed. Attempt number {attempts} of {max_retries}. Retrying and informing of failure output"),
title="🔎 User Defined Test",
border_style="red bold"
)
)
def handle_test_failure(state: TestState, original_prompt: str, test_result: Dict[str, Any]) -> TestState:
"""Handle test command failure.
Args:
state: Current test state
original_prompt: Original prompt text
test_result: Test command result
Returns:
Updated test state
"""
state.prompt = f"{original_prompt}. Previous attempt failed with: <test_cmd_stdout>{test_result['output']}</test_cmd_stdout>"
display_test_failure(state.test_attempts, DEFAULT_MAX_TEST_CMD_RETRIES)  # shows the default cap; a config override is not threaded through here
state.should_break = False
return state
def run_test_command(cmd: str, state: TestState, original_prompt: str) -> TestState:
"""Run test command and handle result.
Args:
cmd: Test command to execute
state: Current test state
original_prompt: Original prompt text
Returns:
Updated test state
"""
try:
test_result = run_shell_command(cmd)
state.test_attempts += 1
if not test_result["success"]:
return handle_test_failure(state, original_prompt, test_result)
state.should_break = True
return state
except Exception as e:
logger.warning(f"Test command execution failed: {str(e)}")
state.test_attempts += 1
state.should_break = True
return state
def handle_user_response(response: str, state: TestState, cmd: str, original_prompt: str) -> TestState:
"""Handle user's response to test prompt.
Args:
response: User's response (y/n/a)
state: Current test state
cmd: Test command
original_prompt: Original prompt text
Returns:
Updated test state
"""
response = response.strip().lower()
if response == "n":
state.should_break = True
return state
if response == "a":
state.auto_test = True
return run_test_command(cmd, state, original_prompt)
if response == "y":
return run_test_command(cmd, state, original_prompt)
return state
def check_max_retries(attempts: int, max_retries: int) -> bool:
"""Check if max retries reached.
Args:
attempts: Current number of attempts
max_retries: Maximum allowed retries
Returns:
True if max retries reached
"""
if attempts >= max_retries:
logger.warning("Max test retries reached")
return True
return False
def execute_test_command(
config: Dict[str, Any],
original_prompt: str,
test_attempts: int = 0,
auto_test: bool = False,
) -> Tuple[bool, str, bool, int]:
"""Execute a test command and handle retries.
Args:
config: Configuration dictionary containing test settings
original_prompt: The original prompt to append errors to
test_attempts: Current number of test attempts
auto_test: Whether auto-test mode is enabled
Returns:
Tuple containing:
- bool: Whether to break the retry loop
- str: Updated prompt
- bool: Updated auto_test flag
- int: Updated test_attempts count
"""
state = TestState(
prompt=original_prompt,
test_attempts=test_attempts,
auto_test=auto_test
)
if not config.get("test_cmd"):
state.should_break = True
return state.should_break, state.prompt, state.auto_test, state.test_attempts
max_retries = config.get("max_test_cmd_retries", DEFAULT_MAX_TEST_CMD_RETRIES)
cmd = config["test_cmd"]
if not auto_test:
print()
response = ask_human.invoke({"question": "Would you like to run the test command? (y=yes, n=no, a=enable auto-test)"})
state = handle_user_response(response, state, cmd, original_prompt)
else:
if check_max_retries(test_attempts, max_retries):
state.should_break = True
else:
state = run_test_command(cmd, state, original_prompt)
return state.should_break, state.prompt, state.auto_test, state.test_attempts
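
As a usage illustration, a direct call in auto-test mode might look like the sketch below (the config values are made up; the returned tuple follows the docstring above):

```python
from ra_aid.tools.handle_user_defined_test_cmd_execution import execute_test_command

# Hypothetical config; the keys mirror what the CLI stores for the agent.
config = {"test_cmd": "pytest tests/ -q", "max_test_cmd_retries": 3}

should_break, prompt, auto_test, attempts = execute_test_command(
    config,
    "Add new feature",  # original prompt
    test_attempts=0,
    auto_test=True,     # skip the y/n/a prompt and run the command directly
)

# Success: should_break is True and the prompt comes back unchanged.
# Failure: should_break is False and the prompt now embeds the test output in
# <test_cmd_stdout>...</test_cmd_stdout> so the agent can retry with that context.
print(should_break, attempts)
```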


@@ -45,6 +45,8 @@ Returns: { "output": stdout+stderr, "return_code": 0 if success, "success": True
"--no-auto-commits",
"--dark-mode",
"--no-suggest-shell-commands",
"--no-show-release-notes",
"--no-check-update",
]
# Add config file if specified

scripts/__init__.py Normal file

@@ -0,0 +1 @@
"""Scripts package."""


@@ -0,0 +1,218 @@
"""Tests for test execution utilities."""
import pytest
from unittest.mock import Mock, patch
from ra_aid.tools.handle_user_defined_test_cmd_execution import execute_test_command
# Test cases for execute_test_command
test_cases = [
# Format: (name, config, original_prompt, test_attempts, auto_test,
# mock_responses, expected_result)
# Case 1: No test command configured
(
"no_test_command",
{"other_config": "value"},
"original prompt",
0,
False,
{},
(True, "original prompt", False, 0)
),
# Case 2: User declines to run test
(
"user_declines_test",
{"test_cmd": "pytest"},
"original prompt",
0,
False,
{"ask_human_response": "n"},
(True, "original prompt", False, 0)
),
# Case 3: User enables auto-test
(
"user_enables_auto_test",
{"test_cmd": "pytest"},
"original prompt",
0,
False,
{
"ask_human_response": "a",
"shell_cmd_result": {"success": True, "output": "All tests passed"}
},
(True, "original prompt", True, 1)
),
# Case 4: Auto-test success
(
"auto_test_success",
{"test_cmd": "pytest"},
"original prompt",
0,
True,
{"shell_cmd_result": {"success": True, "output": "All tests passed"}},
(True, "original prompt", True, 1)
),
# Case 5: Auto-test failure with retry
(
"auto_test_failure_retry",
{"test_cmd": "pytest"},
"original prompt",
0,
True,
{"shell_cmd_result": {"success": False, "output": "Test failed"}},
(False, "original prompt. Previous attempt failed with: <test_cmd_stdout>Test failed</test_cmd_stdout>", True, 1)
),
# Case 6: Max retries reached
(
"max_retries_reached",
{"test_cmd": "pytest", "max_test_cmd_retries": 3},
"original prompt",
3,
True,
{},
(True, "original prompt", True, 3)
),
# Case 7: User runs test manually
(
"manual_test_success",
{"test_cmd": "pytest"},
"original prompt",
0,
False,
{
"ask_human_response": "y",
"shell_cmd_result": {"success": True, "output": "All tests passed"}
},
(True, "original prompt", False, 1)
),
# Case 8: Manual test failure
(
"manual_test_failure",
{"test_cmd": "pytest"},
"original prompt",
0,
False,
{
"ask_human_response": "y",
"shell_cmd_result": {"success": False, "output": "Test failed"}
},
(False, "original prompt. Previous attempt failed with: <test_cmd_stdout>Test failed</test_cmd_stdout>", False, 1)
),
# Case 9: Manual test error
(
"manual_test_error",
{"test_cmd": "pytest"},
"original prompt",
0,
False,
{
"ask_human_response": "y",
"shell_cmd_result_error": Exception("Command failed")
},
(True, "original prompt", False, 1)
),
# Case 10: Auto-test error
(
"auto_test_error",
{"test_cmd": "pytest"},
"original prompt",
0,
True,
{
"shell_cmd_result_error": Exception("Command failed")
},
(True, "original prompt", True, 1)
),
]
@pytest.mark.parametrize(
"name,config,original_prompt,test_attempts,auto_test,mock_responses,expected",
test_cases,
ids=[case[0] for case in test_cases]
)
def test_execute_test_command(
name: str,
config: dict,
original_prompt: str,
test_attempts: int,
auto_test: bool,
mock_responses: dict,
expected: tuple,
) -> None:
"""Test execute_test_command with different scenarios.
Args:
name: Test case name
config: Test configuration
original_prompt: Original prompt text
test_attempts: Number of test attempts
auto_test: Auto-test flag
mock_responses: Mock response data
expected: Expected result tuple
"""
with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.ask_human") as mock_ask_human, \
patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_shell_command") as mock_run_cmd, \
patch("ra_aid.tools.handle_user_defined_test_cmd_execution.console") as mock_console, \
patch("ra_aid.tools.handle_user_defined_test_cmd_execution.logger") as mock_logger:
# Configure mocks based on mock_responses
if "ask_human_response" in mock_responses:
mock_ask_human.invoke.return_value = mock_responses["ask_human_response"]
if "shell_cmd_result_error" in mock_responses:
mock_run_cmd.side_effect = mock_responses["shell_cmd_result_error"]
elif "shell_cmd_result" in mock_responses:
mock_run_cmd.return_value = mock_responses["shell_cmd_result"]
# Execute test command
result = execute_test_command(
config,
original_prompt,
test_attempts,
auto_test
)
# Verify result matches expected
assert result == expected, f"Test case '{name}' failed"
# Verify mock interactions
if config.get("test_cmd") and not auto_test:
mock_ask_human.invoke.assert_called_once()
if auto_test and test_attempts < config.get("max_test_cmd_retries", 5):
if config.get("test_cmd"):
mock_run_cmd.assert_called_once_with(config["test_cmd"])
# Verify logging for max retries
if test_attempts >= config.get("max_test_cmd_retries", 5):
mock_logger.warning.assert_called_once_with("Max test retries reached")
def test_execute_test_command_error_handling() -> None:
"""Test error handling in execute_test_command."""
config = {"test_cmd": "pytest"}
with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_shell_command") as mock_run_cmd, \
patch("ra_aid.tools.handle_user_defined_test_cmd_execution.logger") as mock_logger:
# Simulate run_shell_command raising an exception
mock_run_cmd.side_effect = Exception("Command failed")
result = execute_test_command(
config,
"original prompt",
0,
True
)
# Should handle error and continue
assert result == (True, "original prompt", True, 1)
mock_logger.warning.assert_called_once()


@@ -0,0 +1,109 @@
"""Tests for user-defined test command execution utilities."""
import pytest
from unittest.mock import patch, Mock
from ra_aid.tools.handle_user_defined_test_cmd_execution import (
TestState,
execute_test_command,
handle_test_failure,
run_test_command,
handle_user_response,
check_max_retries
)
@pytest.fixture
def test_state():
"""Create a test state fixture."""
return TestState(
prompt="test prompt",
test_attempts=0,
auto_test=False
)
def test_check_max_retries():
"""Test max retries check."""
assert not check_max_retries(2, 3)
assert check_max_retries(3, 3)
assert check_max_retries(4, 3)
def test_handle_test_failure(test_state):
"""Test handling of test failures."""
test_result = {"output": "error message"}
with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.display_test_failure"):
state = handle_test_failure(test_state, "original", test_result)
assert not state.should_break
assert "error message" in state.prompt
def test_run_test_command_success(test_state):
"""Test successful test command execution."""
with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_shell_command") as mock_run:
mock_run.return_value = {"success": True, "output": ""}
state = run_test_command("test", test_state, "original")
assert state.should_break
assert state.test_attempts == 1
def test_run_test_command_failure(test_state):
"""Test failed test command execution."""
with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_shell_command") as mock_run:
mock_run.return_value = {"success": False, "output": "error"}
state = run_test_command("test", test_state, "original")
assert not state.should_break
assert state.test_attempts == 1
assert "error" in state.prompt
def test_run_test_command_error(test_state):
"""Test test command execution error."""
with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_shell_command") as mock_run:
mock_run.side_effect = Exception("Command failed")
state = run_test_command("test", test_state, "original")
assert state.should_break
assert state.test_attempts == 1
def test_handle_user_response_no(test_state):
"""Test handling of 'no' response."""
state = handle_user_response("n", test_state, "test", "original")
assert state.should_break
assert not state.auto_test
def test_handle_user_response_auto(test_state):
"""Test handling of 'auto' response."""
with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_test_command") as mock_run:
mock_state = TestState("prompt", 1, True, True)
mock_run.return_value = mock_state
state = handle_user_response("a", test_state, "test", "original")
assert state.auto_test
mock_run.assert_called_once()
def test_handle_user_response_yes(test_state):
"""Test handling of 'yes' response."""
with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_test_command") as mock_run:
mock_state = TestState("prompt", 1, False, True)
mock_run.return_value = mock_state
state = handle_user_response("y", test_state, "test", "original")
assert not state.auto_test
mock_run.assert_called_once()
def test_execute_test_command_no_cmd():
"""Test execution with no test command."""
result = execute_test_command({}, "prompt")
assert result == (True, "prompt", False, 0)
def test_execute_test_command_manual():
"""Test manual test execution."""
config = {"test_cmd": "test"}
with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.ask_human") as mock_ask, \
patch("ra_aid.tools.handle_user_defined_test_cmd_execution.handle_user_response") as mock_handle:
mock_ask.invoke.return_value = "y"
mock_state = TestState("new prompt", 1, False, True)
mock_handle.return_value = mock_state
result = execute_test_command(config, "prompt")
assert result == (True, "new prompt", False, 1)
def test_execute_test_command_auto():
"""Test auto test execution."""
config = {"test_cmd": "test", "max_test_cmd_retries": 3}
with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_test_command") as mock_run:
mock_state = TestState("new prompt", 1, True, True)
mock_run.return_value = mock_state
result = execute_test_command(config, "prompt", auto_test=True)
assert result == (True, "new prompt", True, 1)