feat(tests): add --test-cmd-timeout argument to README and implement timeout for test command execution

fix(config): define DEFAULT_TEST_CMD_TIMEOUT for consistent test command timeout handling
fix(main.py): remove unused AgentInterrupt import and update test command timeout handling
fix(programmer.py): adjust interactive command execution to use model-specific latency
fix(handle_user_defined_test_cmd_execution.py): update timeout handling for test command execution
test(tests): update tests to verify timeout behavior for test command execution
2025-02-17 15:38:20 -08:00 · 2025-02-17 15:38:20 -08:00 · 5a4710b3be
parent becf8a1fd6 e6ba8f5dff
commit 5a4710b3be
8 changed files with 39 additions and 14 deletions
--- a/README.md
+++ b/README.md
@ -183,6 +183,7 @@ More information is available in our [Usage Examples](https://docs.ra-aid.ai/cat
 - `--test-cmd`: Custom command to run tests. If set user will be asked if they want to run the test command
 - `--auto-test`: Automatically run tests after each code change
 - `--max-test-cmd-retries`: Maximum number of test command retry attempts (default: 3)
 - `--test-cmd-timeout`: Timeout in seconds for test command execution (default: 300)
 - `--version`: Show program version number and exit
 - `--webui`: Launch the web interface (alpha feature)
 - `--webui-host`: Host to listen on for web interface (default: 0.0.0.0)  (alpha feature)
--- a/ra_aid/main.py
+++ b/ra_aid/main.py
@ -12,7 +12,6 @@ from rich.text import Text
 from ra_aid import print_error, print_stage_header
 from ra_aid.__version__ import __version__
 from ra_aid.agent_utils import (
    AgentInterrupt,
    create_agent,
    run_agent_with_retry,
    run_planning_agent,
@ -21,11 +20,13 @@ from ra_aid.agent_utils import (
 from ra_aid.config import (
    DEFAULT_MAX_TEST_CMD_RETRIES,
    DEFAULT_RECURSION_LIMIT,
    DEFAULT_TEST_CMD_TIMEOUT,
    VALID_PROVIDERS,
 )
 from ra_aid.console.output import cpm
 from ra_aid.dependencies import check_dependencies
 from ra_aid.env import validate_environment
 from ra_aid.exceptions import AgentInterrupt
 from ra_aid.llm import initialize_llm
 from ra_aid.logging_config import get_logger, setup_logging
 from ra_aid.models_params import DEFAULT_TEMPERATURE, models_params
@ -181,7 +182,13 @@ Examples:
        "--max-test-cmd-retries",
        type=int,
        default=DEFAULT_MAX_TEST_CMD_RETRIES,
-        help="Maximum number of retries for the test command (default: 10)",
+        help="Maximum number of retries for the test command (default: 3)",
    )
    parser.add_argument(
        "--test-cmd-timeout",
        type=int,
        default=DEFAULT_TEST_CMD_TIMEOUT,
        help=f"Timeout in seconds for test command execution (default: {DEFAULT_TEST_CMD_TIMEOUT})",
    )
    parser.add_argument(
        "--webui",
@ -441,6 +448,7 @@ def main():
            "test_cmd": args.test_cmd,
            "max_test_cmd_retries": args.max_test_cmd_retries,
            "experimental_fallback_handler": args.experimental_fallback_handler,
            "test_cmd_timeout": args.test_cmd_timeout,
        }
        # Store config in global memory for access by is_informational_query
--- a/ra_aid/config.py
+++ b/ra_aid/config.py
@ -5,6 +5,7 @@ DEFAULT_MAX_TEST_CMD_RETRIES = 3
 DEFAULT_MAX_TOOL_FAILURES = 3
 FALLBACK_TOOL_MODEL_LIMIT = 5
 RETRY_FALLBACK_COUNT = 3
 DEFAULT_TEST_CMD_TIMEOUT = 60 * 5  # 5 minutes in seconds
 VALID_PROVIDERS = [
--- a/ra_aid/models_params.py
+++ b/ra_aid/models_params.py
@ -4,7 +4,7 @@ List of model parameters
 DEFAULT_TOKEN_LIMIT = 100000
 DEFAULT_TEMPERATURE = 0.7
-DEFAULT_BASE_LATENCY = 180
+DEFAULT_BASE_LATENCY = 240
 models_params = {
    "openai": {
--- a/ra_aid/tools/handle_user_defined_test_cmd_execution.py
+++ b/ra_aid/tools/handle_user_defined_test_cmd_execution.py
@ -8,6 +8,7 @@ from rich.console import Console
 from rich.markdown import Markdown
 from rich.panel import Panel
 from ra_aid.config import DEFAULT_TEST_CMD_TIMEOUT
 from ra_aid.logging_config import get_logger
 from ra_aid.tools.human import ask_human
 from ra_aid.tools.shell import run_shell_command
@ -85,7 +86,7 @@ class TestCommandExecutor:
            cmd: Test command to execute
            original_prompt: Original prompt text
        """
-        timeout = self.config.get("timeout", 30)
+        timeout = self.config.get("test_cmd_timeout", DEFAULT_TEST_CMD_TIMEOUT)
        try:
            logger.info(f"Executing test command: {cmd} with timeout {timeout}s")
            test_result = run_shell_command(cmd, timeout=timeout)
@ -99,11 +100,11 @@ class TestCommandExecutor:
            logger.info("Test command executed successfully")
        except subprocess.TimeoutExpired:
-            logger.warning(f"Test command timed out after {timeout}s: {cmd}")
+            logger.warning(
-            self.state.test_attempts += 1
+                f"Test command timed out after {DEFAULT_TEST_CMD_TIMEOUT}s: {cmd}"
            self.state.prompt = (
                f"{original_prompt}. Previous attempt timed out after {timeout} seconds"
            )
            self.state.test_attempts += 1
            self.state.prompt = f"{original_prompt}. Previous attempt timed out after {DEFAULT_TEST_CMD_TIMEOUT} seconds"
            self.display_test_failure()
        except subprocess.CalledProcessError as e:
--- a/ra_aid/tools/programmer.py
+++ b/ra_aid/tools/programmer.py
@ -10,6 +10,7 @@ from rich.panel import Panel
 from rich.text import Text
 from ra_aid.logging_config import get_logger
 from ra_aid.models_params import DEFAULT_BASE_LATENCY, models_params
 from ra_aid.proc.interactive import run_interactive_command
 from ra_aid.text.processing import truncate_output
 from ra_aid.tools.memory import _global_memory, log_work_event
@ -138,7 +139,16 @@ def run_programming_task(
    try:
        # Run the command interactively
        print()
-        result = run_interactive_command(command)
+        # Get provider/model specific latency coefficient
        provider = _global_memory.get("config", {}).get("provider", "")
        model = _global_memory.get("config", {}).get("model", "")
        latency = (
            models_params.get(provider, {})
            .get(model, {})
            .get("latency_coefficient", DEFAULT_BASE_LATENCY)
        )
        result = run_interactive_command(command, expected_runtime_seconds=latency)
        print()
        # Log the programming task
--- a/tests/ra_aid/tools/test_execution.py
+++ b/tests/ra_aid/tools/test_execution.py
@ -4,6 +4,7 @@ from unittest.mock import patch
 import pytest
 from ra_aid.config import DEFAULT_TEST_CMD_TIMEOUT
 from ra_aid.tools.handle_user_defined_test_cmd_execution import execute_test_command
 # Test cases for execute_test_command
@ -195,9 +196,10 @@ def test_execute_test_command(
        if auto_test and test_attempts < config.get("max_test_cmd_retries", 5):
            if config.get("test_cmd"):
-                # Verify run_shell_command called with command and default timeout
+                # Verify run_shell_command called with command and configured timeout
                mock_run_cmd.assert_called_once_with(
-                    config["test_cmd"], timeout=config.get("timeout", 30)
+                    config["test_cmd"],
                    timeout=config.get("test_cmd_timeout", DEFAULT_TEST_CMD_TIMEOUT),
                )
        # Verify logging for max retries
--- a/tests/ra_aid/tools/test_handle_user_defined_test_cmd_execution.py
+++ b/tests/ra_aid/tools/test_handle_user_defined_test_cmd_execution.py
@ -5,6 +5,7 @@ from unittest.mock import Mock, patch
 import pytest
 from ra_aid.config import DEFAULT_TEST_CMD_TIMEOUT
 from ra_aid.tools.handle_user_defined_test_cmd_execution import (
    TestCommandExecutor,
    TestState,
@ -92,8 +93,9 @@ def test_run_test_command_timeout(test_executor):
            "ra_aid.tools.handle_user_defined_test_cmd_execution.logger.warning"
        ) as mock_logger,
    ):
-        # Create a TimeoutExpired exception
+        # Create a TimeoutExpired exception with configured timeout
-        timeout_exc = subprocess.TimeoutExpired(cmd="test", timeout=30)
+        timeout = test_executor.config.get("test_cmd_timeout", DEFAULT_TEST_CMD_TIMEOUT)
        timeout_exc = subprocess.TimeoutExpired(cmd="test", timeout=timeout)
        mock_run.side_effect = timeout_exc
        test_executor.run_test_command("test", "original")
@ -101,7 +103,7 @@ def test_run_test_command_timeout(test_executor):
        # Verify state updates
        assert not test_executor.state.should_break
        assert test_executor.state.test_attempts == 1
-        assert "timed out after 30 seconds" in test_executor.state.prompt
+        assert f"timed out after {timeout} seconds" in test_executor.state.prompt
        # Verify logging
        mock_logger.assert_called_once()