feat(tests): add --test-cmd-timeout argument to README and implement timeout for test command execution
fix(config): define DEFAULT_TEST_CMD_TIMEOUT for consistent test command timeout handling
fix(main.py): remove unused AgentInterrupt import and update test command timeout handling
fix(programmer.py): adjust interactive command execution to use model-specific latency
fix(handle_user_defined_test_cmd_execution.py): update timeout handling for test command execution
test(tests): update tests to verify timeout behavior for test command execution
This commit is contained in:
commit
5a4710b3be
|
|
@ -183,6 +183,7 @@ More information is available in our [Usage Examples](https://docs.ra-aid.ai/cat
|
||||||
- `--test-cmd`: Custom command to run tests. If set user will be asked if they want to run the test command
|
- `--test-cmd`: Custom command to run tests. If set user will be asked if they want to run the test command
|
||||||
- `--auto-test`: Automatically run tests after each code change
|
- `--auto-test`: Automatically run tests after each code change
|
||||||
- `--max-test-cmd-retries`: Maximum number of test command retry attempts (default: 3)
|
- `--max-test-cmd-retries`: Maximum number of test command retry attempts (default: 3)
|
||||||
|
- `--test-cmd-timeout`: Timeout in seconds for test command execution (default: 300)
|
||||||
- `--version`: Show program version number and exit
|
- `--version`: Show program version number and exit
|
||||||
- `--webui`: Launch the web interface (alpha feature)
|
- `--webui`: Launch the web interface (alpha feature)
|
||||||
- `--webui-host`: Host to listen on for web interface (default: 0.0.0.0) (alpha feature)
|
- `--webui-host`: Host to listen on for web interface (default: 0.0.0.0) (alpha feature)
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,6 @@ from rich.text import Text
|
||||||
from ra_aid import print_error, print_stage_header
|
from ra_aid import print_error, print_stage_header
|
||||||
from ra_aid.__version__ import __version__
|
from ra_aid.__version__ import __version__
|
||||||
from ra_aid.agent_utils import (
|
from ra_aid.agent_utils import (
|
||||||
AgentInterrupt,
|
|
||||||
create_agent,
|
create_agent,
|
||||||
run_agent_with_retry,
|
run_agent_with_retry,
|
||||||
run_planning_agent,
|
run_planning_agent,
|
||||||
|
|
@ -21,11 +20,13 @@ from ra_aid.agent_utils import (
|
||||||
from ra_aid.config import (
|
from ra_aid.config import (
|
||||||
DEFAULT_MAX_TEST_CMD_RETRIES,
|
DEFAULT_MAX_TEST_CMD_RETRIES,
|
||||||
DEFAULT_RECURSION_LIMIT,
|
DEFAULT_RECURSION_LIMIT,
|
||||||
|
DEFAULT_TEST_CMD_TIMEOUT,
|
||||||
VALID_PROVIDERS,
|
VALID_PROVIDERS,
|
||||||
)
|
)
|
||||||
from ra_aid.console.output import cpm
|
from ra_aid.console.output import cpm
|
||||||
from ra_aid.dependencies import check_dependencies
|
from ra_aid.dependencies import check_dependencies
|
||||||
from ra_aid.env import validate_environment
|
from ra_aid.env import validate_environment
|
||||||
|
from ra_aid.exceptions import AgentInterrupt
|
||||||
from ra_aid.llm import initialize_llm
|
from ra_aid.llm import initialize_llm
|
||||||
from ra_aid.logging_config import get_logger, setup_logging
|
from ra_aid.logging_config import get_logger, setup_logging
|
||||||
from ra_aid.models_params import DEFAULT_TEMPERATURE, models_params
|
from ra_aid.models_params import DEFAULT_TEMPERATURE, models_params
|
||||||
|
|
@ -181,7 +182,13 @@ Examples:
|
||||||
"--max-test-cmd-retries",
|
"--max-test-cmd-retries",
|
||||||
type=int,
|
type=int,
|
||||||
default=DEFAULT_MAX_TEST_CMD_RETRIES,
|
default=DEFAULT_MAX_TEST_CMD_RETRIES,
|
||||||
help="Maximum number of retries for the test command (default: 10)",
|
help="Maximum number of retries for the test command (default: 3)",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--test-cmd-timeout",
|
||||||
|
type=int,
|
||||||
|
default=DEFAULT_TEST_CMD_TIMEOUT,
|
||||||
|
help=f"Timeout in seconds for test command execution (default: {DEFAULT_TEST_CMD_TIMEOUT})",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--webui",
|
"--webui",
|
||||||
|
|
@ -441,6 +448,7 @@ def main():
|
||||||
"test_cmd": args.test_cmd,
|
"test_cmd": args.test_cmd,
|
||||||
"max_test_cmd_retries": args.max_test_cmd_retries,
|
"max_test_cmd_retries": args.max_test_cmd_retries,
|
||||||
"experimental_fallback_handler": args.experimental_fallback_handler,
|
"experimental_fallback_handler": args.experimental_fallback_handler,
|
||||||
|
"test_cmd_timeout": args.test_cmd_timeout,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Store config in global memory for access by is_informational_query
|
# Store config in global memory for access by is_informational_query
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ DEFAULT_MAX_TEST_CMD_RETRIES = 3
|
||||||
DEFAULT_MAX_TOOL_FAILURES = 3
|
DEFAULT_MAX_TOOL_FAILURES = 3
|
||||||
FALLBACK_TOOL_MODEL_LIMIT = 5
|
FALLBACK_TOOL_MODEL_LIMIT = 5
|
||||||
RETRY_FALLBACK_COUNT = 3
|
RETRY_FALLBACK_COUNT = 3
|
||||||
|
DEFAULT_TEST_CMD_TIMEOUT = 60 * 5 # 5 minutes in seconds
|
||||||
|
|
||||||
|
|
||||||
VALID_PROVIDERS = [
|
VALID_PROVIDERS = [
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ List of model parameters
|
||||||
|
|
||||||
DEFAULT_TOKEN_LIMIT = 100000
|
DEFAULT_TOKEN_LIMIT = 100000
|
||||||
DEFAULT_TEMPERATURE = 0.7
|
DEFAULT_TEMPERATURE = 0.7
|
||||||
DEFAULT_BASE_LATENCY = 180
|
DEFAULT_BASE_LATENCY = 240
|
||||||
|
|
||||||
models_params = {
|
models_params = {
|
||||||
"openai": {
|
"openai": {
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ from rich.console import Console
|
||||||
from rich.markdown import Markdown
|
from rich.markdown import Markdown
|
||||||
from rich.panel import Panel
|
from rich.panel import Panel
|
||||||
|
|
||||||
|
from ra_aid.config import DEFAULT_TEST_CMD_TIMEOUT
|
||||||
from ra_aid.logging_config import get_logger
|
from ra_aid.logging_config import get_logger
|
||||||
from ra_aid.tools.human import ask_human
|
from ra_aid.tools.human import ask_human
|
||||||
from ra_aid.tools.shell import run_shell_command
|
from ra_aid.tools.shell import run_shell_command
|
||||||
|
|
@ -85,7 +86,7 @@ class TestCommandExecutor:
|
||||||
cmd: Test command to execute
|
cmd: Test command to execute
|
||||||
original_prompt: Original prompt text
|
original_prompt: Original prompt text
|
||||||
"""
|
"""
|
||||||
timeout = self.config.get("timeout", 30)
|
timeout = self.config.get("test_cmd_timeout", DEFAULT_TEST_CMD_TIMEOUT)
|
||||||
try:
|
try:
|
||||||
logger.info(f"Executing test command: {cmd} with timeout {timeout}s")
|
logger.info(f"Executing test command: {cmd} with timeout {timeout}s")
|
||||||
test_result = run_shell_command(cmd, timeout=timeout)
|
test_result = run_shell_command(cmd, timeout=timeout)
|
||||||
|
|
@ -99,11 +100,11 @@ class TestCommandExecutor:
|
||||||
logger.info("Test command executed successfully")
|
logger.info("Test command executed successfully")
|
||||||
|
|
||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
logger.warning(f"Test command timed out after {timeout}s: {cmd}")
|
logger.warning(
|
||||||
self.state.test_attempts += 1
|
f"Test command timed out after {DEFAULT_TEST_CMD_TIMEOUT}s: {cmd}"
|
||||||
self.state.prompt = (
|
|
||||||
f"{original_prompt}. Previous attempt timed out after {timeout} seconds"
|
|
||||||
)
|
)
|
||||||
|
self.state.test_attempts += 1
|
||||||
|
self.state.prompt = f"{original_prompt}. Previous attempt timed out after {DEFAULT_TEST_CMD_TIMEOUT} seconds"
|
||||||
self.display_test_failure()
|
self.display_test_failure()
|
||||||
|
|
||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ from rich.panel import Panel
|
||||||
from rich.text import Text
|
from rich.text import Text
|
||||||
|
|
||||||
from ra_aid.logging_config import get_logger
|
from ra_aid.logging_config import get_logger
|
||||||
|
from ra_aid.models_params import DEFAULT_BASE_LATENCY, models_params
|
||||||
from ra_aid.proc.interactive import run_interactive_command
|
from ra_aid.proc.interactive import run_interactive_command
|
||||||
from ra_aid.text.processing import truncate_output
|
from ra_aid.text.processing import truncate_output
|
||||||
from ra_aid.tools.memory import _global_memory, log_work_event
|
from ra_aid.tools.memory import _global_memory, log_work_event
|
||||||
|
|
@ -138,7 +139,16 @@ def run_programming_task(
|
||||||
try:
|
try:
|
||||||
# Run the command interactively
|
# Run the command interactively
|
||||||
print()
|
print()
|
||||||
result = run_interactive_command(command)
|
# Get provider/model specific latency coefficient
|
||||||
|
provider = _global_memory.get("config", {}).get("provider", "")
|
||||||
|
model = _global_memory.get("config", {}).get("model", "")
|
||||||
|
latency = (
|
||||||
|
models_params.get(provider, {})
|
||||||
|
.get(model, {})
|
||||||
|
.get("latency_coefficient", DEFAULT_BASE_LATENCY)
|
||||||
|
)
|
||||||
|
|
||||||
|
result = run_interactive_command(command, expected_runtime_seconds=latency)
|
||||||
print()
|
print()
|
||||||
|
|
||||||
# Log the programming task
|
# Log the programming task
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ from unittest.mock import patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from ra_aid.config import DEFAULT_TEST_CMD_TIMEOUT
|
||||||
from ra_aid.tools.handle_user_defined_test_cmd_execution import execute_test_command
|
from ra_aid.tools.handle_user_defined_test_cmd_execution import execute_test_command
|
||||||
|
|
||||||
# Test cases for execute_test_command
|
# Test cases for execute_test_command
|
||||||
|
|
@ -195,9 +196,10 @@ def test_execute_test_command(
|
||||||
|
|
||||||
if auto_test and test_attempts < config.get("max_test_cmd_retries", 5):
|
if auto_test and test_attempts < config.get("max_test_cmd_retries", 5):
|
||||||
if config.get("test_cmd"):
|
if config.get("test_cmd"):
|
||||||
# Verify run_shell_command called with command and default timeout
|
# Verify run_shell_command called with command and configured timeout
|
||||||
mock_run_cmd.assert_called_once_with(
|
mock_run_cmd.assert_called_once_with(
|
||||||
config["test_cmd"], timeout=config.get("timeout", 30)
|
config["test_cmd"],
|
||||||
|
timeout=config.get("test_cmd_timeout", DEFAULT_TEST_CMD_TIMEOUT),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Verify logging for max retries
|
# Verify logging for max retries
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ from unittest.mock import Mock, patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from ra_aid.config import DEFAULT_TEST_CMD_TIMEOUT
|
||||||
from ra_aid.tools.handle_user_defined_test_cmd_execution import (
|
from ra_aid.tools.handle_user_defined_test_cmd_execution import (
|
||||||
TestCommandExecutor,
|
TestCommandExecutor,
|
||||||
TestState,
|
TestState,
|
||||||
|
|
@ -92,8 +93,9 @@ def test_run_test_command_timeout(test_executor):
|
||||||
"ra_aid.tools.handle_user_defined_test_cmd_execution.logger.warning"
|
"ra_aid.tools.handle_user_defined_test_cmd_execution.logger.warning"
|
||||||
) as mock_logger,
|
) as mock_logger,
|
||||||
):
|
):
|
||||||
# Create a TimeoutExpired exception
|
# Create a TimeoutExpired exception with configured timeout
|
||||||
timeout_exc = subprocess.TimeoutExpired(cmd="test", timeout=30)
|
timeout = test_executor.config.get("test_cmd_timeout", DEFAULT_TEST_CMD_TIMEOUT)
|
||||||
|
timeout_exc = subprocess.TimeoutExpired(cmd="test", timeout=timeout)
|
||||||
mock_run.side_effect = timeout_exc
|
mock_run.side_effect = timeout_exc
|
||||||
|
|
||||||
test_executor.run_test_command("test", "original")
|
test_executor.run_test_command("test", "original")
|
||||||
|
|
@ -101,7 +103,7 @@ def test_run_test_command_timeout(test_executor):
|
||||||
# Verify state updates
|
# Verify state updates
|
||||||
assert not test_executor.state.should_break
|
assert not test_executor.state.should_break
|
||||||
assert test_executor.state.test_attempts == 1
|
assert test_executor.state.test_attempts == 1
|
||||||
assert "timed out after 30 seconds" in test_executor.state.prompt
|
assert f"timed out after {timeout} seconds" in test_executor.state.prompt
|
||||||
|
|
||||||
# Verify logging
|
# Verify logging
|
||||||
mock_logger.assert_called_once()
|
mock_logger.assert_called_once()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue