commit 3896236fa1
parent 9533ff1957

wip
@@ -0,0 +1 @@
3.12.7
@@ -179,6 +179,9 @@ ra-aid -m "Add new feature" --verbose
 - `--temperature`: LLM temperature (0.0-2.0) to control randomness in responses
 - `--disable-limit-tokens`: Disable token limiting for Anthropic Claude react agents
 - `--recursion-limit`: Maximum recursion depth for agent operations (default: 100)
+- `--test-cmd`: Custom command to run tests. If set, the user will be asked whether to run the test command
+- `--auto-test`: Automatically run tests after each code change
+- `--max-test-cmd-retries`: Maximum number of test command retry attempts (default: 3)
 - `--version`: Show program version number and exit

 ### Example Tasks
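An illustrative invocation combining the new flags (the `pytest tests/` command mirrors the example given in the argparse help text later in this diff; adjust it for your project):

    ra-aid -m "Add new feature" --test-cmd "pytest tests/" --auto-test --max-test-cmd-retries 5

Passing `--auto-test` without `--test-cmd` is rejected during argument validation, as enforced in a later hunk.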
@@ -5,7 +5,7 @@ from datetime import datetime
 from rich.panel import Panel
 from rich.console import Console
 from langgraph.checkpoint.memory import MemorySaver
-from ra_aid.config import DEFAULT_RECURSION_LIMIT
+from ra_aid.config import DEFAULT_MAX_TEST_CMD_RETRIES, DEFAULT_RECURSION_LIMIT
 from ra_aid.env import validate_environment
 from ra_aid.project_info import get_project_info, format_project_info
 from ra_aid.tools.memory import _global_memory
@@ -148,7 +148,22 @@ Examples:
     parser.add_argument(
         "--aider-config", type=str, help="Specify the aider config file path"
     )
+    parser.add_argument(
+        "--test-cmd",
+        type=str,
+        help="Test command to run before completing tasks (e.g. 'pytest tests/')"
+    )
+    parser.add_argument(
+        "--auto-test",
+        action="store_true",
+        help="Automatically run tests before completing tasks"
+    )
+    parser.add_argument(
+        "--max-test-cmd-retries",
+        type=int,
+        default=DEFAULT_MAX_TEST_CMD_RETRIES,
+        help="Maximum number of retries for the test command (default: 3)",
+    )

     if args is None:
         args = sys.argv[1:]
     parsed_args = parser.parse_args(args)
@@ -192,6 +207,10 @@ Examples:
     # Validate recursion limit is positive
     if parsed_args.recursion_limit <= 0:
         parser.error("Recursion limit must be positive")

+    # If --auto-test is given, require that --test-cmd is also provided
+    if parsed_args.auto_test and not parsed_args.test_cmd:
+        parser.error("Test command is required when using --auto-test")
+
     return parsed_args

@@ -344,6 +363,9 @@ def main():
         "web_research_enabled": web_research_enabled,
         "aider_config": args.aider_config,
         "limit_tokens": args.disable_limit_tokens,
+        "auto_test": args.auto_test,
+        "test_cmd": args.test_cmd,
+        "max_test_cmd_retries": args.max_test_cmd_retries,
     }

     # Store config in global memory for access by is_informational_query
@@ -12,7 +12,7 @@ import signal

 from langgraph.checkpoint.memory import MemorySaver
 from langgraph.prebuilt.chat_agent_executor import AgentState
-from ra_aid.config import DEFAULT_RECURSION_LIMIT
+from ra_aid.config import DEFAULT_MAX_TEST_CMD_RETRIES, DEFAULT_RECURSION_LIMIT
 from ra_aid.models_tokens import DEFAULT_TOKEN_LIMIT, models_tokens
 from ra_aid.agents.ciayn_agent import CiaynAgent
 import threading
@@ -55,6 +55,8 @@ from ra_aid.prompts import (

 from langchain_core.messages import HumanMessage
 from anthropic import APIError, APITimeoutError, RateLimitError, InternalServerError
+from ra_aid.tools.human import ask_human
+from ra_aid.tools.shell import run_shell_command
 from rich.console import Console
 from rich.markdown import Markdown
 from rich.panel import Panel
@@ -64,6 +66,7 @@ from ra_aid.tools.memory import (
     get_memory_value,
     get_related_files,
 )
+from ra_aid.tools.handle_user_defined_test_cmd_execution import execute_test_command


 console = Console()
@@ -719,6 +722,10 @@ def run_agent_with_retry(agent, prompt: str, config: dict) -> Optional[str]:

     max_retries = 20
     base_delay = 1
+    test_attempts = 0
+    max_test_retries = config.get("max_test_cmd_retries", DEFAULT_MAX_TEST_CMD_RETRIES)
+    auto_test = config.get("auto_test", False)
+    original_prompt = prompt

     with InterruptibleSection():
         try:
@@ -745,6 +752,19 @@ def run_agent_with_retry(agent, prompt: str, config: dict) -> Optional[str]:
                     _global_memory["task_completed"] = False
                     _global_memory["completion_message"] = ""
                     break
+
+                # Execute test command if configured
+                should_break, prompt, auto_test, test_attempts = execute_test_command(
+                    config,
+                    original_prompt,
+                    test_attempts,
+                    auto_test
+                )
+                if should_break:
+                    break
+                if prompt != original_prompt:
+                    continue
+
                logger.debug("Agent run completed successfully")
                return "Agent run completed successfully"
        except (KeyboardInterrupt, AgentInterrupt):
@@ -1,3 +1,4 @@
 """Configuration utilities."""

 DEFAULT_RECURSION_LIMIT = 100
+DEFAULT_MAX_TEST_CMD_RETRIES = 3
@@ -197,4 +197,4 @@ def initialize_expert_llm(
     provider: str = "openai", model_name: str = "o1"
 ) -> BaseChatModel:
     """Initialize an expert language model client based on the specified provider and model."""
     return create_llm_client(provider, model_name, temperature=None, is_expert=True)
@@ -0,0 +1,167 @@
"""Utilities for executing and managing user-defined test commands."""

from typing import Dict, Any, Tuple, Optional
from dataclasses import dataclass
from rich.console import Console
from rich.markdown import Markdown
from rich.panel import Panel
from ra_aid.tools.human import ask_human
from ra_aid.tools.shell import run_shell_command
from ra_aid.logging_config import get_logger

console = Console()
logger = get_logger(__name__)

@dataclass
class TestState:
    """State for test execution."""
    prompt: str
    test_attempts: int
    auto_test: bool
    should_break: bool = False

def display_test_failure(attempts: int, max_retries: int) -> None:
    """Display test failure message.

    Args:
        attempts: Current number of attempts
        max_retries: Maximum allowed retries
    """
    console.print(
        Panel(
            Markdown(f"Test failed. Attempt number {attempts} of {max_retries}. Retrying and informing of failure output"),
            title="🔎 User Defined Test",
            border_style="red bold"
        )
    )

def handle_test_failure(state: TestState, original_prompt: str, test_result: Dict[str, Any]) -> TestState:
    """Handle test command failure.

    Args:
        state: Current test state
        original_prompt: Original prompt text
        test_result: Test command result

    Returns:
        Updated test state
    """
    state.prompt = f"{original_prompt}. Previous attempt failed with: <test_cmd_stdout>{test_result['output']}</test_cmd_stdout>"
    display_test_failure(state.test_attempts, 5)  # Default max retries
    state.should_break = False
    return state

def run_test_command(cmd: str, state: TestState, original_prompt: str) -> TestState:
    """Run test command and handle result.

    Args:
        cmd: Test command to execute
        state: Current test state
        original_prompt: Original prompt text

    Returns:
        Updated test state
    """
    try:
        test_result = run_shell_command(cmd)
        state.test_attempts += 1

        if not test_result["success"]:
            return handle_test_failure(state, original_prompt, test_result)

        state.should_break = True
        return state

    except Exception as e:
        logger.warning(f"Test command execution failed: {str(e)}")
        state.test_attempts += 1
        state.should_break = True
        return state

def handle_user_response(response: str, state: TestState, cmd: str, original_prompt: str) -> TestState:
    """Handle user's response to test prompt.

    Args:
        response: User's response (y/n/a)
        state: Current test state
        cmd: Test command
        original_prompt: Original prompt text

    Returns:
        Updated test state
    """
    response = response.strip().lower()

    if response == "n":
        state.should_break = True
        return state

    if response == "a":
        state.auto_test = True
        return run_test_command(cmd, state, original_prompt)

    if response == "y":
        return run_test_command(cmd, state, original_prompt)

    return state

def check_max_retries(attempts: int, max_retries: int) -> bool:
    """Check if max retries reached.

    Args:
        attempts: Current number of attempts
        max_retries: Maximum allowed retries

    Returns:
        True if max retries reached
    """
    if attempts >= max_retries:
        logger.warning("Max test retries reached")
        return True
    return False

def execute_test_command(
    config: Dict[str, Any],
    original_prompt: str,
    test_attempts: int = 0,
    auto_test: bool = False,
) -> Tuple[bool, str, bool, int]:
    """Execute a test command and handle retries.

    Args:
        config: Configuration dictionary containing test settings
        original_prompt: The original prompt to append errors to
        test_attempts: Current number of test attempts
        auto_test: Whether auto-test mode is enabled

    Returns:
        Tuple containing:
            - bool: Whether to break the retry loop
            - str: Updated prompt
            - bool: Updated auto_test flag
            - int: Updated test_attempts count
    """
    state = TestState(
        prompt=original_prompt,
        test_attempts=test_attempts,
        auto_test=auto_test
    )

    if not config.get("test_cmd"):
        state.should_break = True
        return state.should_break, state.prompt, state.auto_test, state.test_attempts

    max_retries = config.get("max_test_cmd_retries", 5)
    cmd = config["test_cmd"]

    if not auto_test:
        print()
        response = ask_human.invoke({"question": "Would you like to run the test command? (y=yes, n=no, a=enable auto-test)"})
        state = handle_user_response(response, state, cmd, original_prompt)
    else:
        if check_max_retries(test_attempts, max_retries):
            state.should_break = True
        else:
            state = run_test_command(cmd, state, original_prompt)

    return state.should_break, state.prompt, state.auto_test, state.test_attempts
@@ -45,6 +45,8 @@ Returns: { "output": stdout+stderr, "return_code": 0 if success, "success": True
         "--no-auto-commits",
         "--dark-mode",
         "--no-suggest-shell-commands",
+        "--no-show-release-notes",
+        "--no-check-update",
     ]

     # Add config file if specified
@@ -0,0 +1 @@
"""Scripts package."""
@@ -0,0 +1,218 @@
"""Tests for test execution utilities."""

import pytest
from unittest.mock import Mock, patch
from ra_aid.tools.handle_user_defined_test_cmd_execution import execute_test_command

# Test cases for execute_test_command
test_cases = [
    # Format: (name, config, original_prompt, test_attempts, auto_test,
    # mock_responses, expected_result)

    # Case 1: No test command configured
    (
        "no_test_command",
        {"other_config": "value"},
        "original prompt",
        0,
        False,
        {},
        (True, "original prompt", False, 0)
    ),

    # Case 2: User declines to run test
    (
        "user_declines_test",
        {"test_cmd": "pytest"},
        "original prompt",
        0,
        False,
        {"ask_human_response": "n"},
        (True, "original prompt", False, 0)
    ),

    # Case 3: User enables auto-test
    (
        "user_enables_auto_test",
        {"test_cmd": "pytest"},
        "original prompt",
        0,
        False,
        {
            "ask_human_response": "a",
            "shell_cmd_result": {"success": True, "output": "All tests passed"}
        },
        (True, "original prompt", True, 1)
    ),

    # Case 4: Auto-test success
    (
        "auto_test_success",
        {"test_cmd": "pytest"},
        "original prompt",
        0,
        True,
        {"shell_cmd_result": {"success": True, "output": "All tests passed"}},
        (True, "original prompt", True, 1)
    ),

    # Case 5: Auto-test failure with retry
    (
        "auto_test_failure_retry",
        {"test_cmd": "pytest"},
        "original prompt",
        0,
        True,
        {"shell_cmd_result": {"success": False, "output": "Test failed"}},
        (False, "original prompt. Previous attempt failed with: <test_cmd_stdout>Test failed</test_cmd_stdout>", True, 1)
    ),

    # Case 6: Max retries reached
    (
        "max_retries_reached",
        {"test_cmd": "pytest", "max_test_cmd_retries": 3},
        "original prompt",
        3,
        True,
        {},
        (True, "original prompt", True, 3)
    ),

    # Case 7: User runs test manually
    (
        "manual_test_success",
        {"test_cmd": "pytest"},
        "original prompt",
        0,
        False,
        {
            "ask_human_response": "y",
            "shell_cmd_result": {"success": True, "output": "All tests passed"}
        },
        (True, "original prompt", False, 1)
    ),

    # Case 8: Manual test failure
    (
        "manual_test_failure",
        {"test_cmd": "pytest"},
        "original prompt",
        0,
        False,
        {
            "ask_human_response": "y",
            "shell_cmd_result": {"success": False, "output": "Test failed"}
        },
        (False, "original prompt. Previous attempt failed with: <test_cmd_stdout>Test failed</test_cmd_stdout>", False, 1)
    ),

    # Case 9: Manual test error
    (
        "manual_test_error",
        {"test_cmd": "pytest"},
        "original prompt",
        0,
        False,
        {
            "ask_human_response": "y",
            "shell_cmd_result_error": Exception("Command failed")
        },
        (True, "original prompt", False, 1)
    ),

    # Case 10: Auto-test error
    (
        "auto_test_error",
        {"test_cmd": "pytest"},
        "original prompt",
        0,
        True,
        {
            "shell_cmd_result_error": Exception("Command failed")
        },
        (True, "original prompt", True, 1)
    ),
]

@pytest.mark.parametrize(
    "name,config,original_prompt,test_attempts,auto_test,mock_responses,expected",
    test_cases,
    ids=[case[0] for case in test_cases]
)
def test_execute_test_command(
    name: str,
    config: dict,
    original_prompt: str,
    test_attempts: int,
    auto_test: bool,
    mock_responses: dict,
    expected: tuple,
) -> None:
    """Test execute_test_command with different scenarios.

    Args:
        name: Test case name
        config: Test configuration
        original_prompt: Original prompt text
        test_attempts: Number of test attempts
        auto_test: Auto-test flag
        mock_responses: Mock response data
        expected: Expected result tuple
    """
    with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.ask_human") as mock_ask_human, \
         patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_shell_command") as mock_run_cmd, \
         patch("ra_aid.tools.handle_user_defined_test_cmd_execution.console") as mock_console, \
         patch("ra_aid.tools.handle_user_defined_test_cmd_execution.logger") as mock_logger:

        # Configure mocks based on mock_responses
        if "ask_human_response" in mock_responses:
            mock_ask_human.invoke.return_value = mock_responses["ask_human_response"]

        if "shell_cmd_result_error" in mock_responses:
            mock_run_cmd.side_effect = mock_responses["shell_cmd_result_error"]
        elif "shell_cmd_result" in mock_responses:
            mock_run_cmd.return_value = mock_responses["shell_cmd_result"]

        # Execute test command
        result = execute_test_command(
            config,
            original_prompt,
            test_attempts,
            auto_test
        )

        # Verify result matches expected
        assert result == expected, f"Test case '{name}' failed"

        # Verify mock interactions
        if config.get("test_cmd") and not auto_test:
            mock_ask_human.invoke.assert_called_once()

        if auto_test and test_attempts < config.get("max_test_cmd_retries", 5):
            if config.get("test_cmd"):
                mock_run_cmd.assert_called_once_with(config["test_cmd"])

        # Verify logging for max retries
        if test_attempts >= config.get("max_test_cmd_retries", 5):
            mock_logger.warning.assert_called_once_with("Max test retries reached")

def test_execute_test_command_error_handling() -> None:
    """Test error handling in execute_test_command."""
    config = {"test_cmd": "pytest"}

    with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_shell_command") as mock_run_cmd, \
         patch("ra_aid.tools.handle_user_defined_test_cmd_execution.logger") as mock_logger:

        # Simulate run_shell_command raising an exception
        mock_run_cmd.side_effect = Exception("Command failed")

        result = execute_test_command(
            config,
            "original prompt",
            0,
            True
        )

        # Should handle error and continue
        assert result == (True, "original prompt", True, 1)
        mock_logger.warning.assert_called_once()
@@ -0,0 +1,109 @@
"""Tests for user-defined test command execution utilities."""

import pytest
from unittest.mock import patch, Mock
from ra_aid.tools.handle_user_defined_test_cmd_execution import (
    TestState,
    execute_test_command,
    handle_test_failure,
    run_test_command,
    handle_user_response,
    check_max_retries
)

@pytest.fixture
def test_state():
    """Create a test state fixture."""
    return TestState(
        prompt="test prompt",
        test_attempts=0,
        auto_test=False
    )

def test_check_max_retries():
    """Test max retries check."""
    assert not check_max_retries(2, 3)
    assert check_max_retries(3, 3)
    assert check_max_retries(4, 3)

def test_handle_test_failure(test_state):
    """Test handling of test failures."""
    test_result = {"output": "error message"}
    with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.display_test_failure"):
        state = handle_test_failure(test_state, "original", test_result)
        assert not state.should_break
        assert "error message" in state.prompt

def test_run_test_command_success(test_state):
    """Test successful test command execution."""
    with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_shell_command") as mock_run:
        mock_run.return_value = {"success": True, "output": ""}
        state = run_test_command("test", test_state, "original")
        assert state.should_break
        assert state.test_attempts == 1

def test_run_test_command_failure(test_state):
    """Test failed test command execution."""
    with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_shell_command") as mock_run:
        mock_run.return_value = {"success": False, "output": "error"}
        state = run_test_command("test", test_state, "original")
        assert not state.should_break
        assert state.test_attempts == 1
        assert "error" in state.prompt

def test_run_test_command_error(test_state):
    """Test test command execution error."""
    with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_shell_command") as mock_run:
        mock_run.side_effect = Exception("Command failed")
        state = run_test_command("test", test_state, "original")
        assert state.should_break
        assert state.test_attempts == 1

def test_handle_user_response_no(test_state):
    """Test handling of 'no' response."""
    state = handle_user_response("n", test_state, "test", "original")
    assert state.should_break
    assert not state.auto_test

def test_handle_user_response_auto(test_state):
    """Test handling of 'auto' response."""
    with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_test_command") as mock_run:
        mock_state = TestState("prompt", 1, True, True)
        mock_run.return_value = mock_state
        state = handle_user_response("a", test_state, "test", "original")
        assert state.auto_test
        mock_run.assert_called_once()

def test_handle_user_response_yes(test_state):
    """Test handling of 'yes' response."""
    with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_test_command") as mock_run:
        mock_state = TestState("prompt", 1, False, True)
        mock_run.return_value = mock_state
        state = handle_user_response("y", test_state, "test", "original")
        assert not state.auto_test
        mock_run.assert_called_once()

def test_execute_test_command_no_cmd():
    """Test execution with no test command."""
    result = execute_test_command({}, "prompt")
    assert result == (True, "prompt", False, 0)

def test_execute_test_command_manual():
    """Test manual test execution."""
    config = {"test_cmd": "test"}
    with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.ask_human") as mock_ask, \
         patch("ra_aid.tools.handle_user_defined_test_cmd_execution.handle_user_response") as mock_handle:
        mock_ask.invoke.return_value = "y"
        mock_state = TestState("new prompt", 1, False, True)
        mock_handle.return_value = mock_state
        result = execute_test_command(config, "prompt")
        assert result == (True, "new prompt", False, 1)

def test_execute_test_command_auto():
    """Test auto test execution."""
    config = {"test_cmd": "test", "max_test_cmd_retries": 3}
    with patch("ra_aid.tools.handle_user_defined_test_cmd_execution.run_test_command") as mock_run:
        mock_state = TestState("new prompt", 1, True, True)
        mock_run.return_value = mock_state
        result = execute_test_command(config, "prompt", auto_test=True)
        assert result == (True, "new prompt", True, 1)