Merge pull request #95 from ariel-frischer/test-cmd-args

feat: add `--test-cmd-timeout` option to specify timeout for test command

Commit: e6ba8f5dff
@@ -181,6 +181,7 @@ More information is available in our [Usage Examples](https://docs.ra-aid.ai/cat

- `--test-cmd`: Custom command to run tests. If set user will be asked if they want to run the test command
- `--auto-test`: Automatically run tests after each code change
- `--max-test-cmd-retries`: Maximum number of test command retry attempts (default: 3)
- `--test-cmd-timeout`: Timeout in seconds for test command execution (default: 300)
- `--version`: Show program version number and exit
- `--webui`: Launch the web interface (alpha feature)
- `--webui-host`: Host to listen on for web interface (default: 0.0.0.0) (alpha feature)

@@ -18,7 +18,11 @@ from ra_aid.agent_utils import (
    run_planning_agent,
    run_research_agent,
)
from ra_aid.config import DEFAULT_MAX_TEST_CMD_RETRIES, DEFAULT_RECURSION_LIMIT
from ra_aid.config import (
    DEFAULT_MAX_TEST_CMD_RETRIES,
    DEFAULT_RECURSION_LIMIT,
    DEFAULT_TEST_CMD_TIMEOUT,
)
from ra_aid.dependencies import check_dependencies
from ra_aid.env import validate_environment
from ra_aid.llm import initialize_llm

@@ -81,9 +85,11 @@ Examples:
    parser.add_argument(
        "--provider",
        type=str,
        default="openai"
        if (os.getenv("OPENAI_API_KEY") and not os.getenv("ANTHROPIC_API_KEY"))
        else "anthropic",
        default=(
            "openai"
            if (os.getenv("OPENAI_API_KEY") and not os.getenv("ANTHROPIC_API_KEY"))
            else "anthropic"
        ),
        choices=VALID_PROVIDERS,
        help="The LLM provider to use",
    )

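The hunk above only re-wraps the provider default; the rule it encodes can be summarized with a small self-contained sketch (the helper name is illustrative, not part of the codebase):

```python
def default_provider(env: dict) -> str:
    # Same rule as the argparse default above: OpenAI is only chosen when an
    # OpenAI key is present and no Anthropic key is set; otherwise Anthropic.
    if env.get("OPENAI_API_KEY") and not env.get("ANTHROPIC_API_KEY"):
        return "openai"
    return "anthropic"

assert default_provider({"OPENAI_API_KEY": "sk-1"}) == "openai"
assert default_provider({"OPENAI_API_KEY": "sk-1", "ANTHROPIC_API_KEY": "sk-2"}) == "anthropic"
assert default_provider({}) == "anthropic"
```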
@@ -173,7 +179,13 @@ Examples:
        "--max-test-cmd-retries",
        type=int,
        default=DEFAULT_MAX_TEST_CMD_RETRIES,
        help="Maximum number of retries for the test command (default: 10)",
        help="Maximum number of retries for the test command (default: 3)",
    )
    parser.add_argument(
        "--test-cmd-timeout",
        type=int,
        default=DEFAULT_TEST_CMD_TIMEOUT,
        help=f"Timeout in seconds for test command execution (default: {DEFAULT_TEST_CMD_TIMEOUT})",
    )
    parser.add_argument(
        "--webui",

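To see what the new flag does at parse time, here is a minimal, standalone argparse sketch (the defaults mirror the config.py hunk later in this commit; the parser itself is illustrative):

```python
import argparse

DEFAULT_MAX_TEST_CMD_RETRIES = 3   # mirrors ra_aid.config
DEFAULT_TEST_CMD_TIMEOUT = 60 * 5  # 300 seconds, mirrors ra_aid.config

parser = argparse.ArgumentParser(prog="ra-aid")
parser.add_argument("--max-test-cmd-retries", type=int, default=DEFAULT_MAX_TEST_CMD_RETRIES)
parser.add_argument("--test-cmd-timeout", type=int, default=DEFAULT_TEST_CMD_TIMEOUT)

args = parser.parse_args(["--test-cmd-timeout", "600"])
print(args.max_test_cmd_retries, args.test_cmd_timeout)  # -> 3 600
```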
@ -227,11 +239,10 @@ Examples:
|
|||
parsed_args.expert_provider = "deepseek"
|
||||
parsed_args.expert_model = "deepseek-reasoner"
|
||||
else:
|
||||
# Fall back to main provider if neither is available
|
||||
# Fall back to main provider if neither is available
|
||||
parsed_args.expert_provider = parsed_args.provider
|
||||
parsed_args.expert_model = parsed_args.model
|
||||
|
||||
|
||||
# Validate temperature range if provided
|
||||
if parsed_args.temperature is not None and not (
|
||||
0.0 <= parsed_args.temperature <= 2.0
|
||||
|
|
@ -294,15 +305,24 @@ def main():
|
|||
|
||||
# Validate model configuration early
|
||||
from ra_aid.models_params import models_params
|
||||
|
||||
model_config = models_params.get(args.provider, {}).get(args.model or "", {})
|
||||
supports_temperature = model_config.get("supports_temperature", args.provider in ["anthropic", "openai", "openrouter", "openai-compatible", "deepseek"])
|
||||
|
||||
supports_temperature = model_config.get(
|
||||
"supports_temperature",
|
||||
args.provider
|
||||
in ["anthropic", "openai", "openrouter", "openai-compatible", "deepseek"],
|
||||
)
|
||||
|
||||
if supports_temperature and args.temperature is None:
|
||||
args.temperature = model_config.get("default_temperature")
|
||||
if args.temperature is None:
|
||||
print_error(f"Temperature must be provided for model {args.model} which supports temperature")
|
||||
print_error(
|
||||
f"Temperature must be provided for model {args.model} which supports temperature"
|
||||
)
|
||||
sys.exit(1)
|
||||
logger.debug(f"Using default temperature {args.temperature} for model {args.model}")
|
||||
logger.debug(
|
||||
f"Using default temperature {args.temperature} for model {args.model}"
|
||||
)
|
||||
|
||||
# Display status lines
|
||||
status = Text()
|
||||
|
|
@ -324,16 +344,13 @@ def main():
|
|||
|
||||
# Search info
|
||||
status.append("🔍 Search: ")
|
||||
status.append("Enabled" if web_research_enabled else "Disabled",
|
||||
style=None if web_research_enabled else "italic")
|
||||
|
||||
status.append(
|
||||
"Enabled" if web_research_enabled else "Disabled",
|
||||
style=None if web_research_enabled else "italic",
|
||||
)
|
||||
|
||||
console.print(
|
||||
Panel(
|
||||
status,
|
||||
title="Config",
|
||||
border_style="bright_blue",
|
||||
padding=(0, 1)
|
||||
)
|
||||
Panel(status, title="Config", border_style="bright_blue", padding=(0, 1))
|
||||
)
|
||||
|
||||
# Handle chat mode
|
||||
|
|
@ -400,9 +417,9 @@ def main():
|
|||
chat_agent,
|
||||
CHAT_PROMPT.format(
|
||||
initial_request=initial_request,
|
||||
web_research_section=WEB_RESEARCH_PROMPT_SECTION_CHAT
|
||||
if web_research_enabled
|
||||
else "",
|
||||
web_research_section=(
|
||||
WEB_RESEARCH_PROMPT_SECTION_CHAT if web_research_enabled else ""
|
||||
),
|
||||
working_directory=working_directory,
|
||||
current_date=current_date,
|
||||
project_info=formatted_project_info,
|
||||
|
|
@@ -428,6 +445,7 @@ def main():
        "auto_test": args.auto_test,
        "test_cmd": args.test_cmd,
        "max_test_cmd_retries": args.max_test_cmd_retries,
        "test_cmd_timeout": args.test_cmd_timeout,
    }

    # Store config in global memory for access by is_informational_query

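Downstream, the test executor reads the `test_cmd_timeout` key back out of this config dict, falling back to the shared default (see the handle_user_defined_test_cmd_execution hunk further below). A minimal sketch of that lookup:

```python
from typing import Any, Dict

DEFAULT_TEST_CMD_TIMEOUT = 300  # mirrors ra_aid.config

def resolve_test_timeout(config: Dict[str, Any]) -> int:
    # Same lookup the executor performs in this commit.
    return config.get("test_cmd_timeout", DEFAULT_TEST_CMD_TIMEOUT)

assert resolve_test_timeout({}) == 300
assert resolve_test_timeout({"test_cmd_timeout": 600}) == 600
```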
@@ -2,3 +2,4 @@

DEFAULT_RECURSION_LIMIT = 100
DEFAULT_MAX_TEST_CMD_RETRIES = 3
DEFAULT_TEST_CMD_TIMEOUT = 60 * 5  # 5 minutes in seconds

@ -1,11 +1,9 @@
|
|||
"""Module for efficient file listing using git."""
|
||||
|
||||
import subprocess
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Tuple
|
||||
import tempfile
|
||||
import shutil
|
||||
|
||||
|
||||
class FileListerError(Exception):
|
||||
|
|
@@ -133,12 +131,18 @@ def get_file_listing(

        # Combine and process the files
        all_files = []
        for file in tracked_files_process.stdout.splitlines() + untracked_files_process.stdout.splitlines():
        for file in (
            tracked_files_process.stdout.splitlines()
            + untracked_files_process.stdout.splitlines()
        ):
            file = file.strip()
            if not file:
                continue
            # Skip hidden files unless explicitly included
            if not include_hidden and (file.startswith(".") or any(part.startswith(".") for part in file.split("/"))):
            if not include_hidden and (
                file.startswith(".")
                or any(part.startswith(".") for part in file.split("/"))
            ):
                continue
            # Skip .aider files
            if ".aider" in file:

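The hidden-file condition above treats a path as hidden when any component starts with a dot. A standalone restatement of that predicate, with examples:

```python
def is_hidden(path: str) -> bool:
    # Same condition as in get_file_listing above.
    return path.startswith(".") or any(part.startswith(".") for part in path.split("/"))

assert is_hidden(".env")
assert is_hidden("src/.local/settings")
assert not is_hidden("src/main.py")
```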
@ -155,7 +159,7 @@ def get_file_listing(
|
|||
|
||||
return all_files, total_count
|
||||
|
||||
except (DirectoryNotFoundError, DirectoryAccessError, GitCommandError) as e:
|
||||
except (DirectoryNotFoundError, DirectoryAccessError, GitCommandError):
|
||||
# Re-raise known exceptions
|
||||
raise
|
||||
except PermissionError as e:
|
||||
|
|
|
|||
|
|
@ -1,21 +1,21 @@
|
|||
import os
|
||||
from typing import Any, Dict, Optional
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from openai import OpenAI
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
from langchain_core.language_models import BaseChatModel
|
||||
from langchain_google_genai import ChatGoogleGenerativeAI
|
||||
from langchain_openai import ChatOpenAI
|
||||
from openai import OpenAI
|
||||
|
||||
from ra_aid.chat_models.deepseek_chat import ChatDeepseekReasoner
|
||||
from ra_aid.logging_config import get_logger
|
||||
from typing import List
|
||||
|
||||
from .models_params import models_params
|
||||
|
||||
|
||||
def get_available_openai_models() -> List[str]:
|
||||
"""Fetch available OpenAI models using OpenAI client.
|
||||
|
||||
|
||||
Returns:
|
||||
List of available model names
|
||||
"""
|
||||
|
|
@@ -25,35 +25,37 @@ def get_available_openai_models() -> List[str]:
        models = client.models.list()
        return [str(model.id) for model in models.data]
    except Exception:
        # Return empty list if unable to fetch models
        # Return empty list if unable to fetch models
        return []


def select_expert_model(provider: str, model: Optional[str] = None) -> Optional[str]:
    """Select appropriate expert model based on provider and availability.

    Args:
        provider: The LLM provider
        model: Optional explicitly specified model name

    Returns:
        Selected model name or None if no suitable model found
    """
    if provider != "openai" or model is not None:
        return model

    # Try to get available models
    available_models = get_available_openai_models()

    # Priority order for expert models
    priority_models = ["o3-mini", "o1", "o1-preview"]

    # Return first available model from priority list
    for model_name in priority_models:
        if model_name in available_models:
            return model_name

    return None


known_temp_providers = {
    "openai",
    "anthropic",

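The behaviour of `select_expert_model` above can be summarized from the unit tests added later in this commit (test_select_expert_model_priority and related cases):

```python
from ra_aid.llm import select_expert_model

# Assuming get_available_openai_models() returns ["gpt-4", "o1", "o3-mini"]:
select_expert_model("openai")           # -> "o3-mini"  (highest-priority available model)
select_expert_model("openai", "o1")     # -> "o1"       (an explicit model is returned as-is)
select_expert_model("anthropic")        # -> None       (auto-selection only applies to OpenAI)
```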
@ -220,7 +222,9 @@ def create_llm_client(
|
|||
temp_kwargs = {"temperature": 0} if supports_temperature else {}
|
||||
elif supports_temperature:
|
||||
if temperature is None:
|
||||
raise ValueError(f"Temperature must be provided for model {model_name} which supports temperature")
|
||||
raise ValueError(
|
||||
f"Temperature must be provided for model {model_name} which supports temperature"
|
||||
)
|
||||
temp_kwargs = {"temperature": temperature}
|
||||
else:
|
||||
temp_kwargs = {}
|
||||
|
|
@ -248,11 +252,13 @@ def create_llm_client(
|
|||
}
|
||||
if is_expert:
|
||||
openai_kwargs["reasoning_effort"] = "high"
|
||||
return ChatOpenAI(**{
|
||||
**openai_kwargs,
|
||||
"timeout": LLM_REQUEST_TIMEOUT,
|
||||
"max_retries": LLM_MAX_RETRIES,
|
||||
})
|
||||
return ChatOpenAI(
|
||||
**{
|
||||
**openai_kwargs,
|
||||
"timeout": LLM_REQUEST_TIMEOUT,
|
||||
"max_retries": LLM_MAX_RETRIES,
|
||||
}
|
||||
)
|
||||
elif provider == "anthropic":
|
||||
return ChatAnthropic(
|
||||
api_key=config["api_key"],
|
||||
|
|
@ -289,8 +295,6 @@ def initialize_llm(
|
|||
return create_llm_client(provider, model_name, temperature, is_expert=False)
|
||||
|
||||
|
||||
def initialize_expert_llm(
|
||||
provider: str, model_name: str
|
||||
) -> BaseChatModel:
|
||||
def initialize_expert_llm(provider: str, model_name: str) -> BaseChatModel:
|
||||
"""Initialize an expert language model client based on the specified provider and model."""
|
||||
return create_llm_client(provider, model_name, temperature=None, is_expert=True)
|
||||
|
|
|
|||
[File diff suppressed because it is too large]
|
|
@ -9,47 +9,50 @@ The interface remains compatible with external callers expecting a tuple (output
|
|||
where output is a bytes object (UTF-8 encoded).
|
||||
"""
|
||||
|
||||
import os
|
||||
import shlex
|
||||
import shutil
|
||||
import errno
|
||||
import sys
|
||||
import io
|
||||
import subprocess
|
||||
import os
|
||||
import select
|
||||
import termios
|
||||
import tty
|
||||
import time
|
||||
import shutil
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import termios
|
||||
import time
|
||||
import tty
|
||||
from typing import List, Tuple
|
||||
|
||||
import pyte
|
||||
from pyte.screens import HistoryScreen
|
||||
|
||||
|
||||
def render_line(line, columns: int) -> str:
|
||||
"""Render a single screen line from the pyte buffer (a mapping of column to Char)."""
|
||||
return "".join(line[x].data for x in range(columns))
|
||||
|
||||
def run_interactive_command(cmd: List[str], expected_runtime_seconds: int = 30) -> Tuple[bytes, int]:
|
||||
|
||||
def run_interactive_command(
|
||||
cmd: List[str], expected_runtime_seconds: int = 30
|
||||
) -> Tuple[bytes, int]:
|
||||
"""
|
||||
Runs an interactive command with a pseudo-tty, capturing final scrollback history.
|
||||
|
||||
|
||||
Assumptions and constraints:
|
||||
- Running on a Linux system.
|
||||
- `cmd` is a non-empty list where cmd[0] is the executable.
|
||||
- The executable is on PATH.
|
||||
|
||||
|
||||
Args:
|
||||
cmd: A list containing the command and its arguments.
|
||||
expected_runtime_seconds: Expected runtime in seconds, defaults to 30.
|
||||
If process exceeds 2x this value, it will be terminated gracefully.
|
||||
If process exceeds 3x this value, it will be killed forcefully.
|
||||
Must be between 1 and 1800 seconds (30 minutes).
|
||||
|
||||
|
||||
Returns:
|
||||
A tuple of (captured_output, return_code), where captured_output is a UTF-8 encoded
|
||||
bytes object containing the trimmed non-empty history lines from the terminal session.
|
||||
|
||||
|
||||
Raises:
|
||||
ValueError: If no command is provided.
|
||||
FileNotFoundError: If the command is not found in PATH.
|
||||
|
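The 2x/3x escalation described in the docstring above can be pictured with a short sketch; this is an illustration of the stated policy, not the module's actual loop:

```python
import os
import signal
import time

def enforce_runtime(proc, expected_runtime_seconds: int) -> bool:
    """Terminate the child's process group after 2x the expected runtime,
    kill it after 3x. Returns True if the process had to be stopped."""
    start = time.monotonic()
    sent_term = False
    while proc.poll() is None:
        elapsed = time.monotonic() - start
        if elapsed > 3 * expected_runtime_seconds:
            os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
            return True
        if elapsed > 2 * expected_runtime_seconds and not sent_term:
            os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
            sent_term = True
        time.sleep(0.1)
    return sent_term
```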
|
@ -61,8 +64,10 @@ def run_interactive_command(cmd: List[str], expected_runtime_seconds: int = 30)
|
|||
if shutil.which(cmd[0]) is None:
|
||||
raise FileNotFoundError(f"Command '{cmd[0]}' not found in PATH.")
|
||||
if expected_runtime_seconds <= 0 or expected_runtime_seconds > 1800:
|
||||
raise ValueError("expected_runtime_seconds must be between 1 and 1800 seconds (30 minutes)")
|
||||
|
||||
raise ValueError(
|
||||
"expected_runtime_seconds must be between 1 and 1800 seconds (30 minutes)"
|
||||
)
|
||||
|
||||
try:
|
||||
term_size = os.get_terminal_size()
|
||||
cols, rows = term_size.columns, term_size.lines
|
||||
|
|
@ -85,20 +90,22 @@ def run_interactive_command(cmd: List[str], expected_runtime_seconds: int = 30)
|
|||
|
||||
# Set up environment variables for the subprocess using detected terminal size.
|
||||
env = os.environ.copy()
|
||||
env.update({
|
||||
'DEBIAN_FRONTEND': 'noninteractive',
|
||||
'GIT_PAGER': '',
|
||||
'PYTHONUNBUFFERED': '1',
|
||||
'CI': 'true',
|
||||
'LANG': 'C.UTF-8',
|
||||
'LC_ALL': 'C.UTF-8',
|
||||
'COLUMNS': str(cols),
|
||||
'LINES': str(rows),
|
||||
'FORCE_COLOR': '1',
|
||||
'GIT_TERMINAL_PROMPT': '0',
|
||||
'PYTHONDONTWRITEBYTECODE': '1',
|
||||
'NODE_OPTIONS': '--unhandled-rejections=strict'
|
||||
})
|
||||
env.update(
|
||||
{
|
||||
"DEBIAN_FRONTEND": "noninteractive",
|
||||
"GIT_PAGER": "",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"CI": "true",
|
||||
"LANG": "C.UTF-8",
|
||||
"LC_ALL": "C.UTF-8",
|
||||
"COLUMNS": str(cols),
|
||||
"LINES": str(rows),
|
||||
"FORCE_COLOR": "1",
|
||||
"GIT_TERMINAL_PROMPT": "0",
|
||||
"PYTHONDONTWRITEBYTECODE": "1",
|
||||
"NODE_OPTIONS": "--unhandled-rejections=strict",
|
||||
}
|
||||
)
|
||||
|
||||
proc = subprocess.Popen(
|
||||
cmd,
|
||||
|
|
@ -108,7 +115,7 @@ def run_interactive_command(cmd: List[str], expected_runtime_seconds: int = 30)
|
|||
bufsize=0,
|
||||
close_fds=True,
|
||||
env=env,
|
||||
preexec_fn=os.setsid # Create new process group for proper signal handling.
|
||||
preexec_fn=os.setsid, # Create new process group for proper signal handling.
|
||||
)
|
||||
os.close(slave_fd) # Close slave end in the parent process.
|
||||
|
||||
|
|
@ -200,19 +207,21 @@ def run_interactive_command(cmd: List[str], expected_runtime_seconds: int = 30)
|
|||
# Trim out empty lines to get only meaningful "history" lines.
|
||||
trimmed_lines = [line for line in all_lines if line.strip()]
|
||||
final_output = "\n".join(trimmed_lines)
|
||||
|
||||
|
||||
# Add timeout message if process was terminated due to timeout.
|
||||
if was_terminated:
|
||||
timeout_msg = f"\n[Process exceeded timeout ({expected_runtime_seconds} seconds expected)]"
|
||||
final_output += timeout_msg
|
||||
|
||||
|
||||
# Limit output to the last 8000 bytes.
|
||||
final_output = final_output[-8000:]
|
||||
|
||||
|
||||
return final_output.encode("utf-8"), proc.returncode
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
|
||||
if len(sys.argv) < 2:
|
||||
print("Usage: interactive.py <command> [args...]")
|
||||
sys.exit(1)
|
||||
|
|
|
|||
|
|
@ -47,9 +47,10 @@ class OpenAIStrategy(ProviderStrategy):
|
|||
if not key:
|
||||
missing.append("EXPERT_OPENAI_API_KEY environment variable is not set")
|
||||
|
||||
# Handle expert model selection if none specified
|
||||
# Handle expert model selection if none specified
|
||||
if hasattr(args, "expert_model") and not args.expert_model:
|
||||
from ra_aid.llm import select_expert_model
|
||||
|
||||
model = select_expert_model("openai")
|
||||
if model:
|
||||
args.expert_model = model
|
||||
|
|
|
|||
|
|
@ -1,25 +1,19 @@
|
|||
from ra_aid.tools import (
|
||||
ask_expert,
|
||||
ask_human,
|
||||
delete_key_facts,
|
||||
delete_key_snippets,
|
||||
deregister_related_files,
|
||||
emit_expert_context,
|
||||
emit_key_facts,
|
||||
emit_key_snippets,
|
||||
emit_plan,
|
||||
emit_related_files,
|
||||
emit_research_notes,
|
||||
fuzzy_find_project_files,
|
||||
list_directory_tree,
|
||||
monorepo_detected,
|
||||
plan_implementation_completed,
|
||||
read_file_tool,
|
||||
ripgrep_search,
|
||||
run_programming_task,
|
||||
run_shell_command,
|
||||
task_completed,
|
||||
ui_detected,
|
||||
web_search_tavily,
|
||||
)
|
||||
from ra_aid.tools.agent import (
|
||||
|
|
@ -30,7 +24,6 @@ from ra_aid.tools.agent import (
|
|||
request_web_research,
|
||||
)
|
||||
from ra_aid.tools.memory import one_shot_completed
|
||||
from ra_aid.tools.write_file import put_complete_file_contents
|
||||
|
||||
|
||||
# Read-only tools that don't modify system state
|
||||
|
|
@ -73,7 +66,9 @@ def get_read_only_tools(
|
|||
# Define constant tool groups
|
||||
READ_ONLY_TOOLS = get_read_only_tools()
|
||||
# MODIFICATION_TOOLS = [run_programming_task, put_complete_file_contents]
|
||||
MODIFICATION_TOOLS = [run_programming_task] # having put_complete_file_contents causes trouble :(
|
||||
MODIFICATION_TOOLS = [
|
||||
run_programming_task
|
||||
] # having put_complete_file_contents causes trouble :(
|
||||
COMMON_TOOLS = get_read_only_tools()
|
||||
EXPERT_TOOLS = [emit_expert_context, ask_expert]
|
||||
RESEARCH_TOOLS = [
|
||||
|
|
|
|||
|
|
@ -275,6 +275,7 @@ def request_task_implementation(task_spec: str) -> Dict[str, Any]:
|
|||
print_task_header(task_spec)
|
||||
# Run implementation agent
|
||||
from ..agent_utils import run_task_implementation_agent
|
||||
|
||||
_global_memory["completion_message"] = ""
|
||||
|
||||
_result = run_task_implementation_agent(
|
||||
|
|
@ -345,6 +346,7 @@ def request_implementation(task_spec: str) -> Dict[str, Any]:
|
|||
try:
|
||||
# Run planning agent
|
||||
from ..agent_utils import run_planning_agent
|
||||
|
||||
_global_memory["completion_message"] = ""
|
||||
|
||||
_result = run_planning_agent(
|
||||
|
|
|
|||
|
|
@ -185,7 +185,11 @@ def ask_expert(question: str) -> str:
|
|||
|
||||
query_parts.extend(["# Question", question])
|
||||
query_parts.extend(
|
||||
["\n # Addidional Requirements", "**DO NOT OVERTHINK**", "**DO NOT OVERCOMPLICATE**"]
|
||||
[
|
||||
"\n # Addidional Requirements",
|
||||
"**DO NOT OVERTHINK**",
|
||||
"**DO NOT OVERCOMPLICATE**",
|
||||
]
|
||||
)
|
||||
|
||||
# Join all parts
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ from rich.console import Console
|
|||
from rich.markdown import Markdown
|
||||
from rich.panel import Panel
|
||||
|
||||
from ra_aid.config import DEFAULT_TEST_CMD_TIMEOUT
|
||||
from ra_aid.logging_config import get_logger
|
||||
from ra_aid.tools.human import ask_human
|
||||
from ra_aid.tools.shell import run_shell_command
|
||||
|
|
@@ -85,7 +86,7 @@ class TestCommandExecutor:
            cmd: Test command to execute
            original_prompt: Original prompt text
        """
        timeout = self.config.get("timeout", 30)
        timeout = self.config.get("test_cmd_timeout", DEFAULT_TEST_CMD_TIMEOUT)
        try:
            logger.info(f"Executing test command: {cmd} with timeout {timeout}s")
            test_result = run_shell_command(cmd, timeout=timeout)

@@ -99,11 +100,11 @@
            logger.info("Test command executed successfully")

        except subprocess.TimeoutExpired:
            logger.warning(f"Test command timed out after {timeout}s: {cmd}")
            self.state.test_attempts += 1
            self.state.prompt = (
                f"{original_prompt}. Previous attempt timed out after {timeout} seconds"
            logger.warning(
                f"Test command timed out after {DEFAULT_TEST_CMD_TIMEOUT}s: {cmd}"
            )
            self.state.test_attempts += 1
            self.state.prompt = f"{original_prompt}. Previous attempt timed out after {DEFAULT_TEST_CMD_TIMEOUT} seconds"
            self.display_test_failure()

        except subprocess.CalledProcessError as e:

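Condensing the two executor hunks above, the control flow around the new timeout looks roughly like this (names simplified; a sketch, not the class's full code):

```python
import subprocess

def run_test_once(cmd, config, run_shell_command, state):
    timeout = config.get("test_cmd_timeout", 300)  # DEFAULT_TEST_CMD_TIMEOUT
    try:
        run_shell_command(cmd, timeout=timeout)
    except subprocess.TimeoutExpired:
        # Count the attempt and fold the timeout into the next prompt.
        state.test_attempts += 1
        state.prompt = f"{state.prompt}. Previous attempt timed out after {timeout} seconds"
```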
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import os
|
||||
from typing import Any, Dict, List, Optional, Set, Union
|
||||
from typing import Dict, List, Optional, Set, Union
|
||||
|
||||
from langchain_core.tools import tool
|
||||
from rich.console import Console
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
import os
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Union
|
||||
|
|
@ -22,25 +21,25 @@ logger = get_logger(__name__)
|
|||
|
||||
def get_aider_executable() -> str:
|
||||
"""Get the path to the aider executable in the same bin/Scripts directory as Python.
|
||||
|
||||
|
||||
Returns:
|
||||
str: Full path to aider executable
|
||||
"""
|
||||
# Get directory containing Python executable
|
||||
# Get directory containing Python executable
|
||||
bin_dir = Path(sys.executable).parent
|
||||
|
||||
|
||||
# Check for platform-specific executable name
|
||||
if sys.platform == "win32":
|
||||
aider_exe = bin_dir / "aider.exe"
|
||||
else:
|
||||
aider_exe = bin_dir / "aider"
|
||||
|
||||
|
||||
if not aider_exe.exists():
|
||||
raise RuntimeError(f"Could not find aider executable at {aider_exe}")
|
||||
|
||||
|
||||
if not os.access(aider_exe, os.X_OK):
|
||||
raise RuntimeError(f"Aider executable at {aider_exe} is not executable")
|
||||
|
||||
|
||||
return str(aider_exe)
|
||||
|
||||
|
||||
|
|
@ -91,10 +90,14 @@ def run_programming_task(
|
|||
|
||||
# Get combined list of files (explicit + related) with normalized paths
|
||||
# and deduplicated using set operations
|
||||
files_to_use = list({os.path.abspath(f) for f in (files or [])} | {
|
||||
os.path.abspath(f) for f in _global_memory["related_files"].values()
|
||||
if "related_files" in _global_memory
|
||||
})
|
||||
files_to_use = list(
|
||||
{os.path.abspath(f) for f in (files or [])}
|
||||
| {
|
||||
os.path.abspath(f)
|
||||
for f in _global_memory["related_files"].values()
|
||||
if "related_files" in _global_memory
|
||||
}
|
||||
)
|
||||
|
||||
# Add config file if specified
|
||||
if "config" in _global_memory and _global_memory["config"].get("aider_config"):
|
||||
|
|
@ -146,7 +149,7 @@ def run_programming_task(
|
|||
|
||||
# Log the programming task
|
||||
log_work_event(f"Executed programming task: {_truncate_for_log(instructions)}")
|
||||
|
||||
|
||||
# Return structured output
|
||||
return {
|
||||
"output": truncate_output(result[0].decode()) if result[0] else "",
|
||||
|
|
@@ -190,21 +193,21 @@ def parse_aider_flags(aider_flags: str) -> List[str]:

    # Split by comma and strip whitespace
    flag_groups = [group.strip() for group in aider_flags.split(",")]

    result = []
    for group in flag_groups:
        if not group:
            continue

        # Split by space to separate flag from value
        parts = group.split()

        # Add '--' prefix to the flag if not present, stripping any extra dashes
        flag = parts[0].lstrip("-")  # Remove all leading dashes
        flag = f"--{flag}"  # Add exactly two dashes

        result.append(flag)

        # Add any remaining parts as separate values
        if len(parts) > 1:
            result.extend(parts[1:])

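Based on the loop shown above, the flag parsing works out as follows (a worked example; the values are not copied from the test suite):

```python
from ra_aid.tools.programmer import parse_aider_flags

parse_aider_flags("yes-always, dark-mode")
# -> ["--yes-always", "--dark-mode"]

parse_aider_flags("--model gpt-4, architect")
# -> ["--model", "gpt-4", "--architect"]
#    (the first token of each group becomes the flag, remaining tokens stay as values)
```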
|
|
|||
|
|
@@ -21,12 +21,14 @@ def _truncate_for_log(text: str, max_length: int = 300) -> str:


@tool
def run_shell_command(command: str, expected_runtime_seconds: int = 30) -> Dict[str, Union[str, int, bool]]:
def run_shell_command(
    command: str, timeout: int = 30
) -> Dict[str, Union[str, int, bool]]:
    """Execute a shell command and return its output.

    Args:
        command: The shell command to execute
        expected_runtime_seconds: Expected runtime in seconds, defaults to 30.
        timeout: Expected runtime in seconds, defaults to 30.
            If process exceeds 2x this value, it will be terminated gracefully.
            If process exceeds 3x this value, it will be killed forcefully.

@@ -79,7 +81,9 @@ def run_shell_command(command: str, expected_runtime_seconds: int = 30) -> Dict[

    try:
        print()
        output, return_code = run_interactive_command(["/bin/bash", "-c", command], expected_runtime_seconds=expected_runtime_seconds)
        output, return_code = run_interactive_command(
            ["/bin/bash", "-c", command], expected_runtime_seconds=timeout
        )
        print()
        result = {
            "output": truncate_output(output.decode()) if output else "",

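Putting the two hunks together: the shell tool now exposes `timeout` and forwards it to `run_interactive_command` as `expected_runtime_seconds`. A hypothetical invocation (LangChain tools are called with a dict of arguments, as in the tests elsewhere in this commit; the command string is illustrative):

```python
from ra_aid.tools.shell import run_shell_command

result = run_shell_command.invoke({"command": "pytest -q", "timeout": 300})
print(result["output"])  # truncated terminal output, as built above
```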
|
|
|||
|
|
@ -12,7 +12,10 @@ console = Console()
|
|||
|
||||
@tool
|
||||
def put_complete_file_contents(
|
||||
filepath: str, complete_file_contents: str = "", encoding: str = "utf-8", verbose: bool = True
|
||||
filepath: str,
|
||||
complete_file_contents: str = "",
|
||||
encoding: str = "utf-8",
|
||||
verbose: bool = True,
|
||||
) -> Dict[str, any]:
|
||||
"""Write the complete contents of a file, creating it if it doesn't exist.
|
||||
This tool is specifically for writing the entire contents of a file at once,
|
||||
|
|
@ -22,7 +25,7 @@ def put_complete_file_contents(
|
|||
|
||||
Args:
|
||||
filepath: (Required) Path to the file to write. Must be provided.
|
||||
complete_file_contents: Complete string content to write to the file. Defaults to
|
||||
complete_file_contents: Complete string content to write to the file. Defaults to
|
||||
an empty string, which will create an empty file.
|
||||
encoding: File encoding to use (default: utf-8)
|
||||
verbose: Whether to display a Rich panel with write statistics (default: True)
|
||||
|
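Usage of this tool mirrors the write_file tests later in this commit: a single dict of arguments (the file path here is illustrative):

```python
from ra_aid.tools.write_file import put_complete_file_contents

result = put_complete_file_contents(
    {"filepath": "notes/todo.txt", "complete_file_contents": "Hello, World!\n"}
)
assert result["success"] is True
```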
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
"""Tests for the interactive subprocess module."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
|
|
@ -49,7 +48,7 @@ def test_empty_command():
|
|||
|
||||
def test_interactive_command():
|
||||
"""Test running an interactive command.
|
||||
|
||||
|
||||
This test verifies that output appears in real-time using process substitution.
|
||||
We use a command that prints to both stdout and stderr.
|
||||
"""
|
||||
|
|
@ -70,7 +69,9 @@ def test_large_output():
|
|||
# Clean up any leading artifacts
|
||||
output_cleaned = output.lstrip(b"^D")
|
||||
# Verify the output size is limited to 8000 bytes
|
||||
assert len(output_cleaned) <= 8000, f"Output exceeded 8000 bytes: {len(output_cleaned)} bytes"
|
||||
assert (
|
||||
len(output_cleaned) <= 8000
|
||||
), f"Output exceeded 8000 bytes: {len(output_cleaned)} bytes"
|
||||
# Verify we have the last lines (should contain the highest numbers)
|
||||
assert b"Line 1000" in output_cleaned, "Missing last line of output"
|
||||
assert retcode == 0
|
||||
|
|
@ -83,17 +84,19 @@ def test_byte_limit():
|
|||
cmd = 'for i in {1..200}; do printf "%04d: %s\\n" "$i" "This is a line with padding to ensure we go over the byte limit quickly"; done'
|
||||
output, retcode = run_interactive_command(["/bin/bash", "-c", cmd])
|
||||
output_cleaned = output.lstrip(b"^D")
|
||||
|
||||
|
||||
# Verify exact 8000 byte limit
|
||||
assert len(output_cleaned) <= 8000, f"Output exceeded 8000 bytes: {len(output_cleaned)} bytes"
|
||||
|
||||
assert (
|
||||
len(output_cleaned) <= 8000
|
||||
), f"Output exceeded 8000 bytes: {len(output_cleaned)} bytes"
|
||||
|
||||
# Get the last line number from the output
|
||||
last_line = output_cleaned.splitlines()[-1]
|
||||
last_num = int(last_line.split(b':')[0])
|
||||
|
||||
last_num = int(last_line.split(b":")[0])
|
||||
|
||||
# Verify we have a high number in the last line (should be near 200)
|
||||
assert last_num > 150, f"Expected last line number to be near 200, got {last_num}"
|
||||
|
||||
|
||||
assert retcode == 0
|
||||
|
||||
|
||||
|
|
@ -134,16 +137,20 @@ def test_cat_medium_file():
|
|||
for line in output_cleaned.splitlines()
|
||||
if b"Script" not in line and line.strip()
|
||||
]
|
||||
|
||||
|
||||
# With 8000 byte limit, we expect to see the last portion of lines
|
||||
# The exact number may vary due to terminal settings, but we should
|
||||
# at least have the last lines of the file
|
||||
assert len(lines) >= 90, f"Expected at least 90 lines due to 8000 byte limit, got {len(lines)}"
|
||||
|
||||
assert (
|
||||
len(lines) >= 90
|
||||
), f"Expected at least 90 lines due to 8000 byte limit, got {len(lines)}"
|
||||
|
||||
# Most importantly, verify we have the last lines
|
||||
last_line = lines[-1].decode('utf-8')
|
||||
assert "This is test line 499" in last_line, f"Expected last line to be 499, got: {last_line}"
|
||||
|
||||
last_line = lines[-1].decode("utf-8")
|
||||
assert (
|
||||
"This is test line 499" in last_line
|
||||
), f"Expected last line to be 499, got: {last_line}"
|
||||
|
||||
assert retcode == 0
|
||||
finally:
|
||||
os.unlink(temp_path)
|
||||
|
|
@ -155,9 +162,7 @@ def test_realtime_output():
|
|||
cmd = "echo 'first'; sleep 0.1; echo 'second'; sleep 0.1; echo 'third'"
|
||||
output, retcode = run_interactive_command(["/bin/bash", "-c", cmd])
|
||||
lines = [
|
||||
line
|
||||
for line in output.splitlines()
|
||||
if b"Script" not in line and line.strip()
|
||||
line for line in output.splitlines() if b"Script" not in line and line.strip()
|
||||
]
|
||||
assert b"first" in lines[0]
|
||||
assert b"second" in lines[1]
|
||||
|
|
|
|||
|
|
@ -57,7 +57,13 @@ def test_initialize_expert_defaults(clean_env, mock_openai, monkeypatch):
|
|||
monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "test-key")
|
||||
_llm = initialize_expert_llm("openai", "o1")
|
||||
|
||||
mock_openai.assert_called_once_with(api_key="test-key", model="o1", reasoning_effort="high", timeout=180, max_retries=5)
|
||||
mock_openai.assert_called_once_with(
|
||||
api_key="test-key",
|
||||
model="o1",
|
||||
reasoning_effort="high",
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
|
||||
def test_initialize_expert_openai_custom(clean_env, mock_openai, monkeypatch):
|
||||
|
|
@ -66,8 +72,12 @@ def test_initialize_expert_openai_custom(clean_env, mock_openai, monkeypatch):
|
|||
_llm = initialize_expert_llm("openai", "gpt-4-preview")
|
||||
|
||||
mock_openai.assert_called_once_with(
|
||||
api_key="test-key", model="gpt-4-preview", temperature=0, reasoning_effort="high",
|
||||
timeout=180, max_retries=5
|
||||
api_key="test-key",
|
||||
model="gpt-4-preview",
|
||||
temperature=0,
|
||||
reasoning_effort="high",
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -77,8 +87,11 @@ def test_initialize_expert_gemini(clean_env, mock_gemini, monkeypatch):
|
|||
_llm = initialize_expert_llm("gemini", "gemini-2.0-flash-thinking-exp-1219")
|
||||
|
||||
mock_gemini.assert_called_once_with(
|
||||
api_key="test-key", model="gemini-2.0-flash-thinking-exp-1219", temperature=0,
|
||||
timeout=180, max_retries=5
|
||||
api_key="test-key",
|
||||
model="gemini-2.0-flash-thinking-exp-1219",
|
||||
temperature=0,
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -88,8 +101,11 @@ def test_initialize_expert_anthropic(clean_env, mock_anthropic, monkeypatch):
|
|||
_llm = initialize_expert_llm("anthropic", "claude-3")
|
||||
|
||||
mock_anthropic.assert_called_once_with(
|
||||
api_key="test-key", model_name="claude-3", temperature=0,
|
||||
timeout=180, max_retries=5
|
||||
api_key="test-key",
|
||||
model_name="claude-3",
|
||||
temperature=0,
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -104,7 +120,7 @@ def test_initialize_expert_openrouter(clean_env, mock_openai, monkeypatch):
|
|||
model="models/mistral-large",
|
||||
temperature=0,
|
||||
timeout=180,
|
||||
max_retries=5
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -120,7 +136,7 @@ def test_initialize_expert_openai_compatible(clean_env, mock_openai, monkeypatch
|
|||
model="local-model",
|
||||
temperature=0,
|
||||
timeout=180,
|
||||
max_retries=5
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -151,16 +167,24 @@ def test_initialize_openai(clean_env, mock_openai):
|
|||
os.environ["OPENAI_API_KEY"] = "test-key"
|
||||
_model = initialize_llm("openai", "gpt-4", temperature=0.7)
|
||||
|
||||
mock_openai.assert_called_once_with(api_key="test-key", model="gpt-4", temperature=0.7, timeout=180, max_retries=5)
|
||||
mock_openai.assert_called_once_with(
|
||||
api_key="test-key", model="gpt-4", temperature=0.7, timeout=180, max_retries=5
|
||||
)
|
||||
|
||||
|
||||
def test_initialize_gemini(clean_env, mock_gemini):
|
||||
"""Test Gemini provider initialization"""
|
||||
os.environ["GEMINI_API_KEY"] = "test-key"
|
||||
_model = initialize_llm("gemini", "gemini-2.0-flash-thinking-exp-1219", temperature=0.7)
|
||||
_model = initialize_llm(
|
||||
"gemini", "gemini-2.0-flash-thinking-exp-1219", temperature=0.7
|
||||
)
|
||||
|
||||
mock_gemini.assert_called_with(
|
||||
api_key="test-key", model="gemini-2.0-flash-thinking-exp-1219", temperature=0.7, timeout=180, max_retries=5
|
||||
api_key="test-key",
|
||||
model="gemini-2.0-flash-thinking-exp-1219",
|
||||
temperature=0.7,
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -169,7 +193,13 @@ def test_initialize_anthropic(clean_env, mock_anthropic):
|
|||
os.environ["ANTHROPIC_API_KEY"] = "test-key"
|
||||
_model = initialize_llm("anthropic", "claude-3", temperature=0.7)
|
||||
|
||||
mock_anthropic.assert_called_with(api_key="test-key", model_name="claude-3", temperature=0.7, timeout=180, max_retries=5)
|
||||
mock_anthropic.assert_called_with(
|
||||
api_key="test-key",
|
||||
model_name="claude-3",
|
||||
temperature=0.7,
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
|
||||
def test_initialize_openrouter(clean_env, mock_openai):
|
||||
|
|
@ -239,10 +269,22 @@ def test_temperature_defaults(clean_env, mock_openai, mock_anthropic, mock_gemin
|
|||
|
||||
# Test expert models don't require temperature
|
||||
initialize_expert_llm("openai", "o1")
|
||||
mock_openai.assert_called_with(api_key="test-key", model="o1", reasoning_effort="high", timeout=180, max_retries=5)
|
||||
mock_openai.assert_called_with(
|
||||
api_key="test-key",
|
||||
model="o1",
|
||||
reasoning_effort="high",
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
initialize_expert_llm("openai", "o1-mini")
|
||||
mock_openai.assert_called_with(api_key="test-key", model="o1-mini", reasoning_effort="high", timeout=180, max_retries=5)
|
||||
mock_openai.assert_called_with(
|
||||
api_key="test-key",
|
||||
model="o1-mini",
|
||||
reasoning_effort="high",
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
|
||||
def test_explicit_temperature(clean_env, mock_openai, mock_anthropic, mock_gemini):
|
||||
|
|
@ -257,19 +299,31 @@ def test_explicit_temperature(clean_env, mock_openai, mock_anthropic, mock_gemin
|
|||
# Test OpenAI
|
||||
initialize_llm("openai", "test-model", temperature=test_temp)
|
||||
mock_openai.assert_called_with(
|
||||
api_key="test-key", model="test-model", temperature=test_temp, timeout=180, max_retries=5
|
||||
api_key="test-key",
|
||||
model="test-model",
|
||||
temperature=test_temp,
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
# Test Gemini
|
||||
initialize_llm("gemini", "test-model", temperature=test_temp)
|
||||
mock_gemini.assert_called_with(
|
||||
api_key="test-key", model="test-model", temperature=test_temp, timeout=180, max_retries=5
|
||||
api_key="test-key",
|
||||
model="test-model",
|
||||
temperature=test_temp,
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
# Test Anthropic
|
||||
initialize_llm("anthropic", "test-model", temperature=test_temp)
|
||||
mock_anthropic.assert_called_with(
|
||||
api_key="test-key", model_name="test-model", temperature=test_temp, timeout=180, max_retries=5
|
||||
api_key="test-key",
|
||||
model_name="test-model",
|
||||
temperature=test_temp,
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
# Test OpenRouter
|
||||
|
|
@ -290,13 +344,14 @@ def test_get_available_openai_models_success():
|
|||
mock_model.id = "gpt-4"
|
||||
mock_models = Mock()
|
||||
mock_models.data = [mock_model]
|
||||
|
||||
|
||||
with mock.patch("ra_aid.llm.OpenAI") as mock_client:
|
||||
mock_client.return_value.models.list.return_value = mock_models
|
||||
models = get_available_openai_models()
|
||||
assert models == ["gpt-4"]
|
||||
mock_client.return_value.models.list.assert_called_once()
|
||||
|
||||
|
||||
def test_get_available_openai_models_failure():
|
||||
"""Test graceful handling of model retrieval failure."""
|
||||
with mock.patch("ra_aid.llm.OpenAI") as mock_client:
|
||||
|
|
@ -305,32 +360,41 @@ def test_get_available_openai_models_failure():
|
|||
assert models == []
|
||||
mock_client.return_value.models.list.assert_called_once()
|
||||
|
||||
|
||||
def test_select_expert_model_explicit():
|
||||
"""Test model selection with explicitly specified model."""
|
||||
model = select_expert_model("openai", "gpt-4")
|
||||
assert model == "gpt-4"
|
||||
|
||||
|
||||
def test_select_expert_model_non_openai():
|
||||
"""Test model selection for non-OpenAI provider."""
|
||||
model = select_expert_model("anthropic", None)
|
||||
assert model is None
|
||||
|
||||
|
||||
def test_select_expert_model_priority():
|
||||
"""Test model selection follows priority order."""
|
||||
available_models = ["gpt-4", "o1", "o3-mini"]
|
||||
|
||||
with mock.patch("ra_aid.llm.get_available_openai_models", return_value=available_models):
|
||||
|
||||
with mock.patch(
|
||||
"ra_aid.llm.get_available_openai_models", return_value=available_models
|
||||
):
|
||||
model = select_expert_model("openai")
|
||||
assert model == "o3-mini"
|
||||
|
||||
|
||||
def test_select_expert_model_no_match():
|
||||
"""Test model selection when no priority models available."""
|
||||
available_models = ["gpt-4", "gpt-3.5"]
|
||||
|
||||
with mock.patch("ra_aid.llm.get_available_openai_models", return_value=available_models):
|
||||
|
||||
with mock.patch(
|
||||
"ra_aid.llm.get_available_openai_models", return_value=available_models
|
||||
):
|
||||
model = select_expert_model("openai")
|
||||
assert model is None
|
||||
|
||||
|
||||
def test_temperature_validation(clean_env, mock_openai):
|
||||
"""Test temperature validation in command line arguments."""
|
||||
from ra_aid.__main__ import parse_arguments
|
||||
|
|
@ -358,34 +422,49 @@ def test_provider_name_validation():
|
|||
initialize_llm(provider, "test-model", temperature=0.7)
|
||||
except ValueError as e:
|
||||
if "Temperature must be provided" not in str(e):
|
||||
pytest.fail(f"Valid provider {provider} raised unexpected ValueError: {e}")
|
||||
pytest.fail(
|
||||
f"Valid provider {provider} raised unexpected ValueError: {e}"
|
||||
)
|
||||
|
||||
|
||||
def test_initialize_llm_cross_provider(clean_env, mock_openai, mock_anthropic, mock_gemini, monkeypatch):
|
||||
def test_initialize_llm_cross_provider(
|
||||
clean_env, mock_openai, mock_anthropic, mock_gemini, monkeypatch
|
||||
):
|
||||
"""Test initializing different providers in sequence."""
|
||||
# Initialize OpenAI
|
||||
monkeypatch.setenv("OPENAI_API_KEY", "openai-key")
|
||||
_llm1 = initialize_llm("openai", "gpt-4", temperature=0.7)
|
||||
mock_openai.assert_called_with(api_key="openai-key", model="gpt-4", temperature=0.7, timeout=180, max_retries=5)
|
||||
mock_openai.assert_called_with(
|
||||
api_key="openai-key", model="gpt-4", temperature=0.7, timeout=180, max_retries=5
|
||||
)
|
||||
|
||||
# Initialize Anthropic
|
||||
monkeypatch.setenv("ANTHROPIC_API_KEY", "anthropic-key")
|
||||
_llm2 = initialize_llm("anthropic", "claude-3", temperature=0.7)
|
||||
mock_anthropic.assert_called_with(
|
||||
api_key="anthropic-key", model_name="claude-3", temperature=0.7, timeout=180, max_retries=5
|
||||
api_key="anthropic-key",
|
||||
model_name="claude-3",
|
||||
temperature=0.7,
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
# Initialize Gemini
|
||||
monkeypatch.setenv("GEMINI_API_KEY", "gemini-key")
|
||||
_llm3 = initialize_llm("gemini", "gemini-pro", temperature=0.7)
|
||||
mock_gemini.assert_called_with(
|
||||
api_key="gemini-key", model="gemini-pro", temperature=0.7, timeout=180, max_retries=5
|
||||
api_key="gemini-key",
|
||||
model="gemini-pro",
|
||||
temperature=0.7,
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Args:
|
||||
"""Test arguments class."""
|
||||
|
||||
provider: str
|
||||
expert_provider: str
|
||||
model: str = None
|
||||
|
|
@ -412,7 +491,13 @@ def test_environment_variable_precedence(clean_env, mock_openai, monkeypatch):
|
|||
|
||||
# Test LLM client creation with expert mode
|
||||
_llm = create_llm_client("openai", "o1", is_expert=True)
|
||||
mock_openai.assert_called_with(api_key="expert-key", model="o1", reasoning_effort="high", timeout=180, max_retries=5)
|
||||
mock_openai.assert_called_with(
|
||||
api_key="expert-key",
|
||||
model="o1",
|
||||
reasoning_effort="high",
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
# Test environment validation
|
||||
monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "")
|
||||
|
|
@ -459,7 +544,9 @@ def mock_deepseek_reasoner():
|
|||
yield mock
|
||||
|
||||
|
||||
def test_initialize_deepseek(clean_env, mock_openai, mock_deepseek_reasoner, monkeypatch):
|
||||
def test_initialize_deepseek(
|
||||
clean_env, mock_openai, mock_deepseek_reasoner, monkeypatch
|
||||
):
|
||||
"""Test DeepSeek provider initialization with different models."""
|
||||
monkeypatch.setenv("DEEPSEEK_API_KEY", "test-key")
|
||||
|
||||
|
|
@ -486,7 +573,9 @@ def test_initialize_deepseek(clean_env, mock_openai, mock_deepseek_reasoner, mon
|
|||
)
|
||||
|
||||
|
||||
def test_initialize_openrouter_deepseek(clean_env, mock_openai, mock_deepseek_reasoner, monkeypatch):
|
||||
def test_initialize_openrouter_deepseek(
|
||||
clean_env, mock_openai, mock_deepseek_reasoner, monkeypatch
|
||||
):
|
||||
"""Test OpenRouter DeepSeek model initialization."""
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "test-key")
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
import pytest
|
||||
from pathlib import Path
|
||||
from langchain_core.tools import Tool
|
||||
|
||||
from ra_aid.tools.programmer import parse_aider_flags, run_programming_task, get_aider_executable
|
||||
from ra_aid.tools.programmer import (
|
||||
get_aider_executable,
|
||||
parse_aider_flags,
|
||||
run_programming_task,
|
||||
)
|
||||
|
||||
# Test cases for parse_aider_flags function
|
||||
test_cases = [
|
||||
|
|
@ -103,18 +105,24 @@ def test_path_normalization_and_deduplication(mocker, tmp_path):
|
|||
test_file = tmp_path / "test.py"
|
||||
test_file.write_text("")
|
||||
new_file = tmp_path / "new.py"
|
||||
|
||||
|
||||
# Mock dependencies
|
||||
mocker.patch("ra_aid.tools.programmer._global_memory", {"related_files": {}})
|
||||
mocker.patch("ra_aid.tools.programmer.get_aider_executable", return_value="/path/to/aider")
|
||||
mock_run = mocker.patch("ra_aid.tools.programmer.run_interactive_command", return_value=(b"", 0))
|
||||
mocker.patch(
|
||||
"ra_aid.tools.programmer.get_aider_executable", return_value="/path/to/aider"
|
||||
)
|
||||
mock_run = mocker.patch(
|
||||
"ra_aid.tools.programmer.run_interactive_command", return_value=(b"", 0)
|
||||
)
|
||||
|
||||
# Test duplicate paths
|
||||
run_programming_task.invoke({
|
||||
"instructions": "test instruction",
|
||||
"files": [str(test_file), str(test_file)] # Same path twice
|
||||
})
|
||||
|
||||
run_programming_task.invoke(
|
||||
{
|
||||
"instructions": "test instruction",
|
||||
"files": [str(test_file), str(test_file)], # Same path twice
|
||||
}
|
||||
)
|
||||
|
||||
# Get the command list passed to run_interactive_command
|
||||
cmd_args = mock_run.call_args[0][0]
|
||||
# Count occurrences of test_file path in command
|
||||
|
|
@ -122,16 +130,22 @@ def test_path_normalization_and_deduplication(mocker, tmp_path):
|
|||
assert test_file_count == 1, "Expected exactly one instance of test_file path"
|
||||
|
||||
# Test mixed paths
|
||||
run_programming_task.invoke({
|
||||
"instructions": "test instruction",
|
||||
"files": [str(test_file), str(new_file)] # Two different paths
|
||||
})
|
||||
|
||||
run_programming_task.invoke(
|
||||
{
|
||||
"instructions": "test instruction",
|
||||
"files": [str(test_file), str(new_file)], # Two different paths
|
||||
}
|
||||
)
|
||||
|
||||
# Get the command list from the second call
|
||||
cmd_args = mock_run.call_args[0][0]
|
||||
# Verify both paths are present exactly once
|
||||
assert sum(1 for arg in cmd_args if arg == str(test_file)) == 1, "Expected one instance of test_file"
|
||||
assert sum(1 for arg in cmd_args if arg == str(new_file)) == 1, "Expected one instance of new_file"
|
||||
assert (
|
||||
sum(1 for arg in cmd_args if arg == str(test_file)) == 1
|
||||
), "Expected one instance of test_file"
|
||||
assert (
|
||||
sum(1 for arg in cmd_args if arg == str(new_file)) == 1
|
||||
), "Expected one instance of new_file"
|
||||
|
||||
|
||||
def test_get_aider_executable(mocker):
|
||||
|
|
@ -139,11 +153,11 @@ def test_get_aider_executable(mocker):
|
|||
mock_sys = mocker.patch("ra_aid.tools.programmer.sys")
|
||||
mock_path = mocker.patch("ra_aid.tools.programmer.Path")
|
||||
mock_os = mocker.patch("ra_aid.tools.programmer.os")
|
||||
|
||||
|
||||
# Mock sys.executable and platform
|
||||
mock_sys.executable = "/path/to/venv/bin/python"
|
||||
mock_sys.platform = "linux"
|
||||
|
||||
|
||||
# Mock Path().parent and exists()
|
||||
mock_path_instance = mocker.MagicMock()
|
||||
mock_path.return_value = mock_path_instance
|
||||
|
|
@ -152,26 +166,26 @@ def test_get_aider_executable(mocker):
|
|||
mock_aider = mocker.MagicMock()
|
||||
mock_parent.__truediv__.return_value = mock_aider
|
||||
mock_aider.exists.return_value = True
|
||||
|
||||
|
||||
# Mock os.access to return True
|
||||
mock_os.access.return_value = True
|
||||
mock_os.X_OK = 1 # Mock the execute permission constant
|
||||
|
||||
|
||||
# Test happy path on Linux
|
||||
aider_path = get_aider_executable()
|
||||
assert aider_path == str(mock_aider)
|
||||
mock_parent.__truediv__.assert_called_with("aider")
|
||||
|
||||
|
||||
# Test Windows path
|
||||
mock_sys.platform = "win32"
|
||||
aider_path = get_aider_executable()
|
||||
mock_parent.__truediv__.assert_called_with("aider.exe")
|
||||
|
||||
|
||||
# Test executable not found
|
||||
mock_aider.exists.return_value = False
|
||||
with pytest.raises(RuntimeError, match="Could not find aider executable"):
|
||||
get_aider_executable()
|
||||
|
||||
|
||||
# Test not executable
|
||||
mock_aider.exists.return_value = True
|
||||
mock_os.access.return_value = False
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ from unittest.mock import patch
|
|||
|
||||
import pytest
|
||||
|
||||
from ra_aid.config import DEFAULT_TEST_CMD_TIMEOUT
|
||||
from ra_aid.tools.handle_user_defined_test_cmd_execution import execute_test_command
|
||||
|
||||
# Test cases for execute_test_command
|
||||
|
|
@ -195,9 +196,10 @@ def test_execute_test_command(
|
|||
|
||||
if auto_test and test_attempts < config.get("max_test_cmd_retries", 5):
|
||||
if config.get("test_cmd"):
|
||||
# Verify run_shell_command called with command and default timeout
|
||||
# Verify run_shell_command called with command and configured timeout
|
||||
mock_run_cmd.assert_called_once_with(
|
||||
config["test_cmd"], timeout=config.get("timeout", 30)
|
||||
config["test_cmd"],
|
||||
timeout=config.get("test_cmd_timeout", DEFAULT_TEST_CMD_TIMEOUT),
|
||||
)
|
||||
|
||||
# Verify logging for max retries
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ from unittest.mock import Mock, patch
|
|||
|
||||
import pytest
|
||||
|
||||
from ra_aid.config import DEFAULT_TEST_CMD_TIMEOUT
|
||||
from ra_aid.tools.handle_user_defined_test_cmd_execution import (
|
||||
TestCommandExecutor,
|
||||
TestState,
|
||||
|
|
@ -92,8 +93,9 @@ def test_run_test_command_timeout(test_executor):
|
|||
"ra_aid.tools.handle_user_defined_test_cmd_execution.logger.warning"
|
||||
) as mock_logger,
|
||||
):
|
||||
# Create a TimeoutExpired exception
|
||||
timeout_exc = subprocess.TimeoutExpired(cmd="test", timeout=30)
|
||||
# Create a TimeoutExpired exception with configured timeout
|
||||
timeout = test_executor.config.get("test_cmd_timeout", DEFAULT_TEST_CMD_TIMEOUT)
|
||||
timeout_exc = subprocess.TimeoutExpired(cmd="test", timeout=timeout)
|
||||
mock_run.side_effect = timeout_exc
|
||||
|
||||
test_executor.run_test_command("test", "original")
|
||||
|
|
@ -101,7 +103,7 @@ def test_run_test_command_timeout(test_executor):
|
|||
# Verify state updates
|
||||
assert not test_executor.state.should_break
|
||||
assert test_executor.state.test_attempts == 1
|
||||
assert "timed out after 30 seconds" in test_executor.state.prompt
|
||||
assert f"timed out after {timeout} seconds" in test_executor.state.prompt
|
||||
|
||||
# Verify logging
|
||||
mock_logger.assert_called_once()
|
||||
|
|
|
|||
|
|
@ -472,6 +472,7 @@ def test_emit_related_files_path_normalization(reset_memory, tmp_path):
|
|||
|
||||
# Change to the temp directory so relative paths work
|
||||
import os
|
||||
|
||||
original_dir = os.getcwd()
|
||||
os.chdir(tmp_path)
|
||||
|
||||
|
|
|
|||
|
|
@ -19,7 +19,9 @@ def test_basic_write_functionality(temp_test_dir):
|
|||
test_file = temp_test_dir / "test.txt"
|
||||
content = "Hello, World!\nTest content"
|
||||
|
||||
result = put_complete_file_contents({"filepath": str(test_file), "complete_file_contents": content})
|
||||
result = put_complete_file_contents(
|
||||
{"filepath": str(test_file), "complete_file_contents": content}
|
||||
)
|
||||
|
||||
# Verify file contents
|
||||
assert test_file.read_text() == content
|
||||
|
|
@ -39,7 +41,9 @@ def test_directory_creation(temp_test_dir):
|
|||
test_file = nested_dir / "test.txt"
|
||||
content = "Test content"
|
||||
|
||||
result = put_complete_file_contents({"filepath": str(test_file), "complete_file_contents": content})
|
||||
result = put_complete_file_contents(
|
||||
{"filepath": str(test_file), "complete_file_contents": content}
|
||||
)
|
||||
|
||||
assert test_file.exists()
|
||||
assert test_file.read_text() == content
|
||||
|
|
@ -53,14 +57,22 @@ def test_different_encodings(temp_test_dir):
|
|||
|
||||
# Test UTF-8
|
||||
result_utf8 = put_complete_file_contents(
|
||||
{"filepath": str(test_file), "complete_file_contents": content, "encoding": "utf-8"}
|
||||
{
|
||||
"filepath": str(test_file),
|
||||
"complete_file_contents": content,
|
||||
"encoding": "utf-8",
|
||||
}
|
||||
)
|
||||
assert result_utf8["success"] is True
|
||||
assert test_file.read_text(encoding="utf-8") == content
|
||||
|
||||
# Test UTF-16
|
||||
result_utf16 = put_complete_file_contents(
|
||||
{"filepath": str(test_file), "complete_file_contents": content, "encoding": "utf-16"}
|
||||
{
|
||||
"filepath": str(test_file),
|
||||
"complete_file_contents": content,
|
||||
"encoding": "utf-16",
|
||||
}
|
||||
)
|
||||
assert result_utf16["success"] is True
|
||||
assert test_file.read_text(encoding="utf-16") == content
|
||||
|
|
@ -145,7 +157,9 @@ def test_large_file_write(temp_test_dir):
|
|||
test_file = temp_test_dir / "large.txt"
|
||||
content = "Large content\n" * 1000 # Create substantial content
|
||||
|
||||
result = put_complete_file_contents({"filepath": str(test_file), "complete_file_contents": content})
|
||||
result = put_complete_file_contents(
|
||||
{"filepath": str(test_file), "complete_file_contents": content}
|
||||
)
|
||||
|
||||
assert test_file.exists()
|
||||
assert test_file.read_text() == content
|
||||
|
|
|
|||
|
|
@ -60,11 +60,7 @@ def sample_git_repo(empty_git_repo):
|
|||
def git_repo_with_untracked(sample_git_repo):
|
||||
"""Create a git repository with both tracked and untracked files."""
|
||||
# Create untracked files
|
||||
untracked_files = [
|
||||
"untracked.txt",
|
||||
"src/untracked.py",
|
||||
"docs/draft.md"
|
||||
]
|
||||
untracked_files = ["untracked.txt", "src/untracked.py", "docs/draft.md"]
|
||||
|
||||
for file_path in untracked_files:
|
||||
full_path = sample_git_repo / file_path
|
||||
|
|
@ -91,7 +87,7 @@ docs/draft.md
|
|||
"""
|
||||
gitignore_path = git_repo_with_untracked / ".gitignore"
|
||||
gitignore_path.write_text(gitignore_content)
|
||||
|
||||
|
||||
# Add and commit .gitignore first
|
||||
subprocess.run(["git", "add", ".gitignore"], cwd=git_repo_with_untracked)
|
||||
subprocess.run(
|
||||
|
|
@ -109,7 +105,7 @@ docs/draft.md
|
|||
ignored_files = [
|
||||
"ignored.txt",
|
||||
"temp/temp.txt",
|
||||
"src/__pycache__/main.cpython-39.pyc"
|
||||
"src/__pycache__/main.cpython-39.pyc",
|
||||
]
|
||||
|
||||
for file_path in ignored_files:
|
||||
|
|
@ -128,14 +124,11 @@ def git_repo_with_aider_files(sample_git_repo):
|
|||
".aider.chat.history.md",
|
||||
".aider.input.history",
|
||||
".aider.tags.cache.v3/some_file",
|
||||
"src/.aider.local.settings"
|
||||
"src/.aider.local.settings",
|
||||
]
|
||||
|
||||
# Create regular files
|
||||
regular_files = [
|
||||
"main.cpp",
|
||||
"src/helper.cpp"
|
||||
]
|
||||
regular_files = ["main.cpp", "src/helper.cpp"]
|
||||
|
||||
# Create all files
|
||||
for file_path in aider_files + regular_files:
|
||||
|
|
@ -354,14 +347,15 @@ def mock_is_git_repo():
|
|||
@pytest.fixture
|
||||
def mock_os_path(monkeypatch):
|
||||
"""Mock os.path functions."""
|
||||
|
||||
def mock_exists(path):
|
||||
return True
|
||||
|
||||
def mock_isdir(path):
|
||||
return True
|
||||
|
||||
monkeypatch.setattr(os.path, 'exists', mock_exists)
|
||||
monkeypatch.setattr(os.path, 'isdir', mock_isdir)
|
||||
monkeypatch.setattr(os.path, "exists", mock_exists)
|
||||
monkeypatch.setattr(os.path, "isdir", mock_isdir)
|
||||
return monkeypatch
|
||||
|
||||
|
||||
|
|
@ -390,14 +384,18 @@ def test_get_file_listing_git_error(mock_subprocess, mock_is_git_repo, mock_os_p
|
|||
get_file_listing(DUMMY_PATH)
|
||||
|
||||
|
||||
def test_get_file_listing_permission_error(mock_subprocess, mock_is_git_repo, mock_os_path):
|
||||
def test_get_file_listing_permission_error(
|
||||
mock_subprocess, mock_is_git_repo, mock_os_path
|
||||
):
|
||||
"""Test get_file_listing with permission error."""
|
||||
mock_subprocess.side_effect = PermissionError("Permission denied")
|
||||
with pytest.raises(DirectoryAccessError):
|
||||
get_file_listing(DUMMY_PATH)
|
||||
|
||||
|
||||
def test_get_file_listing_unexpected_error(mock_subprocess, mock_is_git_repo, mock_os_path):
|
||||
def test_get_file_listing_unexpected_error(
|
||||
mock_subprocess, mock_is_git_repo, mock_os_path
|
||||
):
|
||||
"""Test get_file_listing with unexpected error."""
|
||||
mock_subprocess.side_effect = Exception("Unexpected error")
|
||||
with pytest.raises(FileListerError):
|
||||
|
|
@ -407,33 +405,35 @@ def test_get_file_listing_unexpected_error(mock_subprocess, mock_is_git_repo, mo
|
|||
def test_get_file_listing_with_untracked(git_repo_with_untracked):
|
||||
"""Test that file listing includes both tracked and untracked files."""
|
||||
files, count = get_file_listing(str(git_repo_with_untracked))
|
||||
|
||||
|
||||
# Check tracked files are present
|
||||
assert "README.md" in files
|
||||
assert "src/main.py" in files
|
||||
|
||||
|
||||
# Check untracked files are present
|
||||
assert "untracked.txt" in files
|
||||
assert "src/untracked.py" in files
|
||||
|
||||
|
||||
# Verify count includes both tracked and untracked
|
||||
expected_count = 8 # 5 tracked + 3 untracked (excluding .gitignore)
|
||||
assert count == expected_count
|
||||
|
||||
|
||||
def test_get_file_listing_with_untracked_and_limit(git_repo_with_untracked):
|
||||
"""Test that file listing with limit works correctly with untracked files."""
|
||||
limit = 3
|
||||
files, count = get_file_listing(str(git_repo_with_untracked), limit=limit)
|
||||
|
||||
|
||||
# Total count should still be full count
|
||||
assert count == 8 # 5 tracked + 3 untracked (excluding .gitignore)
|
||||
|
||||
# Only limit number of files should be returned
|
||||
assert len(files) == limit
|
||||
|
||||
|
||||
# Files should be sorted, so we can check first 3
|
||||
assert files == sorted(files)
|
||||
|
||||
|
||||
def test_get_file_listing_respects_gitignore(git_repo_with_ignores):
|
||||
"""Test that file listing respects .gitignore rules."""
|
||||
# First test with hidden files excluded (default)
|
||||
|
|
@ -468,6 +468,7 @@ def test_get_file_listing_respects_gitignore(git_repo_with_ignores):
|
|||
expected_count = 8 # 5 tracked + 2 untracked + .gitignore
|
||||
assert count == expected_count
|
||||
|
||||
|
||||
def test_aider_files_excluded(git_repo_with_aider_files):
|
||||
"""Test that .aider files are excluded from the file listing."""
|
||||
files, count = get_file_listing(str(git_repo_with_aider_files))
|
||||
|
|
@ -487,21 +488,14 @@ def test_aider_files_excluded(git_repo_with_aider_files):
|
|||
assert count == expected_count
|
||||
assert len(files) == expected_count
|
||||
|
||||
|
||||
def test_hidden_files_excluded_by_default(git_repo_with_aider_files):
|
||||
"""Test that hidden files are excluded by default."""
|
||||
# Create some hidden files
|
||||
hidden_files = [
|
||||
".config",
|
||||
".env",
|
||||
"src/.local",
|
||||
".gitattributes"
|
||||
]
|
||||
hidden_files = [".config", ".env", "src/.local", ".gitattributes"]
|
||||
|
||||
# Create regular files
|
||||
regular_files = [
|
||||
"main.cpp",
|
||||
"src/helper.cpp"
|
||||
]
|
||||
regular_files = ["main.cpp", "src/helper.cpp"]
|
||||
|
||||
# Create all files
|
||||
for file_path in hidden_files + regular_files:
|
||||
|
|