Merge pull request #95 from ariel-frischer/test-cmd-args

feat: add `--test-cmd-timeout` option to specify timeout for test command

Commit: e6ba8f5dff
@@ -181,6 +181,7 @@ More information is available in our [Usage Examples](https://docs.ra-aid.ai/cat

- `--test-cmd`: Custom command to run tests. If set user will be asked if they want to run the test command
- `--auto-test`: Automatically run tests after each code change
- `--max-test-cmd-retries`: Maximum number of test command retry attempts (default: 3)
- `--test-cmd-timeout`: Timeout in seconds for test command execution (default: 300)
- `--version`: Show program version number and exit
- `--webui`: Launch the web interface (alpha feature)
- `--webui-host`: Host to listen on for web interface (default: 0.0.0.0) (alpha feature)

@@ -18,7 +18,11 @@ from ra_aid.agent_utils import (
    run_planning_agent,
    run_research_agent,
)
from ra_aid.config import DEFAULT_MAX_TEST_CMD_RETRIES, DEFAULT_RECURSION_LIMIT
from ra_aid.config import (
    DEFAULT_MAX_TEST_CMD_RETRIES,
    DEFAULT_RECURSION_LIMIT,
    DEFAULT_TEST_CMD_TIMEOUT,
)
from ra_aid.dependencies import check_dependencies
from ra_aid.env import validate_environment
from ra_aid.llm import initialize_llm

@@ -81,9 +85,11 @@ Examples:
    parser.add_argument(
        "--provider",
        type=str,
        default="openai"
        if (os.getenv("OPENAI_API_KEY") and not os.getenv("ANTHROPIC_API_KEY"))
        else "anthropic",
        default=(
            "openai"
            if (os.getenv("OPENAI_API_KEY") and not os.getenv("ANTHROPIC_API_KEY"))
            else "anthropic"
        ),
        choices=VALID_PROVIDERS,
        help="The LLM provider to use",
    )

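The hunk above only re-wraps the provider default; the rule it encodes can be summarized with a small self-contained sketch (the helper name is illustrative, not part of the codebase):

```python
def default_provider(env: dict) -> str:
    # Same rule as the argparse default above: OpenAI is only chosen when an
    # OpenAI key is present and no Anthropic key is set; otherwise Anthropic.
    if env.get("OPENAI_API_KEY") and not env.get("ANTHROPIC_API_KEY"):
        return "openai"
    return "anthropic"

assert default_provider({"OPENAI_API_KEY": "sk-1"}) == "openai"
assert default_provider({"OPENAI_API_KEY": "sk-1", "ANTHROPIC_API_KEY": "sk-2"}) == "anthropic"
assert default_provider({}) == "anthropic"
```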
@@ -173,7 +179,13 @@ Examples:
        "--max-test-cmd-retries",
        type=int,
        default=DEFAULT_MAX_TEST_CMD_RETRIES,
        help="Maximum number of retries for the test command (default: 10)",
        help="Maximum number of retries for the test command (default: 3)",
    )
    parser.add_argument(
        "--test-cmd-timeout",
        type=int,
        default=DEFAULT_TEST_CMD_TIMEOUT,
        help=f"Timeout in seconds for test command execution (default: {DEFAULT_TEST_CMD_TIMEOUT})",
    )
    parser.add_argument(
        "--webui",

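To see what the new flag does at parse time, here is a minimal, standalone argparse sketch (the defaults mirror the config.py hunk later in this commit; the parser itself is illustrative):

```python
import argparse

DEFAULT_MAX_TEST_CMD_RETRIES = 3   # mirrors ra_aid.config
DEFAULT_TEST_CMD_TIMEOUT = 60 * 5  # 300 seconds, mirrors ra_aid.config

parser = argparse.ArgumentParser(prog="ra-aid")
parser.add_argument("--max-test-cmd-retries", type=int, default=DEFAULT_MAX_TEST_CMD_RETRIES)
parser.add_argument("--test-cmd-timeout", type=int, default=DEFAULT_TEST_CMD_TIMEOUT)

args = parser.parse_args(["--test-cmd-timeout", "600"])
print(args.max_test_cmd_retries, args.test_cmd_timeout)  # -> 3 600
```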
@ -227,11 +239,10 @@ Examples:
|
|||
parsed_args.expert_provider = "deepseek"
|
||||
parsed_args.expert_model = "deepseek-reasoner"
|
||||
else:
|
||||
# Fall back to main provider if neither is available
|
||||
# Fall back to main provider if neither is available
|
||||
parsed_args.expert_provider = parsed_args.provider
|
||||
parsed_args.expert_model = parsed_args.model
|
||||
|
||||
|
||||
# Validate temperature range if provided
|
||||
if parsed_args.temperature is not None and not (
|
||||
0.0 <= parsed_args.temperature <= 2.0
|
||||
|
|
@ -294,15 +305,24 @@ def main():
|
|||
|
||||
# Validate model configuration early
|
||||
from ra_aid.models_params import models_params
|
||||
|
||||
model_config = models_params.get(args.provider, {}).get(args.model or "", {})
|
||||
supports_temperature = model_config.get("supports_temperature", args.provider in ["anthropic", "openai", "openrouter", "openai-compatible", "deepseek"])
|
||||
|
||||
supports_temperature = model_config.get(
|
||||
"supports_temperature",
|
||||
args.provider
|
||||
in ["anthropic", "openai", "openrouter", "openai-compatible", "deepseek"],
|
||||
)
|
||||
|
||||
if supports_temperature and args.temperature is None:
|
||||
args.temperature = model_config.get("default_temperature")
|
||||
if args.temperature is None:
|
||||
print_error(f"Temperature must be provided for model {args.model} which supports temperature")
|
||||
print_error(
|
||||
f"Temperature must be provided for model {args.model} which supports temperature"
|
||||
)
|
||||
sys.exit(1)
|
||||
logger.debug(f"Using default temperature {args.temperature} for model {args.model}")
|
||||
logger.debug(
|
||||
f"Using default temperature {args.temperature} for model {args.model}"
|
||||
)
|
||||
|
||||
# Display status lines
|
||||
status = Text()
|
||||
|
|
@ -324,16 +344,13 @@ def main():
|
|||
|
||||
# Search info
|
||||
status.append("🔍 Search: ")
|
||||
status.append("Enabled" if web_research_enabled else "Disabled",
|
||||
style=None if web_research_enabled else "italic")
|
||||
|
||||
status.append(
|
||||
"Enabled" if web_research_enabled else "Disabled",
|
||||
style=None if web_research_enabled else "italic",
|
||||
)
|
||||
|
||||
console.print(
|
||||
Panel(
|
||||
status,
|
||||
title="Config",
|
||||
border_style="bright_blue",
|
||||
padding=(0, 1)
|
||||
)
|
||||
Panel(status, title="Config", border_style="bright_blue", padding=(0, 1))
|
||||
)
|
||||
|
||||
# Handle chat mode
|
||||
|
|
@ -400,9 +417,9 @@ def main():
|
|||
chat_agent,
|
||||
CHAT_PROMPT.format(
|
||||
initial_request=initial_request,
|
||||
web_research_section=WEB_RESEARCH_PROMPT_SECTION_CHAT
|
||||
if web_research_enabled
|
||||
else "",
|
||||
web_research_section=(
|
||||
WEB_RESEARCH_PROMPT_SECTION_CHAT if web_research_enabled else ""
|
||||
),
|
||||
working_directory=working_directory,
|
||||
current_date=current_date,
|
||||
project_info=formatted_project_info,
|
||||
|
|
@@ -428,6 +445,7 @@ def main():
        "auto_test": args.auto_test,
        "test_cmd": args.test_cmd,
        "max_test_cmd_retries": args.max_test_cmd_retries,
        "test_cmd_timeout": args.test_cmd_timeout,
    }

    # Store config in global memory for access by is_informational_query

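Downstream, the test executor reads the `test_cmd_timeout` key back out of this config dict, falling back to the shared default (see the handle_user_defined_test_cmd_execution hunk further below). A minimal sketch of that lookup:

```python
from typing import Any, Dict

DEFAULT_TEST_CMD_TIMEOUT = 300  # mirrors ra_aid.config

def resolve_test_timeout(config: Dict[str, Any]) -> int:
    # Same lookup the executor performs in this commit.
    return config.get("test_cmd_timeout", DEFAULT_TEST_CMD_TIMEOUT)

assert resolve_test_timeout({}) == 300
assert resolve_test_timeout({"test_cmd_timeout": 600}) == 600
```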
@@ -2,3 +2,4 @@

DEFAULT_RECURSION_LIMIT = 100
DEFAULT_MAX_TEST_CMD_RETRIES = 3
DEFAULT_TEST_CMD_TIMEOUT = 60 * 5  # 5 minutes in seconds

@ -1,11 +1,9 @@
|
|||
"""Module for efficient file listing using git."""
|
||||
|
||||
import subprocess
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Tuple
|
||||
import tempfile
|
||||
import shutil
|
||||
|
||||
|
||||
class FileListerError(Exception):
|
||||
|
|
@@ -133,12 +131,18 @@ def get_file_listing(

        # Combine and process the files
        all_files = []
        for file in tracked_files_process.stdout.splitlines() + untracked_files_process.stdout.splitlines():
        for file in (
            tracked_files_process.stdout.splitlines()
            + untracked_files_process.stdout.splitlines()
        ):
            file = file.strip()
            if not file:
                continue
            # Skip hidden files unless explicitly included
            if not include_hidden and (file.startswith(".") or any(part.startswith(".") for part in file.split("/"))):
            if not include_hidden and (
                file.startswith(".")
                or any(part.startswith(".") for part in file.split("/"))
            ):
                continue
            # Skip .aider files
            if ".aider" in file:

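The hidden-file condition above treats a path as hidden when any component starts with a dot. A standalone restatement of that predicate, with examples:

```python
def is_hidden(path: str) -> bool:
    # Same condition as in get_file_listing above.
    return path.startswith(".") or any(part.startswith(".") for part in path.split("/"))

assert is_hidden(".env")
assert is_hidden("src/.local/settings")
assert not is_hidden("src/main.py")
```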
@ -155,7 +159,7 @@ def get_file_listing(
|
|||
|
||||
return all_files, total_count
|
||||
|
||||
except (DirectoryNotFoundError, DirectoryAccessError, GitCommandError) as e:
|
||||
except (DirectoryNotFoundError, DirectoryAccessError, GitCommandError):
|
||||
# Re-raise known exceptions
|
||||
raise
|
||||
except PermissionError as e:
|
||||
|
|
|
|||
|
|
@ -1,21 +1,21 @@
|
|||
import os
|
||||
from typing import Any, Dict, Optional
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from openai import OpenAI
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
from langchain_core.language_models import BaseChatModel
|
||||
from langchain_google_genai import ChatGoogleGenerativeAI
|
||||
from langchain_openai import ChatOpenAI
|
||||
from openai import OpenAI
|
||||
|
||||
from ra_aid.chat_models.deepseek_chat import ChatDeepseekReasoner
|
||||
from ra_aid.logging_config import get_logger
|
||||
from typing import List
|
||||
|
||||
from .models_params import models_params
|
||||
|
||||
|
||||
def get_available_openai_models() -> List[str]:
|
||||
"""Fetch available OpenAI models using OpenAI client.
|
||||
|
||||
|
||||
Returns:
|
||||
List of available model names
|
||||
"""
|
||||
|
|
@@ -25,35 +25,37 @@ def get_available_openai_models() -> List[str]:
        models = client.models.list()
        return [str(model.id) for model in models.data]
    except Exception:
        # Return empty list if unable to fetch models
        # Return empty list if unable to fetch models
        return []


def select_expert_model(provider: str, model: Optional[str] = None) -> Optional[str]:
    """Select appropriate expert model based on provider and availability.

    Args:
        provider: The LLM provider
        model: Optional explicitly specified model name

    Returns:
        Selected model name or None if no suitable model found
    """
    if provider != "openai" or model is not None:
        return model

    # Try to get available models
    available_models = get_available_openai_models()

    # Priority order for expert models
    priority_models = ["o3-mini", "o1", "o1-preview"]

    # Return first available model from priority list
    for model_name in priority_models:
        if model_name in available_models:
            return model_name

    return None


known_temp_providers = {
    "openai",
    "anthropic",

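The behaviour of `select_expert_model` above can be summarized from the unit tests added later in this commit (test_select_expert_model_priority and related cases):

```python
from ra_aid.llm import select_expert_model

# Assuming get_available_openai_models() returns ["gpt-4", "o1", "o3-mini"]:
select_expert_model("openai")           # -> "o3-mini"  (highest-priority available model)
select_expert_model("openai", "o1")     # -> "o1"       (an explicit model is returned as-is)
select_expert_model("anthropic")        # -> None       (auto-selection only applies to OpenAI)
```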
@ -220,7 +222,9 @@ def create_llm_client(
|
|||
temp_kwargs = {"temperature": 0} if supports_temperature else {}
|
||||
elif supports_temperature:
|
||||
if temperature is None:
|
||||
raise ValueError(f"Temperature must be provided for model {model_name} which supports temperature")
|
||||
raise ValueError(
|
||||
f"Temperature must be provided for model {model_name} which supports temperature"
|
||||
)
|
||||
temp_kwargs = {"temperature": temperature}
|
||||
else:
|
||||
temp_kwargs = {}
|
||||
|
|
@ -248,11 +252,13 @@ def create_llm_client(
|
|||
}
|
||||
if is_expert:
|
||||
openai_kwargs["reasoning_effort"] = "high"
|
||||
return ChatOpenAI(**{
|
||||
**openai_kwargs,
|
||||
"timeout": LLM_REQUEST_TIMEOUT,
|
||||
"max_retries": LLM_MAX_RETRIES,
|
||||
})
|
||||
return ChatOpenAI(
|
||||
**{
|
||||
**openai_kwargs,
|
||||
"timeout": LLM_REQUEST_TIMEOUT,
|
||||
"max_retries": LLM_MAX_RETRIES,
|
||||
}
|
||||
)
|
||||
elif provider == "anthropic":
|
||||
return ChatAnthropic(
|
||||
api_key=config["api_key"],
|
||||
|
|
@ -289,8 +295,6 @@ def initialize_llm(
|
|||
return create_llm_client(provider, model_name, temperature, is_expert=False)
|
||||
|
||||
|
||||
def initialize_expert_llm(
|
||||
provider: str, model_name: str
|
||||
) -> BaseChatModel:
|
||||
def initialize_expert_llm(provider: str, model_name: str) -> BaseChatModel:
|
||||
"""Initialize an expert language model client based on the specified provider and model."""
|
||||
return create_llm_client(provider, model_name, temperature=None, is_expert=True)
|
||||
|
|
|
|||
[File diff suppressed because it is too large]
|
|
@ -9,47 +9,50 @@ The interface remains compatible with external callers expecting a tuple (output
|
|||
where output is a bytes object (UTF-8 encoded).
|
||||
"""
|
||||
|
||||
import os
|
||||
import shlex
|
||||
import shutil
|
||||
import errno
|
||||
import sys
|
||||
import io
|
||||
import subprocess
|
||||
import os
|
||||
import select
|
||||
import termios
|
||||
import tty
|
||||
import time
|
||||
import shutil
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import termios
|
||||
import time
|
||||
import tty
|
||||
from typing import List, Tuple
|
||||
|
||||
import pyte
|
||||
from pyte.screens import HistoryScreen
|
||||
|
||||
|
||||
def render_line(line, columns: int) -> str:
|
||||
"""Render a single screen line from the pyte buffer (a mapping of column to Char)."""
|
||||
return "".join(line[x].data for x in range(columns))
|
||||
|
||||
def run_interactive_command(cmd: List[str], expected_runtime_seconds: int = 30) -> Tuple[bytes, int]:
|
||||
|
||||
def run_interactive_command(
|
||||
cmd: List[str], expected_runtime_seconds: int = 30
|
||||
) -> Tuple[bytes, int]:
|
||||
"""
|
||||
Runs an interactive command with a pseudo-tty, capturing final scrollback history.
|
||||
|
||||
|
||||
Assumptions and constraints:
|
||||
- Running on a Linux system.
|
||||
- `cmd` is a non-empty list where cmd[0] is the executable.
|
||||
- The executable is on PATH.
|
||||
|
||||
|
||||
Args:
|
||||
cmd: A list containing the command and its arguments.
|
||||
expected_runtime_seconds: Expected runtime in seconds, defaults to 30.
|
||||
If process exceeds 2x this value, it will be terminated gracefully.
|
||||
If process exceeds 3x this value, it will be killed forcefully.
|
||||
Must be between 1 and 1800 seconds (30 minutes).
|
||||
|
||||
|
||||
Returns:
|
||||
A tuple of (captured_output, return_code), where captured_output is a UTF-8 encoded
|
||||
bytes object containing the trimmed non-empty history lines from the terminal session.
|
||||
|
||||
|
||||
Raises:
|
||||
ValueError: If no command is provided.
|
||||
FileNotFoundError: If the command is not found in PATH.
|
||||
|
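The 2x/3x escalation described in the docstring above can be pictured with a short sketch; this is an illustration of the stated policy, not the module's actual loop:

```python
import os
import signal
import time

def enforce_runtime(proc, expected_runtime_seconds: int) -> bool:
    """Terminate the child's process group after 2x the expected runtime,
    kill it after 3x. Returns True if the process had to be stopped."""
    start = time.monotonic()
    sent_term = False
    while proc.poll() is None:
        elapsed = time.monotonic() - start
        if elapsed > 3 * expected_runtime_seconds:
            os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
            return True
        if elapsed > 2 * expected_runtime_seconds and not sent_term:
            os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
            sent_term = True
        time.sleep(0.1)
    return sent_term
```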
|
@ -61,8 +64,10 @@ def run_interactive_command(cmd: List[str], expected_runtime_seconds: int = 30)
|
|||
if shutil.which(cmd[0]) is None:
|
||||
raise FileNotFoundError(f"Command '{cmd[0]}' not found in PATH.")
|
||||
if expected_runtime_seconds <= 0 or expected_runtime_seconds > 1800:
|
||||
raise ValueError("expected_runtime_seconds must be between 1 and 1800 seconds (30 minutes)")
|
||||
|
||||
raise ValueError(
|
||||
"expected_runtime_seconds must be between 1 and 1800 seconds (30 minutes)"
|
||||
)
|
||||
|
||||
try:
|
||||
term_size = os.get_terminal_size()
|
||||
cols, rows = term_size.columns, term_size.lines
|
||||
|
|
@ -85,20 +90,22 @@ def run_interactive_command(cmd: List[str], expected_runtime_seconds: int = 30)
|
|||
|
||||
# Set up environment variables for the subprocess using detected terminal size.
|
||||
env = os.environ.copy()
|
||||
env.update({
|
||||
'DEBIAN_FRONTEND': 'noninteractive',
|
||||
'GIT_PAGER': '',
|
||||
'PYTHONUNBUFFERED': '1',
|
||||
'CI': 'true',
|
||||
'LANG': 'C.UTF-8',
|
||||
'LC_ALL': 'C.UTF-8',
|
||||
'COLUMNS': str(cols),
|
||||
'LINES': str(rows),
|
||||
'FORCE_COLOR': '1',
|
||||
'GIT_TERMINAL_PROMPT': '0',
|
||||
'PYTHONDONTWRITEBYTECODE': '1',
|
||||
'NODE_OPTIONS': '--unhandled-rejections=strict'
|
||||
})
|
||||
env.update(
|
||||
{
|
||||
"DEBIAN_FRONTEND": "noninteractive",
|
||||
"GIT_PAGER": "",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"CI": "true",
|
||||
"LANG": "C.UTF-8",
|
||||
"LC_ALL": "C.UTF-8",
|
||||
"COLUMNS": str(cols),
|
||||
"LINES": str(rows),
|
||||
"FORCE_COLOR": "1",
|
||||
"GIT_TERMINAL_PROMPT": "0",
|
||||
"PYTHONDONTWRITEBYTECODE": "1",
|
||||
"NODE_OPTIONS": "--unhandled-rejections=strict",
|
||||
}
|
||||
)
|
||||
|
||||
proc = subprocess.Popen(
|
||||
cmd,
|
||||
|
|
@ -108,7 +115,7 @@ def run_interactive_command(cmd: List[str], expected_runtime_seconds: int = 30)
|
|||
bufsize=0,
|
||||
close_fds=True,
|
||||
env=env,
|
||||
preexec_fn=os.setsid # Create new process group for proper signal handling.
|
||||
preexec_fn=os.setsid, # Create new process group for proper signal handling.
|
||||
)
|
||||
os.close(slave_fd) # Close slave end in the parent process.
|
||||
|
||||
|
|
@ -200,19 +207,21 @@ def run_interactive_command(cmd: List[str], expected_runtime_seconds: int = 30)
|
|||
# Trim out empty lines to get only meaningful "history" lines.
|
||||
trimmed_lines = [line for line in all_lines if line.strip()]
|
||||
final_output = "\n".join(trimmed_lines)
|
||||
|
||||
|
||||
# Add timeout message if process was terminated due to timeout.
|
||||
if was_terminated:
|
||||
timeout_msg = f"\n[Process exceeded timeout ({expected_runtime_seconds} seconds expected)]"
|
||||
final_output += timeout_msg
|
||||
|
||||
|
||||
# Limit output to the last 8000 bytes.
|
||||
final_output = final_output[-8000:]
|
||||
|
||||
|
||||
return final_output.encode("utf-8"), proc.returncode
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
|
||||
if len(sys.argv) < 2:
|
||||
print("Usage: interactive.py <command> [args...]")
|
||||
sys.exit(1)
|
||||
|
|
|
|||
|
|
@ -47,9 +47,10 @@ class OpenAIStrategy(ProviderStrategy):
|
|||
if not key:
|
||||
missing.append("EXPERT_OPENAI_API_KEY environment variable is not set")
|
||||
|
||||
# Handle expert model selection if none specified
|
||||
# Handle expert model selection if none specified
|
||||
if hasattr(args, "expert_model") and not args.expert_model:
|
||||
from ra_aid.llm import select_expert_model
|
||||
|
||||
model = select_expert_model("openai")
|
||||
if model:
|
||||
args.expert_model = model
|
||||
|
|
|
|||
|
|
@ -1,25 +1,19 @@
|
|||
from ra_aid.tools import (
|
||||
ask_expert,
|
||||
ask_human,
|
||||
delete_key_facts,
|
||||
delete_key_snippets,
|
||||
deregister_related_files,
|
||||
emit_expert_context,
|
||||
emit_key_facts,
|
||||
emit_key_snippets,
|
||||
emit_plan,
|
||||
emit_related_files,
|
||||
emit_research_notes,
|
||||
fuzzy_find_project_files,
|
||||
list_directory_tree,
|
||||
monorepo_detected,
|
||||
plan_implementation_completed,
|
||||
read_file_tool,
|
||||
ripgrep_search,
|
||||
run_programming_task,
|
||||
run_shell_command,
|
||||
task_completed,
|
||||
ui_detected,
|
||||
web_search_tavily,
|
||||
)
|
||||
from ra_aid.tools.agent import (
|
||||
|
|
@ -30,7 +24,6 @@ from ra_aid.tools.agent import (
|
|||
request_web_research,
|
||||
)
|
||||
from ra_aid.tools.memory import one_shot_completed
|
||||
from ra_aid.tools.write_file import put_complete_file_contents
|
||||
|
||||
|
||||
# Read-only tools that don't modify system state
|
||||
|
|
@ -73,7 +66,9 @@ def get_read_only_tools(
|
|||
# Define constant tool groups
|
||||
READ_ONLY_TOOLS = get_read_only_tools()
|
||||
# MODIFICATION_TOOLS = [run_programming_task, put_complete_file_contents]
|
||||
MODIFICATION_TOOLS = [run_programming_task] # having put_complete_file_contents causes trouble :(
|
||||
MODIFICATION_TOOLS = [
|
||||
run_programming_task
|
||||
] # having put_complete_file_contents causes trouble :(
|
||||
COMMON_TOOLS = get_read_only_tools()
|
||||
EXPERT_TOOLS = [emit_expert_context, ask_expert]
|
||||
RESEARCH_TOOLS = [
|
||||
|
|
|
|||
|
|
@ -275,6 +275,7 @@ def request_task_implementation(task_spec: str) -> Dict[str, Any]:
|
|||
print_task_header(task_spec)
|
||||
# Run implementation agent
|
||||
from ..agent_utils import run_task_implementation_agent
|
||||
|
||||
_global_memory["completion_message"] = ""
|
||||
|
||||
_result = run_task_implementation_agent(
|
||||
|
|
@ -345,6 +346,7 @@ def request_implementation(task_spec: str) -> Dict[str, Any]:
|
|||
try:
|
||||
# Run planning agent
|
||||
from ..agent_utils import run_planning_agent
|
||||
|
||||
_global_memory["completion_message"] = ""
|
||||
|
||||
_result = run_planning_agent(
|
||||
|
|
|
|||
|
|
@ -185,7 +185,11 @@ def ask_expert(question: str) -> str:
|
|||
|
||||
query_parts.extend(["# Question", question])
|
||||
query_parts.extend(
|
||||
["\n # Addidional Requirements", "**DO NOT OVERTHINK**", "**DO NOT OVERCOMPLICATE**"]
|
||||
[
|
||||
"\n # Addidional Requirements",
|
||||
"**DO NOT OVERTHINK**",
|
||||
"**DO NOT OVERCOMPLICATE**",
|
||||
]
|
||||
)
|
||||
|
||||
# Join all parts
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ from rich.console import Console
|
|||
from rich.markdown import Markdown
|
||||
from rich.panel import Panel
|
||||
|
||||
from ra_aid.config import DEFAULT_TEST_CMD_TIMEOUT
|
||||
from ra_aid.logging_config import get_logger
|
||||
from ra_aid.tools.human import ask_human
|
||||
from ra_aid.tools.shell import run_shell_command
|
||||
|
|
@@ -85,7 +86,7 @@ class TestCommandExecutor:
            cmd: Test command to execute
            original_prompt: Original prompt text
        """
        timeout = self.config.get("timeout", 30)
        timeout = self.config.get("test_cmd_timeout", DEFAULT_TEST_CMD_TIMEOUT)
        try:
            logger.info(f"Executing test command: {cmd} with timeout {timeout}s")
            test_result = run_shell_command(cmd, timeout=timeout)

@@ -99,11 +100,11 @@
            logger.info("Test command executed successfully")

        except subprocess.TimeoutExpired:
            logger.warning(f"Test command timed out after {timeout}s: {cmd}")
            self.state.test_attempts += 1
            self.state.prompt = (
                f"{original_prompt}. Previous attempt timed out after {timeout} seconds"
            logger.warning(
                f"Test command timed out after {DEFAULT_TEST_CMD_TIMEOUT}s: {cmd}"
            )
            self.state.test_attempts += 1
            self.state.prompt = f"{original_prompt}. Previous attempt timed out after {DEFAULT_TEST_CMD_TIMEOUT} seconds"
            self.display_test_failure()

        except subprocess.CalledProcessError as e:

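Condensing the two executor hunks above, the control flow around the new timeout looks roughly like this (names simplified; a sketch, not the class's full code):

```python
import subprocess

def run_test_once(cmd, config, run_shell_command, state):
    timeout = config.get("test_cmd_timeout", 300)  # DEFAULT_TEST_CMD_TIMEOUT
    try:
        run_shell_command(cmd, timeout=timeout)
    except subprocess.TimeoutExpired:
        # Count the attempt and fold the timeout into the next prompt.
        state.test_attempts += 1
        state.prompt = f"{state.prompt}. Previous attempt timed out after {timeout} seconds"
```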
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import os
|
||||
from typing import Any, Dict, List, Optional, Set, Union
|
||||
from typing import Dict, List, Optional, Set, Union
|
||||
|
||||
from langchain_core.tools import tool
|
||||
from rich.console import Console
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
import os
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Union
|
||||
|
|
@ -22,25 +21,25 @@ logger = get_logger(__name__)
|
|||
|
||||
def get_aider_executable() -> str:
|
||||
"""Get the path to the aider executable in the same bin/Scripts directory as Python.
|
||||
|
||||
|
||||
Returns:
|
||||
str: Full path to aider executable
|
||||
"""
|
||||
# Get directory containing Python executable
|
||||
# Get directory containing Python executable
|
||||
bin_dir = Path(sys.executable).parent
|
||||
|
||||
|
||||
# Check for platform-specific executable name
|
||||
if sys.platform == "win32":
|
||||
aider_exe = bin_dir / "aider.exe"
|
||||
else:
|
||||
aider_exe = bin_dir / "aider"
|
||||
|
||||
|
||||
if not aider_exe.exists():
|
||||
raise RuntimeError(f"Could not find aider executable at {aider_exe}")
|
||||
|
||||
|
||||
if not os.access(aider_exe, os.X_OK):
|
||||
raise RuntimeError(f"Aider executable at {aider_exe} is not executable")
|
||||
|
||||
|
||||
return str(aider_exe)
|
||||
|
||||
|
||||
|
|
@ -91,10 +90,14 @@ def run_programming_task(
|
|||
|
||||
# Get combined list of files (explicit + related) with normalized paths
|
||||
# and deduplicated using set operations
|
||||
files_to_use = list({os.path.abspath(f) for f in (files or [])} | {
|
||||
os.path.abspath(f) for f in _global_memory["related_files"].values()
|
||||
if "related_files" in _global_memory
|
||||
})
|
||||
files_to_use = list(
|
||||
{os.path.abspath(f) for f in (files or [])}
|
||||
| {
|
||||
os.path.abspath(f)
|
||||
for f in _global_memory["related_files"].values()
|
||||
if "related_files" in _global_memory
|
||||
}
|
||||
)
|
||||
|
||||
# Add config file if specified
|
||||
if "config" in _global_memory and _global_memory["config"].get("aider_config"):
|
||||
|
|
@ -146,7 +149,7 @@ def run_programming_task(
|
|||
|
||||
# Log the programming task
|
||||
log_work_event(f"Executed programming task: {_truncate_for_log(instructions)}")
|
||||
|
||||
|
||||
# Return structured output
|
||||
return {
|
||||
"output": truncate_output(result[0].decode()) if result[0] else "",
|
||||
|
|
@@ -190,21 +193,21 @@ def parse_aider_flags(aider_flags: str) -> List[str]:

    # Split by comma and strip whitespace
    flag_groups = [group.strip() for group in aider_flags.split(",")]

    result = []
    for group in flag_groups:
        if not group:
            continue

        # Split by space to separate flag from value
        parts = group.split()

        # Add '--' prefix to the flag if not present, stripping any extra dashes
        flag = parts[0].lstrip("-")  # Remove all leading dashes
        flag = f"--{flag}"  # Add exactly two dashes

        result.append(flag)

        # Add any remaining parts as separate values
        if len(parts) > 1:
            result.extend(parts[1:])

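Based on the loop shown above, the flag parsing works out as follows (a worked example; the values are not copied from the test suite):

```python
from ra_aid.tools.programmer import parse_aider_flags

parse_aider_flags("yes-always, dark-mode")
# -> ["--yes-always", "--dark-mode"]

parse_aider_flags("--model gpt-4, architect")
# -> ["--model", "gpt-4", "--architect"]
#    (the first token of each group becomes the flag, remaining tokens stay as values)
```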
|
|
|||
|
|
@@ -21,12 +21,14 @@ def _truncate_for_log(text: str, max_length: int = 300) -> str:


@tool
def run_shell_command(command: str, expected_runtime_seconds: int = 30) -> Dict[str, Union[str, int, bool]]:
def run_shell_command(
    command: str, timeout: int = 30
) -> Dict[str, Union[str, int, bool]]:
    """Execute a shell command and return its output.

    Args:
        command: The shell command to execute
        expected_runtime_seconds: Expected runtime in seconds, defaults to 30.
        timeout: Expected runtime in seconds, defaults to 30.
            If process exceeds 2x this value, it will be terminated gracefully.
            If process exceeds 3x this value, it will be killed forcefully.

@@ -79,7 +81,9 @@ def run_shell_command(command: str, expected_runtime_seconds: int = 30) -> Dict[

    try:
        print()
        output, return_code = run_interactive_command(["/bin/bash", "-c", command], expected_runtime_seconds=expected_runtime_seconds)
        output, return_code = run_interactive_command(
            ["/bin/bash", "-c", command], expected_runtime_seconds=timeout
        )
        print()
        result = {
            "output": truncate_output(output.decode()) if output else "",

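Putting the two hunks together: the shell tool now exposes `timeout` and forwards it to `run_interactive_command` as `expected_runtime_seconds`. A hypothetical invocation (LangChain tools are called with a dict of arguments, as in the tests elsewhere in this commit; the command string is illustrative):

```python
from ra_aid.tools.shell import run_shell_command

result = run_shell_command.invoke({"command": "pytest -q", "timeout": 300})
print(result["output"])  # truncated terminal output, as built above
```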
|
|
|||
|
|
@ -12,7 +12,10 @@ console = Console()
|
|||
|
||||
@tool
|
||||
def put_complete_file_contents(
|
||||
filepath: str, complete_file_contents: str = "", encoding: str = "utf-8", verbose: bool = True
|
||||
filepath: str,
|
||||
complete_file_contents: str = "",
|
||||
encoding: str = "utf-8",
|
||||
verbose: bool = True,
|
||||
) -> Dict[str, any]:
|
||||
"""Write the complete contents of a file, creating it if it doesn't exist.
|
||||
This tool is specifically for writing the entire contents of a file at once,
|
||||
|
|
@ -22,7 +25,7 @@ def put_complete_file_contents(
|
|||
|
||||
Args:
|
||||
filepath: (Required) Path to the file to write. Must be provided.
|
||||
complete_file_contents: Complete string content to write to the file. Defaults to
|
||||
complete_file_contents: Complete string content to write to the file. Defaults to
|
||||
an empty string, which will create an empty file.
|
||||
encoding: File encoding to use (default: utf-8)
|
||||
verbose: Whether to display a Rich panel with write statistics (default: True)
|
||||
|
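Usage of this tool mirrors the write_file tests later in this commit: a single dict of arguments (the file path here is illustrative):

```python
from ra_aid.tools.write_file import put_complete_file_contents

result = put_complete_file_contents(
    {"filepath": "notes/todo.txt", "complete_file_contents": "Hello, World!\n"}
)
assert result["success"] is True
```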
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
"""Tests for the interactive subprocess module."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
|
|
@ -49,7 +48,7 @@ def test_empty_command():
|
|||
|
||||
def test_interactive_command():
|
||||
"""Test running an interactive command.
|
||||
|
||||
|
||||
This test verifies that output appears in real-time using process substitution.
|
||||
We use a command that prints to both stdout and stderr.
|
||||
"""
|
||||
|
|
@ -70,7 +69,9 @@ def test_large_output():
|
|||
# Clean up any leading artifacts
|
||||
output_cleaned = output.lstrip(b"^D")
|
||||
# Verify the output size is limited to 8000 bytes
|
||||
assert len(output_cleaned) <= 8000, f"Output exceeded 8000 bytes: {len(output_cleaned)} bytes"
|
||||
assert (
|
||||
len(output_cleaned) <= 8000
|
||||
), f"Output exceeded 8000 bytes: {len(output_cleaned)} bytes"
|
||||
# Verify we have the last lines (should contain the highest numbers)
|
||||
assert b"Line 1000" in output_cleaned, "Missing last line of output"
|
||||
assert retcode == 0
|
||||
|
|
@ -83,17 +84,19 @@ def test_byte_limit():
|
|||
cmd = 'for i in {1..200}; do printf "%04d: %s\\n" "$i" "This is a line with padding to ensure we go over the byte limit quickly"; done'
|
||||
output, retcode = run_interactive_command(["/bin/bash", "-c", cmd])
|
||||
output_cleaned = output.lstrip(b"^D")
|
||||
|
||||
|
||||
# Verify exact 8000 byte limit
|
||||
assert len(output_cleaned) <= 8000, f"Output exceeded 8000 bytes: {len(output_cleaned)} bytes"
|
||||
|
||||
assert (
|
||||
len(output_cleaned) <= 8000
|
||||
), f"Output exceeded 8000 bytes: {len(output_cleaned)} bytes"
|
||||
|
||||
# Get the last line number from the output
|
||||
last_line = output_cleaned.splitlines()[-1]
|
||||
last_num = int(last_line.split(b':')[0])
|
||||
|
||||
last_num = int(last_line.split(b":")[0])
|
||||
|
||||
# Verify we have a high number in the last line (should be near 200)
|
||||
assert last_num > 150, f"Expected last line number to be near 200, got {last_num}"
|
||||
|
||||
|
||||
assert retcode == 0
|
||||
|
||||
|
||||
|
|
@ -134,16 +137,20 @@ def test_cat_medium_file():
|
|||
for line in output_cleaned.splitlines()
|
||||
if b"Script" not in line and line.strip()
|
||||
]
|
||||
|
||||
|
||||
# With 8000 byte limit, we expect to see the last portion of lines
|
||||
# The exact number may vary due to terminal settings, but we should
|
||||
# at least have the last lines of the file
|
||||
assert len(lines) >= 90, f"Expected at least 90 lines due to 8000 byte limit, got {len(lines)}"
|
||||
|
||||
assert (
|
||||
len(lines) >= 90
|
||||
), f"Expected at least 90 lines due to 8000 byte limit, got {len(lines)}"
|
||||
|
||||
# Most importantly, verify we have the last lines
|
||||
last_line = lines[-1].decode('utf-8')
|
||||
assert "This is test line 499" in last_line, f"Expected last line to be 499, got: {last_line}"
|
||||
|
||||
last_line = lines[-1].decode("utf-8")
|
||||
assert (
|
||||
"This is test line 499" in last_line
|
||||
), f"Expected last line to be 499, got: {last_line}"
|
||||
|
||||
assert retcode == 0
|
||||
finally:
|
||||
os.unlink(temp_path)
|
||||
|
|
@ -155,9 +162,7 @@ def test_realtime_output():
|
|||
cmd = "echo 'first'; sleep 0.1; echo 'second'; sleep 0.1; echo 'third'"
|
||||
output, retcode = run_interactive_command(["/bin/bash", "-c", cmd])
|
||||
lines = [
|
||||
line
|
||||
for line in output.splitlines()
|
||||
if b"Script" not in line and line.strip()
|
||||
line for line in output.splitlines() if b"Script" not in line and line.strip()
|
||||
]
|
||||
assert b"first" in lines[0]
|
||||
assert b"second" in lines[1]
|
||||
|
|
|
|||
|
|
@ -57,7 +57,13 @@ def test_initialize_expert_defaults(clean_env, mock_openai, monkeypatch):
|
|||
monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "test-key")
|
||||
_llm = initialize_expert_llm("openai", "o1")
|
||||
|
||||
mock_openai.assert_called_once_with(api_key="test-key", model="o1", reasoning_effort="high", timeout=180, max_retries=5)
|
||||
mock_openai.assert_called_once_with(
|
||||
api_key="test-key",
|
||||
model="o1",
|
||||
reasoning_effort="high",
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
|
||||
def test_initialize_expert_openai_custom(clean_env, mock_openai, monkeypatch):
|
||||
|
|
@ -66,8 +72,12 @@ def test_initialize_expert_openai_custom(clean_env, mock_openai, monkeypatch):
|
|||
_llm = initialize_expert_llm("openai", "gpt-4-preview")
|
||||
|
||||
mock_openai.assert_called_once_with(
|
||||
api_key="test-key", model="gpt-4-preview", temperature=0, reasoning_effort="high",
|
||||
timeout=180, max_retries=5
|
||||
api_key="test-key",
|
||||
model="gpt-4-preview",
|
||||
temperature=0,
|
||||
reasoning_effort="high",
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -77,8 +87,11 @@ def test_initialize_expert_gemini(clean_env, mock_gemini, monkeypatch):
|
|||
_llm = initialize_expert_llm("gemini", "gemini-2.0-flash-thinking-exp-1219")
|
||||
|
||||
mock_gemini.assert_called_once_with(
|
||||
api_key="test-key", model="gemini-2.0-flash-thinking-exp-1219", temperature=0,
|
||||
timeout=180, max_retries=5
|
||||
api_key="test-key",
|
||||
model="gemini-2.0-flash-thinking-exp-1219",
|
||||
temperature=0,
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -88,8 +101,11 @@ def test_initialize_expert_anthropic(clean_env, mock_anthropic, monkeypatch):
|
|||
_llm = initialize_expert_llm("anthropic", "claude-3")
|
||||
|
||||
mock_anthropic.assert_called_once_with(
|
||||
api_key="test-key", model_name="claude-3", temperature=0,
|
||||
timeout=180, max_retries=5
|
||||
api_key="test-key",
|
||||
model_name="claude-3",
|
||||
temperature=0,
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -104,7 +120,7 @@ def test_initialize_expert_openrouter(clean_env, mock_openai, monkeypatch):
|
|||
model="models/mistral-large",
|
||||
temperature=0,
|
||||
timeout=180,
|
||||
max_retries=5
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -120,7 +136,7 @@ def test_initialize_expert_openai_compatible(clean_env, mock_openai, monkeypatch
|
|||
model="local-model",
|
||||
temperature=0,
|
||||
timeout=180,
|
||||
max_retries=5
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -151,16 +167,24 @@ def test_initialize_openai(clean_env, mock_openai):
|
|||
os.environ["OPENAI_API_KEY"] = "test-key"
|
||||
_model = initialize_llm("openai", "gpt-4", temperature=0.7)
|
||||
|
||||
mock_openai.assert_called_once_with(api_key="test-key", model="gpt-4", temperature=0.7, timeout=180, max_retries=5)
|
||||
mock_openai.assert_called_once_with(
|
||||
api_key="test-key", model="gpt-4", temperature=0.7, timeout=180, max_retries=5
|
||||
)
|
||||
|
||||
|
||||
def test_initialize_gemini(clean_env, mock_gemini):
|
||||
"""Test Gemini provider initialization"""
|
||||
os.environ["GEMINI_API_KEY"] = "test-key"
|
||||
_model = initialize_llm("gemini", "gemini-2.0-flash-thinking-exp-1219", temperature=0.7)
|
||||
_model = initialize_llm(
|
||||
"gemini", "gemini-2.0-flash-thinking-exp-1219", temperature=0.7
|
||||
)
|
||||
|
||||
mock_gemini.assert_called_with(
|
||||
api_key="test-key", model="gemini-2.0-flash-thinking-exp-1219", temperature=0.7, timeout=180, max_retries=5
|
||||
api_key="test-key",
|
||||
model="gemini-2.0-flash-thinking-exp-1219",
|
||||
temperature=0.7,
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -169,7 +193,13 @@ def test_initialize_anthropic(clean_env, mock_anthropic):
|
|||
os.environ["ANTHROPIC_API_KEY"] = "test-key"
|
||||
_model = initialize_llm("anthropic", "claude-3", temperature=0.7)
|
||||
|
||||
mock_anthropic.assert_called_with(api_key="test-key", model_name="claude-3", temperature=0.7, timeout=180, max_retries=5)
|
||||
mock_anthropic.assert_called_with(
|
||||
api_key="test-key",
|
||||
model_name="claude-3",
|
||||
temperature=0.7,
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
|
||||
def test_initialize_openrouter(clean_env, mock_openai):
|
||||
|
|
@ -239,10 +269,22 @@ def test_temperature_defaults(clean_env, mock_openai, mock_anthropic, mock_gemin
|
|||
|
||||
# Test expert models don't require temperature
|
||||
initialize_expert_llm("openai", "o1")
|
||||
mock_openai.assert_called_with(api_key="test-key", model="o1", reasoning_effort="high", timeout=180, max_retries=5)
|
||||
mock_openai.assert_called_with(
|
||||
api_key="test-key",
|
||||
model="o1",
|
||||
reasoning_effort="high",
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
initialize_expert_llm("openai", "o1-mini")
|
||||
mock_openai.assert_called_with(api_key="test-key", model="o1-mini", reasoning_effort="high", timeout=180, max_retries=5)
|
||||
mock_openai.assert_called_with(
|
||||
api_key="test-key",
|
||||
model="o1-mini",
|
||||
reasoning_effort="high",
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
|
||||
def test_explicit_temperature(clean_env, mock_openai, mock_anthropic, mock_gemini):
|
||||
|
|
@ -257,19 +299,31 @@ def test_explicit_temperature(clean_env, mock_openai, mock_anthropic, mock_gemin
|
|||
# Test OpenAI
|
||||
initialize_llm("openai", "test-model", temperature=test_temp)
|
||||
mock_openai.assert_called_with(
|
||||
api_key="test-key", model="test-model", temperature=test_temp, timeout=180, max_retries=5
|
||||
api_key="test-key",
|
||||
model="test-model",
|
||||
temperature=test_temp,
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
# Test Gemini
|
||||
initialize_llm("gemini", "test-model", temperature=test_temp)
|
||||
mock_gemini.assert_called_with(
|
||||
api_key="test-key", model="test-model", temperature=test_temp, timeout=180, max_retries=5
|
||||
api_key="test-key",
|
||||
model="test-model",
|
||||
temperature=test_temp,
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
# Test Anthropic
|
||||
initialize_llm("anthropic", "test-model", temperature=test_temp)
|
||||
mock_anthropic.assert_called_with(
|
||||
api_key="test-key", model_name="test-model", temperature=test_temp, timeout=180, max_retries=5
|
||||
api_key="test-key",
|
||||
model_name="test-model",
|
||||
temperature=test_temp,
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
# Test OpenRouter
|
||||
|
|
@ -290,13 +344,14 @@ def test_get_available_openai_models_success():
|
|||
mock_model.id = "gpt-4"
|
||||
mock_models = Mock()
|
||||
mock_models.data = [mock_model]
|
||||
|
||||
|
||||
with mock.patch("ra_aid.llm.OpenAI") as mock_client:
|
||||
mock_client.return_value.models.list.return_value = mock_models
|
||||
models = get_available_openai_models()
|
||||
assert models == ["gpt-4"]
|
||||
mock_client.return_value.models.list.assert_called_once()
|
||||
|
||||
|
||||
def test_get_available_openai_models_failure():
|
||||
"""Test graceful handling of model retrieval failure."""
|
||||
with mock.patch("ra_aid.llm.OpenAI") as mock_client:
|
||||
|
|
@ -305,32 +360,41 @@ def test_get_available_openai_models_failure():
|
|||
assert models == []
|
||||
mock_client.return_value.models.list.assert_called_once()
|
||||
|
||||
|
||||
def test_select_expert_model_explicit():
|
||||
"""Test model selection with explicitly specified model."""
|
||||
model = select_expert_model("openai", "gpt-4")
|
||||
assert model == "gpt-4"
|
||||
|
||||
|
||||
def test_select_expert_model_non_openai():
|
||||
"""Test model selection for non-OpenAI provider."""
|
||||
model = select_expert_model("anthropic", None)
|
||||
assert model is None
|
||||
|
||||
|
||||
def test_select_expert_model_priority():
|
||||
"""Test model selection follows priority order."""
|
||||
available_models = ["gpt-4", "o1", "o3-mini"]
|
||||
|
||||
with mock.patch("ra_aid.llm.get_available_openai_models", return_value=available_models):
|
||||
|
||||
with mock.patch(
|
||||
"ra_aid.llm.get_available_openai_models", return_value=available_models
|
||||
):
|
||||
model = select_expert_model("openai")
|
||||
assert model == "o3-mini"
|
||||
|
||||
|
||||
def test_select_expert_model_no_match():
|
||||
"""Test model selection when no priority models available."""
|
||||
available_models = ["gpt-4", "gpt-3.5"]
|
||||
|
||||
with mock.patch("ra_aid.llm.get_available_openai_models", return_value=available_models):
|
||||
|
||||
with mock.patch(
|
||||
"ra_aid.llm.get_available_openai_models", return_value=available_models
|
||||
):
|
||||
model = select_expert_model("openai")
|
||||
assert model is None
|
||||
|
||||
|
||||
def test_temperature_validation(clean_env, mock_openai):
|
||||
"""Test temperature validation in command line arguments."""
|
||||
from ra_aid.__main__ import parse_arguments
|
||||
|
|
@ -358,34 +422,49 @@ def test_provider_name_validation():
|
|||
initialize_llm(provider, "test-model", temperature=0.7)
|
||||
except ValueError as e:
|
||||
if "Temperature must be provided" not in str(e):
|
||||
pytest.fail(f"Valid provider {provider} raised unexpected ValueError: {e}")
|
||||
pytest.fail(
|
||||
f"Valid provider {provider} raised unexpected ValueError: {e}"
|
||||
)
|
||||
|
||||
|
||||
def test_initialize_llm_cross_provider(clean_env, mock_openai, mock_anthropic, mock_gemini, monkeypatch):
|
||||
def test_initialize_llm_cross_provider(
|
||||
clean_env, mock_openai, mock_anthropic, mock_gemini, monkeypatch
|
||||
):
|
||||
"""Test initializing different providers in sequence."""
|
||||
# Initialize OpenAI
|
||||
monkeypatch.setenv("OPENAI_API_KEY", "openai-key")
|
||||
_llm1 = initialize_llm("openai", "gpt-4", temperature=0.7)
|
||||
mock_openai.assert_called_with(api_key="openai-key", model="gpt-4", temperature=0.7, timeout=180, max_retries=5)
|
||||
mock_openai.assert_called_with(
|
||||
api_key="openai-key", model="gpt-4", temperature=0.7, timeout=180, max_retries=5
|
||||
)
|
||||
|
||||
# Initialize Anthropic
|
||||
monkeypatch.setenv("ANTHROPIC_API_KEY", "anthropic-key")
|
||||
_llm2 = initialize_llm("anthropic", "claude-3", temperature=0.7)
|
||||
mock_anthropic.assert_called_with(
|
||||
api_key="anthropic-key", model_name="claude-3", temperature=0.7, timeout=180, max_retries=5
|
||||
api_key="anthropic-key",
|
||||
model_name="claude-3",
|
||||
temperature=0.7,
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
# Initialize Gemini
|
||||
monkeypatch.setenv("GEMINI_API_KEY", "gemini-key")
|
||||
_llm3 = initialize_llm("gemini", "gemini-pro", temperature=0.7)
|
||||
mock_gemini.assert_called_with(
|
||||
api_key="gemini-key", model="gemini-pro", temperature=0.7, timeout=180, max_retries=5
|
||||
api_key="gemini-key",
|
||||
model="gemini-pro",
|
||||
temperature=0.7,
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Args:
|
||||
"""Test arguments class."""
|
||||
|
||||
provider: str
|
||||
expert_provider: str
|
||||
model: str = None
|
||||
|
|
@ -412,7 +491,13 @@ def test_environment_variable_precedence(clean_env, mock_openai, monkeypatch):
|
|||
|
||||
# Test LLM client creation with expert mode
|
||||
_llm = create_llm_client("openai", "o1", is_expert=True)
|
||||
mock_openai.assert_called_with(api_key="expert-key", model="o1", reasoning_effort="high", timeout=180, max_retries=5)
|
||||
mock_openai.assert_called_with(
|
||||
api_key="expert-key",
|
||||
model="o1",
|
||||
reasoning_effort="high",
|
||||
timeout=180,
|
||||
max_retries=5,
|
||||
)
|
||||
|
||||
# Test environment validation
|
||||
monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "")
|
||||
|
|
@ -459,7 +544,9 @@ def mock_deepseek_reasoner():
|
|||
yield mock
|
||||
|
||||
|
||||
def test_initialize_deepseek(clean_env, mock_openai, mock_deepseek_reasoner, monkeypatch):
|
||||
def test_initialize_deepseek(
|
||||
clean_env, mock_openai, mock_deepseek_reasoner, monkeypatch
|
||||
):
|
||||
"""Test DeepSeek provider initialization with different models."""
|
||||
monkeypatch.setenv("DEEPSEEK_API_KEY", "test-key")
|
||||
|
||||
|
|
@ -486,7 +573,9 @@ def test_initialize_deepseek(clean_env, mock_openai, mock_deepseek_reasoner, mon
|
|||
)
|
||||
|
||||
|
||||
def test_initialize_openrouter_deepseek(clean_env, mock_openai, mock_deepseek_reasoner, monkeypatch):
|
||||
def test_initialize_openrouter_deepseek(
|
||||
clean_env, mock_openai, mock_deepseek_reasoner, monkeypatch
|
||||
):
|
||||
"""Test OpenRouter DeepSeek model initialization."""
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "test-key")
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
import pytest
|
||||
from pathlib import Path
|
||||
from langchain_core.tools import Tool
|
||||
|
||||
from ra_aid.tools.programmer import parse_aider_flags, run_programming_task, get_aider_executable
|
||||
from ra_aid.tools.programmer import (
|
||||
get_aider_executable,
|
||||
parse_aider_flags,
|
||||
run_programming_task,
|
||||
)
|
||||
|
||||
# Test cases for parse_aider_flags function
|
||||
test_cases = [
|
||||
|
|
@ -103,18 +105,24 @@ def test_path_normalization_and_deduplication(mocker, tmp_path):
|
|||
test_file = tmp_path / "test.py"
|
||||
test_file.write_text("")
|
||||
new_file = tmp_path / "new.py"
|
||||
|
||||
|
||||
# Mock dependencies
|
||||
mocker.patch("ra_aid.tools.programmer._global_memory", {"related_files": {}})
|
||||
mocker.patch("ra_aid.tools.programmer.get_aider_executable", return_value="/path/to/aider")
|
||||
mock_run = mocker.patch("ra_aid.tools.programmer.run_interactive_command", return_value=(b"", 0))
|
||||
mocker.patch(
|
||||
"ra_aid.tools.programmer.get_aider_executable", return_value="/path/to/aider"
|
||||
)
|
||||
mock_run = mocker.patch(
|
||||
"ra_aid.tools.programmer.run_interactive_command", return_value=(b"", 0)
|
||||
)
|
||||
|
||||
# Test duplicate paths
|
||||
run_programming_task.invoke({
|
||||
"instructions": "test instruction",
|
||||
"files": [str(test_file), str(test_file)] # Same path twice
|
||||
})
|
||||
|
||||
run_programming_task.invoke(
|
||||
{
|
||||
"instructions": "test instruction",
|
||||
"files": [str(test_file), str(test_file)], # Same path twice
|
||||
}
|
||||
)
|
||||
|
||||
# Get the command list passed to run_interactive_command
|
||||
cmd_args = mock_run.call_args[0][0]
|
||||
# Count occurrences of test_file path in command
|
||||
|
|
@ -122,16 +130,22 @@ def test_path_normalization_and_deduplication(mocker, tmp_path):
|
|||
assert test_file_count == 1, "Expected exactly one instance of test_file path"
|
||||
|
||||
# Test mixed paths
|
||||
run_programming_task.invoke({
|
||||
"instructions": "test instruction",
|
||||
"files": [str(test_file), str(new_file)] # Two different paths
|
||||
})
|
||||
|
||||
run_programming_task.invoke(
|
||||
{
|
||||
"instructions": "test instruction",
|
||||
"files": [str(test_file), str(new_file)], # Two different paths
|
||||
}
|
||||
)
|
||||
|
||||
# Get the command list from the second call
|
||||
cmd_args = mock_run.call_args[0][0]
|
||||
# Verify both paths are present exactly once
|
||||
assert sum(1 for arg in cmd_args if arg == str(test_file)) == 1, "Expected one instance of test_file"
|
||||
assert sum(1 for arg in cmd_args if arg == str(new_file)) == 1, "Expected one instance of new_file"
|
||||
assert (
|
||||
sum(1 for arg in cmd_args if arg == str(test_file)) == 1
|
||||
), "Expected one instance of test_file"
|
||||
assert (
|
||||
sum(1 for arg in cmd_args if arg == str(new_file)) == 1
|
||||
), "Expected one instance of new_file"
|
||||
|
||||
|
||||
def test_get_aider_executable(mocker):
|
||||
|
|
@ -139,11 +153,11 @@ def test_get_aider_executable(mocker):
|
|||
mock_sys = mocker.patch("ra_aid.tools.programmer.sys")
|
||||
mock_path = mocker.patch("ra_aid.tools.programmer.Path")
|
||||
mock_os = mocker.patch("ra_aid.tools.programmer.os")
|
||||
|
||||
|
||||
# Mock sys.executable and platform
|
||||
mock_sys.executable = "/path/to/venv/bin/python"
|
||||
mock_sys.platform = "linux"
|
||||
|
||||
|
||||
# Mock Path().parent and exists()
|
||||
mock_path_instance = mocker.MagicMock()
|
||||
mock_path.return_value = mock_path_instance
|
||||
|
|
@ -152,26 +166,26 @@ def test_get_aider_executable(mocker):
|
|||
mock_aider = mocker.MagicMock()
|
||||
mock_parent.__truediv__.return_value = mock_aider
|
||||
mock_aider.exists.return_value = True
|
||||
|
||||
|
||||
# Mock os.access to return True
|
||||
mock_os.access.return_value = True
|
||||
mock_os.X_OK = 1 # Mock the execute permission constant
|
||||
|
||||
|
||||
# Test happy path on Linux
|
||||
aider_path = get_aider_executable()
|
||||
assert aider_path == str(mock_aider)
|
||||
mock_parent.__truediv__.assert_called_with("aider")
|
||||
|
||||
|
||||
# Test Windows path
|
||||
mock_sys.platform = "win32"
|
||||
aider_path = get_aider_executable()
|
||||
mock_parent.__truediv__.assert_called_with("aider.exe")
|
||||
|
||||
|
||||
# Test executable not found
|
||||
mock_aider.exists.return_value = False
|
||||
with pytest.raises(RuntimeError, match="Could not find aider executable"):
|
||||
get_aider_executable()
|
||||
|
||||
|
||||
# Test not executable
|
||||
mock_aider.exists.return_value = True
|
||||
mock_os.access.return_value = False
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ from unittest.mock import patch
|
|||
|
||||
import pytest
|
||||
|
||||
from ra_aid.config import DEFAULT_TEST_CMD_TIMEOUT
|
||||
from ra_aid.tools.handle_user_defined_test_cmd_execution import execute_test_command
|
||||
|
||||
# Test cases for execute_test_command
|
||||
|
|
@ -195,9 +196,10 @@ def test_execute_test_command(
|
|||
|
||||
if auto_test and test_attempts < config.get("max_test_cmd_retries", 5):
|
||||
if config.get("test_cmd"):
|
||||
# Verify run_shell_command called with command and default timeout
|
||||
# Verify run_shell_command called with command and configured timeout
|
||||
mock_run_cmd.assert_called_once_with(
|
||||
config["test_cmd"], timeout=config.get("timeout", 30)
|
||||
config["test_cmd"],
|
||||
timeout=config.get("test_cmd_timeout", DEFAULT_TEST_CMD_TIMEOUT),
|
||||
)
|
||||
|
||||
# Verify logging for max retries
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ from unittest.mock import Mock, patch
|
|||
|
||||
import pytest
|
||||
|
||||
from ra_aid.config import DEFAULT_TEST_CMD_TIMEOUT
|
||||
from ra_aid.tools.handle_user_defined_test_cmd_execution import (
|
||||
TestCommandExecutor,
|
||||
TestState,
|
||||
|
|
@ -92,8 +93,9 @@ def test_run_test_command_timeout(test_executor):
|
|||
"ra_aid.tools.handle_user_defined_test_cmd_execution.logger.warning"
|
||||
) as mock_logger,
|
||||
):
|
||||
# Create a TimeoutExpired exception
|
||||
timeout_exc = subprocess.TimeoutExpired(cmd="test", timeout=30)
|
||||
# Create a TimeoutExpired exception with configured timeout
|
||||
timeout = test_executor.config.get("test_cmd_timeout", DEFAULT_TEST_CMD_TIMEOUT)
|
||||
timeout_exc = subprocess.TimeoutExpired(cmd="test", timeout=timeout)
|
||||
mock_run.side_effect = timeout_exc
|
||||
|
||||
test_executor.run_test_command("test", "original")
|
||||
|
|
@ -101,7 +103,7 @@ def test_run_test_command_timeout(test_executor):
|
|||
# Verify state updates
|
||||
assert not test_executor.state.should_break
|
||||
assert test_executor.state.test_attempts == 1
|
||||
assert "timed out after 30 seconds" in test_executor.state.prompt
|
||||
assert f"timed out after {timeout} seconds" in test_executor.state.prompt
|
||||
|
||||
# Verify logging
|
||||
mock_logger.assert_called_once()
|
||||
|
|
|
|||
|
|
@ -472,6 +472,7 @@ def test_emit_related_files_path_normalization(reset_memory, tmp_path):
|
|||
|
||||
# Change to the temp directory so relative paths work
|
||||
import os
|
||||
|
||||
original_dir = os.getcwd()
|
||||
os.chdir(tmp_path)
|
||||
|
||||
|
|
|
|||
|
|
@ -19,7 +19,9 @@ def test_basic_write_functionality(temp_test_dir):
|
|||
test_file = temp_test_dir / "test.txt"
|
||||
content = "Hello, World!\nTest content"
|
||||
|
||||
result = put_complete_file_contents({"filepath": str(test_file), "complete_file_contents": content})
|
||||
result = put_complete_file_contents(
|
||||
{"filepath": str(test_file), "complete_file_contents": content}
|
||||
)
|
||||
|
||||
# Verify file contents
|
||||
assert test_file.read_text() == content
|
||||
|
|
@ -39,7 +41,9 @@ def test_directory_creation(temp_test_dir):
|
|||
test_file = nested_dir / "test.txt"
|
||||
content = "Test content"
|
||||
|
||||
result = put_complete_file_contents({"filepath": str(test_file), "complete_file_contents": content})
|
||||
result = put_complete_file_contents(
|
||||
{"filepath": str(test_file), "complete_file_contents": content}
|
||||
)
|
||||
|
||||
assert test_file.exists()
|
||||
assert test_file.read_text() == content
|
||||
|
|
@ -53,14 +57,22 @@ def test_different_encodings(temp_test_dir):
|
|||
|
||||
# Test UTF-8
|
||||
result_utf8 = put_complete_file_contents(
|
||||
{"filepath": str(test_file), "complete_file_contents": content, "encoding": "utf-8"}
|
||||
{
|
||||
"filepath": str(test_file),
|
||||
"complete_file_contents": content,
|
||||
"encoding": "utf-8",
|
||||
}
|
||||
)
|
||||
assert result_utf8["success"] is True
|
||||
assert test_file.read_text(encoding="utf-8") == content
|
||||
|
||||
# Test UTF-16
|
||||
result_utf16 = put_complete_file_contents(
|
||||
{"filepath": str(test_file), "complete_file_contents": content, "encoding": "utf-16"}
|
||||
{
|
||||
"filepath": str(test_file),
|
||||
"complete_file_contents": content,
|
||||
"encoding": "utf-16",
|
||||
}
|
||||
)
|
||||
assert result_utf16["success"] is True
|
||||
assert test_file.read_text(encoding="utf-16") == content
|
||||
|
|
@ -145,7 +157,9 @@ def test_large_file_write(temp_test_dir):
|
|||
test_file = temp_test_dir / "large.txt"
|
||||
content = "Large content\n" * 1000 # Create substantial content
|
||||
|
||||
result = put_complete_file_contents({"filepath": str(test_file), "complete_file_contents": content})
|
||||
result = put_complete_file_contents(
|
||||
{"filepath": str(test_file), "complete_file_contents": content}
|
||||
)
|
||||
|
||||
assert test_file.exists()
|
||||
assert test_file.read_text() == content
|
||||
|
|
|
|||
|
|
@ -60,11 +60,7 @@ def sample_git_repo(empty_git_repo):
|
|||
def git_repo_with_untracked(sample_git_repo):
|
||||
"""Create a git repository with both tracked and untracked files."""
|
||||
# Create untracked files
|
||||
untracked_files = [
|
||||
"untracked.txt",
|
||||
"src/untracked.py",
|
||||
"docs/draft.md"
|
||||
]
|
||||
untracked_files = ["untracked.txt", "src/untracked.py", "docs/draft.md"]
|
||||
|
||||
for file_path in untracked_files:
|
||||
full_path = sample_git_repo / file_path
|
||||
|
|
@ -91,7 +87,7 @@ docs/draft.md
|
|||
"""
|
||||
gitignore_path = git_repo_with_untracked / ".gitignore"
|
||||
gitignore_path.write_text(gitignore_content)
|
||||
|
||||
|
||||
# Add and commit .gitignore first
|
||||
subprocess.run(["git", "add", ".gitignore"], cwd=git_repo_with_untracked)
|
||||
subprocess.run(
|
||||
|
|
@ -109,7 +105,7 @@ docs/draft.md
|
|||
ignored_files = [
|
||||
"ignored.txt",
|
||||
"temp/temp.txt",
|
||||
"src/__pycache__/main.cpython-39.pyc"
|
||||
"src/__pycache__/main.cpython-39.pyc",
|
||||
]
|
||||
|
||||
for file_path in ignored_files:
|
||||
|
|
@ -128,14 +124,11 @@ def git_repo_with_aider_files(sample_git_repo):
|
|||
".aider.chat.history.md",
|
||||
".aider.input.history",
|
||||
".aider.tags.cache.v3/some_file",
|
||||
"src/.aider.local.settings"
|
||||
"src/.aider.local.settings",
|
||||
]
|
||||
|
||||
# Create regular files
|
||||
regular_files = [
|
||||
"main.cpp",
|
||||
"src/helper.cpp"
|
||||
]
|
||||
regular_files = ["main.cpp", "src/helper.cpp"]
|
||||
|
||||
# Create all files
|
||||
for file_path in aider_files + regular_files:
|
||||
|
|
@ -354,14 +347,15 @@ def mock_is_git_repo():
|
|||
@pytest.fixture
|
||||
def mock_os_path(monkeypatch):
|
||||
"""Mock os.path functions."""
|
||||
|
||||
def mock_exists(path):
|
||||
return True
|
||||
|
||||
def mock_isdir(path):
|
||||
return True
|
||||
|
||||
monkeypatch.setattr(os.path, 'exists', mock_exists)
|
||||
monkeypatch.setattr(os.path, 'isdir', mock_isdir)
|
||||
monkeypatch.setattr(os.path, "exists", mock_exists)
|
||||
monkeypatch.setattr(os.path, "isdir", mock_isdir)
|
||||
return monkeypatch
|
||||
|
||||
|
||||
|
|
@ -390,14 +384,18 @@ def test_get_file_listing_git_error(mock_subprocess, mock_is_git_repo, mock_os_p
|
|||
get_file_listing(DUMMY_PATH)
|
||||
|
||||
|
||||
def test_get_file_listing_permission_error(mock_subprocess, mock_is_git_repo, mock_os_path):
|
||||
def test_get_file_listing_permission_error(
|
||||
mock_subprocess, mock_is_git_repo, mock_os_path
|
||||
):
|
||||
"""Test get_file_listing with permission error."""
|
||||
mock_subprocess.side_effect = PermissionError("Permission denied")
|
||||
with pytest.raises(DirectoryAccessError):
|
||||
get_file_listing(DUMMY_PATH)
|
||||
|
||||
|
||||
def test_get_file_listing_unexpected_error(mock_subprocess, mock_is_git_repo, mock_os_path):
|
||||
def test_get_file_listing_unexpected_error(
|
||||
mock_subprocess, mock_is_git_repo, mock_os_path
|
||||
):
|
||||
"""Test get_file_listing with unexpected error."""
|
||||
mock_subprocess.side_effect = Exception("Unexpected error")
|
||||
with pytest.raises(FileListerError):
|
||||
|
|
@ -407,33 +405,35 @@ def test_get_file_listing_unexpected_error(mock_subprocess, mock_is_git_repo, mo
|
|||
def test_get_file_listing_with_untracked(git_repo_with_untracked):
|
||||
"""Test that file listing includes both tracked and untracked files."""
|
||||
files, count = get_file_listing(str(git_repo_with_untracked))
|
||||
|
||||
|
||||
# Check tracked files are present
|
||||
assert "README.md" in files
|
||||
assert "src/main.py" in files
|
||||
|
||||
|
||||
# Check untracked files are present
|
||||
assert "untracked.txt" in files
|
||||
assert "src/untracked.py" in files
|
||||
|
||||
|
||||
# Verify count includes both tracked and untracked
|
||||
expected_count = 8 # 5 tracked + 3 untracked (excluding .gitignore)
|
||||
assert count == expected_count
|
||||
|
||||
|
||||
def test_get_file_listing_with_untracked_and_limit(git_repo_with_untracked):
|
||||
"""Test that file listing with limit works correctly with untracked files."""
|
||||
limit = 3
|
||||
files, count = get_file_listing(str(git_repo_with_untracked), limit=limit)
|
||||
|
||||
|
||||
# Total count should still be full count
|
||||
assert count == 8 # 5 tracked + 3 untracked (excluding .gitignore)
|
||||
|
||||
# Only limit number of files should be returned
|
||||
assert len(files) == limit
|
||||
|
||||
|
||||
# Files should be sorted, so we can check first 3
|
||||
assert files == sorted(files)
|
||||
|
||||
|
||||
def test_get_file_listing_respects_gitignore(git_repo_with_ignores):
|
||||
"""Test that file listing respects .gitignore rules."""
|
||||
# First test with hidden files excluded (default)
|
||||
|
|
@ -468,6 +468,7 @@ def test_get_file_listing_respects_gitignore(git_repo_with_ignores):
|
|||
expected_count = 8 # 5 tracked + 2 untracked + .gitignore
|
||||
assert count == expected_count
|
||||
|
||||
|
||||
def test_aider_files_excluded(git_repo_with_aider_files):
|
||||
"""Test that .aider files are excluded from the file listing."""
|
||||
files, count = get_file_listing(str(git_repo_with_aider_files))
|
||||
|
|
@ -487,21 +488,14 @@ def test_aider_files_excluded(git_repo_with_aider_files):
|
|||
assert count == expected_count
|
||||
assert len(files) == expected_count
|
||||
|
||||
|
||||
def test_hidden_files_excluded_by_default(git_repo_with_aider_files):
|
||||
"""Test that hidden files are excluded by default."""
|
||||
# Create some hidden files
|
||||
hidden_files = [
|
||||
".config",
|
||||
".env",
|
||||
"src/.local",
|
||||
".gitattributes"
|
||||
]
|
||||
hidden_files = [".config", ".env", "src/.local", ".gitattributes"]
|
||||
|
||||
# Create regular files
|
||||
regular_files = [
|
||||
"main.cpp",
|
||||
"src/helper.cpp"
|
||||
]
|
||||
regular_files = ["main.cpp", "src/helper.cpp"]
|
||||
|
||||
# Create all files
|
||||
for file_path in hidden_files + regular_files:
|
||||
|
|