project file and ciayn improvements

2025-03-04 00:49:48 -05:00 · 2025-03-04 00:49:48 -05:00 · 9cae0ef1fc
parent bd02bffc55
commit 9cae0ef1fc
10 changed files with 437 additions and 173 deletions
--- a/ra_aid/file_listing.py
+++ b/ra_aid/file_listing.py
@ -4,6 +4,7 @@ import os
 import subprocess
 from pathlib import Path
 from typing import List, Optional, Tuple
+import fnmatch


 class FileListerError(Exception):
@ -70,6 +71,117 @@ def is_git_repo(directory: str) -> bool:
        raise FileListerError(f"Error checking git repository: {e}")


+def get_all_project_files(
+    directory: str, include_hidden: bool = False, exclude_patterns: Optional[List[str]] = None
+) -> List[str]:
+    """
+    List every file in a project directory, for git and non-git projects alike.
+
+    In a git repository the result is the union of `git ls-files` and
+    `git ls-files --others --exclude-standard`; otherwise the tree is walked
+    with `os.walk`, skipping a fixed set of tool/cache directories.
+
+    Args:
+        directory: Path to the directory
+        include_hidden: Whether to include hidden files (starting with .) in the results
+        exclude_patterns: Optional list of fnmatch patterns; a relative path
+            matching any of them is dropped from the results
+
+    Returns:
+        List[str]: Sorted, de-duplicated file paths relative to the directory
+
+    Raises:
+        DirectoryNotFoundError: If directory does not exist or is not a directory
+        DirectoryAccessError: If running git is denied by a permission error
+        GitCommandError: If a git command exits with a non-zero status
+    """
+    # Check if directory exists and is accessible
+    if not os.path.exists(directory):
+        raise DirectoryNotFoundError(f"Directory not found: {directory}")
+    if not os.path.isdir(directory):
+        raise DirectoryNotFoundError(f"Not a directory: {directory}")
+
+    # Directories that are never descended into on the non-git path
+    excluded_dirs = {'.ra-aid', '.venv', '.git', '.aider', '__pycache__'}
+
+    # Check if it's a git repository
+    try:
+        is_git = is_git_repo(directory)
+    except FileListerError:
+        # If checking fails, default to non-git approach
+        is_git = False
+
+    all_files = []
+
+    if is_git:
+        try:
+            # Get both tracked and untracked files
+            tracked_files_process = subprocess.run(
+                ["git", "ls-files"],
+                cwd=directory,
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+            untracked_files_process = subprocess.run(
+                ["git", "ls-files", "--others", "--exclude-standard"],
+                cwd=directory,
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+        except subprocess.CalledProcessError as e:
+            # Chain the cause so the original git failure stays in the traceback
+            raise GitCommandError(f"Git command failed: {e}") from e
+        except PermissionError as e:
+            raise DirectoryAccessError(f"Permission denied: {e}") from e
+
+        # Combine and process the files
+        for file in (
+            tracked_files_process.stdout.splitlines()
+            + untracked_files_process.stdout.splitlines()
+        ):
+            file = file.strip()
+            if not file:
+                continue
+            # Skip paths with any hidden component unless explicitly included
+            if not include_hidden and any(
+                part.startswith(".") for part in file.split("/")
+            ):
+                continue
+            # Skip .aider files, even when hidden files are included
+            if ".aider" in file:
+                continue
+            all_files.append(file)
+    else:
+        # Not a git repository, use manual file listing
+        for root, dirs, files in os.walk(directory):
+            # Prune in place so os.walk never descends into skipped directories
+            dirs[:] = [d for d in dirs if d not in excluded_dirs and (include_hidden or not d.startswith('.'))]
+
+            # Calculate relative path
+            rel_root = os.path.relpath(root, directory)
+            if rel_root == '.':
+                rel_root = ''
+
+            # Process files
+            for file in files:
+                # Skip hidden files unless explicitly included
+                if not include_hidden and file.startswith('.'):
+                    continue
+
+                # Create relative path
+                rel_path = os.path.join(rel_root, file) if rel_root else file
+                all_files.append(rel_path)
+
+    # Apply additional exclude patterns in a single pass over the file list
+    if exclude_patterns:
+        all_files = [f for f in all_files if not any(fnmatch.fnmatch(f, p) for p in exclude_patterns)]
+
+    # Remove duplicates and sort
+    return sorted(set(all_files))
+
+
 def get_file_listing(
    directory: str, limit: Optional[int] = None, include_hidden: bool = False
 ) -> Tuple[List[str], int]:
@ -98,84 +210,10 @@ def get_file_listing(
        FileListerError: For other unexpected errors
    """
    try:
-        # Check if directory exists and is accessible
-        if not os.path.exists(directory):
-            raise DirectoryNotFoundError(f"Directory not found: {directory}")
-        if not os.path.isdir(directory):
-            raise DirectoryNotFoundError(f"Not a directory: {directory}")
+        # Use the common function to get all files
+        all_files = get_all_project_files(directory, include_hidden)
        
-        # Check if it's a git repository
-        is_git = is_git_repo(directory)
-        
-        all_files = []
-        
-        if is_git:
-            # Get list of files from git ls-files
-            try:
-                # Get both tracked and untracked files
-                tracked_files_process = subprocess.run(
-                    ["git", "ls-files"],
-                    cwd=directory,
-                    capture_output=True,
-                    text=True,
-                    check=True,
-                )
-                untracked_files_process = subprocess.run(
-                    ["git", "ls-files", "--others", "--exclude-standard"],
-                    cwd=directory,
-                    capture_output=True,
-                    text=True,
-                    check=True,
-                )
-            except subprocess.CalledProcessError as e:
-                raise GitCommandError(f"Git command failed: {e}")
-            except PermissionError as e:
-                raise DirectoryAccessError(f"Permission denied: {e}")
-
-            # Combine and process the files
-            for file in (
-                tracked_files_process.stdout.splitlines()
-                + untracked_files_process.stdout.splitlines()
-            ):
-                file = file.strip()
-                if not file:
-                    continue
-                # Skip hidden files unless explicitly included
-                if not include_hidden and (
-                    file.startswith(".")
-                    or any(part.startswith(".") for part in file.split("/"))
-                ):
-                    continue
-                # Skip .aider files
-                if ".aider" in file:
-                    continue
-                all_files.append(file)
-        else:
-            # Not a git repository, use manual file listing
-            base_path = Path(directory)
-            excluded_dirs = {'.ra-aid', '.venv', '.git', '.aider', '__pycache__'}
-            
-            for root, dirs, files in os.walk(directory):
-                # Filter out excluded directories
-                dirs[:] = [d for d in dirs if d not in excluded_dirs and (include_hidden or not d.startswith('.'))]
-                
-                # Calculate relative path
-                rel_root = os.path.relpath(root, directory)
-                if rel_root == '.':
-                    rel_root = ''
-                
-                # Process files
-                for file in files:
-                    # Skip hidden files unless explicitly included
-                    if not include_hidden and file.startswith('.'):
-                        continue
-                    
-                    # Create relative path
-                    rel_path = os.path.join(rel_root, file) if rel_root else file
-                    all_files.append(rel_path)
-
-        # Remove duplicates and sort
-        all_files = sorted(set(all_files))
+        # Get total count before truncation
        total_count = len(all_files)

        # Apply limit if specified
--- a/ra_aid/project_state.py
+++ b/ra_aid/project_state.py
@ -50,7 +50,7 @@ def is_new_project(directory: str) -> bool:
            raise DirectoryNotFoundError(f"Path is not a directory: {directory}")

        # Get all files/dirs in the directory, excluding allowed items
-        _allowed_items: Set[str] = {".git", ".gitignore", ".ra-aid"}
+        _allowed_items: Set[str] = {".git", ".gitignore", ".ra-aid", ".venv"}
        try:
            contents = set()
            for item in path.iterdir():
--- a/ra_aid/prompts/ciayn_prompts.py
+++ b/ra_aid/prompts/ciayn_prompts.py
@ -27,7 +27,7 @@ The user cannot see the results of function calls, so you have to explicitly use

 YOU MUST ALWAYS RESPOND WITH A SINGLE LINE OF PYTHON THAT CALLS ONE OF THE AVAILABLE TOOLS.
 NEVER RETURN AN EMPTY MESSAGE.
-NEVER RETURN PLAIN TEXT - ONLY RETURN A TOOL CALL.
+NEVER RETURN PLAIN TEXT - ONLY RETURN A SINGLE TOOL CALL.
 IF UNSURE WHAT TO DO, JUST YEET IT AND CALL THE BEST FUNCTION YOU CAN THINK OF.

 Use as many steps as you need to in order to fully complete the task.
@ -74,12 +74,6 @@ You typically don't want to keep calling the same function over and over with th
 def example_function():
    print("Hello world")
 \"\"\")
-
- Example of INCORRECT format (DO NOT DO THIS):
-  put_complete_file_contents("/path/to/file.py", \"\"\"
-  def example_function():
-      print("Hello world")
-  \"\"\")
 </function call guidelines>

 As an agent, you will carefully plan ahead, carefully analyze tool call responses, and adapt to circumstances in order to accomplish your goal.
--- a/ra_aid/tools/fuzzy_find.py
+++ b/ra_aid/tools/fuzzy_find.py
@ -2,12 +2,14 @@ import fnmatch
 from typing import List, Tuple

 from fuzzywuzzy import process
-from git import Repo
+from git import Repo, exc
 from langchain_core.tools import tool
 from rich.console import Console
 from rich.markdown import Markdown
 from rich.panel import Panel

+from ra_aid.file_listing import get_all_project_files, FileListerError
+
 console = Console()

 DEFAULT_EXCLUDE_PATTERNS = [
@ -29,27 +31,29 @@ def fuzzy_find_project_files(
    max_results: int = 10,
    include_paths: List[str] = None,
    exclude_patterns: List[str] = None,
+    include_hidden: bool = False,
 ) -> List[Tuple[str, int]]:
-    """Fuzzy find files in a git repository matching the search term.
+    """Fuzzy find files in a project matching the search term.

-    This tool searches for files within a git repository using fuzzy string matching,
+    This tool searches for files within a project directory using fuzzy string matching,
    allowing for approximate matches to the search term. It returns a list of matched
-    files along with their match scores.
+    files along with their match scores. Works with both git and non-git repositories.

    Args:
        search_term: String to match against file paths
-        repo_path: Path to git repository (defaults to current directory)
+        repo_path: Path to project directory (defaults to current directory)
        threshold: Minimum similarity score (0-100) for matches (default: 60)
        max_results: Maximum number of results to return (default: 10)
        include_paths: Optional list of path patterns to include in search
        exclude_patterns: Optional list of path patterns to exclude from search
+        include_hidden: Whether to include hidden files in search (default: False)

    Returns:
        List of tuples containing (file_path, match_score)

    Raises:
-        InvalidGitRepositoryError: If repo_path is not a git repository
        ValueError: If threshold is not between 0 and 100
+        FileListerError: If there's an error accessing or listing files
    """
    # Validate threshold
    if not 0 <= threshold <= 100:
@ -59,81 +63,80 @@ def fuzzy_find_project_files(
    if not search_term:
        return []

-    # Initialize repo for normal search
-    repo = Repo(repo_path)
+    # Combine default and user-provided exclude patterns
+    all_exclude_patterns = DEFAULT_EXCLUDE_PATTERNS + (exclude_patterns or [])
    
-    # Get all tracked files
-    tracked_files = repo.git.ls_files().splitlines()
-
-    # Get all untracked files
-    untracked_files = repo.untracked_files
-
-    # Combine file lists
-    all_files = tracked_files + untracked_files
-
-    # Apply include patterns if specified
-    if include_paths:
-        filtered_files = []
-        for pattern in include_paths:
-            filtered_files.extend(f for f in all_files if fnmatch.fnmatch(f, pattern))
-        all_files = filtered_files
-
-    # Apply exclude patterns
-    patterns = DEFAULT_EXCLUDE_PATTERNS + (exclude_patterns or [])
-    for pattern in patterns:
-        all_files = [f for f in all_files if not fnmatch.fnmatch(f, pattern)]
-
-    # Perform fuzzy matching
-    matches = process.extract(search_term, all_files, limit=max_results)
-
-    # Filter by threshold
-    filtered_matches = [(path, score) for path, score in matches if score >= threshold]
-
-    # Build info panel content
-    info_sections = []
-
-    # Search parameters section
-    params_section = [
-        "## Search Parameters",
-        f"**Search Term**: `{search_term}`",
-        f"**Repository**: `{repo_path}`",
-        f"**Threshold**: {threshold}",
-        f"**Max Results**: {max_results}",
-    ]
-    if include_paths:
-        params_section.append("\n**Include Patterns**:")
-        for pattern in include_paths:
-            params_section.append(f"- `{pattern}`")
-    if exclude_patterns:
-        params_section.append("\n**Exclude Patterns**:")
-        for pattern in exclude_patterns:
-            params_section.append(f"- `{pattern}`")
-    info_sections.append("\n".join(params_section))
-
-    # Results statistics section
-    stats_section = [
-        "## Results Statistics",
-        f"**Total Files Scanned**: {len(all_files)}",
-        f"**Matches Found**: {len(filtered_matches)}",
-    ]
-    info_sections.append("\n".join(stats_section))
-
-    # Top results section
-    if filtered_matches:
-        results_section = ["## Top Matches"]
-        for path, score in filtered_matches[:5]:  # Show top 5 matches
-            results_section.append(f"- `{path}` (score: {score})")
-        info_sections.append("\n".join(results_section))
-    else:
-        info_sections.append("## Results\n*No matches found*")
-
-    # Display the panel
-    console.print(
-        Panel(
-            Markdown("\n\n".join(info_sections)),
-            title="🔍 Fuzzy Find Results",
-            border_style="bright_blue",
+    try:
+        # Get all project files using the common utility function
+        all_files = get_all_project_files(
+            repo_path, 
+            include_hidden=include_hidden, 
+            exclude_patterns=all_exclude_patterns
        )
-    )
        
-    return filtered_matches
+        # Apply include patterns if specified
+        if include_paths:
+            filtered_files = []
+            for pattern in include_paths:
+                filtered_files.extend(f for f in all_files if fnmatch.fnmatch(f, pattern))
+            all_files = filtered_files
+
+        # Perform fuzzy matching
+        matches = process.extract(search_term, all_files, limit=max_results)
+
+        # Filter by threshold
+        filtered_matches = [(path, score) for path, score in matches if score >= threshold]
+
+        # Build info panel content
+        info_sections = []
+
+        # Search parameters section
+        params_section = [
+            "## Search Parameters",
+            f"**Search Term**: `{search_term}`",
+            f"**Directory**: `{repo_path}`",
+            f"**Threshold**: {threshold}",
+            f"**Max Results**: {max_results}",
+            f"**Include Hidden Files**: {include_hidden}",
+        ]
+        if include_paths:
+            params_section.append("\n**Include Patterns**:")
+            for pattern in include_paths:
+                params_section.append(f"- `{pattern}`")
+        if exclude_patterns:
+            params_section.append("\n**Exclude Patterns**:")
+            for pattern in exclude_patterns:
+                params_section.append(f"- `{pattern}`")
+        info_sections.append("\n".join(params_section))
+
+        # Results statistics section
+        stats_section = [
+            "## Results Statistics",
+            f"**Total Files Scanned**: {len(all_files)}",
+            f"**Matches Found**: {len(filtered_matches)}",
+        ]
+        info_sections.append("\n".join(stats_section))
+
+        # Top results section
+        if filtered_matches:
+            results_section = ["## Top Matches"]
+            for path, score in filtered_matches[:5]:  # Show top 5 matches
+                results_section.append(f"- `{path}` (score: {score})")
+            info_sections.append("\n".join(results_section))
+        else:
+            info_sections.append("## Results\n*No matches found*")
+
+        # Display the panel
+        console.print(
+            Panel(
+                Markdown("\n\n".join(info_sections)),
+                title="🔍 Fuzzy Find Results",
+                border_style="bright_blue",
+            )
+        )
+
+        return filtered_matches
+        
+    except FileListerError as e:
+        console.print(f"[bold red]Error listing files: {e}[/bold red]")
+        return []
--- a/ra_aid/tools/memory.py
+++ b/ra_aid/tools/memory.py
@ -536,6 +536,10 @@ def log_work_event(event: str) -> str:

 def is_binary_file(filepath):
    """Check if a file is binary using magic library if available."""
+    # First check if file is empty
+    if os.path.getsize(filepath) == 0:
+        return False  # Empty files are not binary
+        
    if magic:
        try:
            mime = magic.from_file(filepath, mime=True)
@ -565,6 +569,10 @@ def is_binary_file(filepath):
 def _is_binary_fallback(filepath):
    """Fallback method to detect binary files without using magic."""
    try:
+        # First check if file is empty
+        if os.path.getsize(filepath) == 0:
+            return False  # Empty files are not binary
+            
        with open(filepath, "r", encoding="utf-8") as f:
            chunk = f.read(1024)

--- a/ra_aid/tools/read_file.py
+++ b/ra_aid/tools/read_file.py
@ -8,6 +8,7 @@ from rich.console import Console
 from rich.panel import Panel

 from ra_aid.text.processing import truncate_output
+from ra_aid.tools.memory import is_binary_file

 console = Console()

@ -22,12 +23,25 @@ def read_file_tool(filepath: str, encoding: str = "utf-8") -> Dict[str, str]:
    Args:
        filepath: Path to the file to read
        encoding: File encoding to use (default: utf-8)
+    
+    DO NOT ATTEMPT TO READ BINARY FILES
    """
    start_time = time.time()
    try:
        if not os.path.exists(filepath):
            raise FileNotFoundError(f"File not found: {filepath}")

+        # Check if the file is binary
+        if is_binary_file(filepath):
+            console.print(
+                Panel(
+                    f"Cannot read binary file: {filepath}",
+                    title="⚠️ Binary File Detected",
+                    border_style="bright_red",
+                )
+            )
+            return {"error": "read_file failed because we cannot read binary files"}
+
        logging.debug(f"Starting to read file: {filepath}")
        content = []
        line_count = 0
--- a/tests/ra_aid/agent_backends/test_ciayn_tool_execution.py
+++ b/tests/ra_aid/agent_backends/test_ciayn_tool_execution.py
@ -0,0 +1,91 @@
+import pytest
+from unittest.mock import MagicMock, patch
+
+from langchain_core.messages import AIMessage
+
+from ra_aid.agent_backends.ciayn_agent import CiaynAgent
+from ra_aid.tools import fuzzy_find_project_files
+from ra_aid.exceptions import ToolExecutionError
+from ra_aid.file_listing import FileListerError
+
+
+def test_fuzzy_find_project_files_none_args_execution():
+    """CiaynAgent must execute a fuzzy_find_project_files call whose optional
+    list arguments are passed explicitly as None."""
+
+    # Agent under test, wired to a stub model and only the fuzzy-find tool
+    stub_llm = MagicMock()
+    ciayn = CiaynAgent(
+        model=stub_llm,
+        tools=[fuzzy_find_project_files],
+        max_history_messages=5
+    )
+
+    # Tool call text as the LLM would emit it, with None for both list arguments
+    call_text = 'fuzzy_find_project_files(search_term="nonexistent_term", repo_path=".", threshold=60, max_results=10, include_paths=None, exclude_patterns=None)'
+
+    # Wrap the call text the way a model reply arrives
+    llm_reply = AIMessage(content=call_text)
+
+    # Force the fuzzy matcher to find nothing, whatever files exist on disk
+    with patch('ra_aid.tools.fuzzy_find.process.extract', return_value=[]):
+        outcome = ciayn._execute_tool(llm_reply)
+    assert outcome == []
+
+
+def test_error_handling_with_nonexistent_path():
+    """Test that a FileListerError raised while listing files yields an empty result."""
+
+    # Create a mock agent with the fuzzy_find_project_files tool
+    mock_model = MagicMock()
+    agent = CiaynAgent(
+        model=mock_model,
+        tools=[fuzzy_find_project_files],
+        max_history_messages=5
+    )
+
+    function_call = 'fuzzy_find_project_files(search_term="test", repo_path="/nonexistent/path", threshold=60, max_results=10)'
+
+    # Mock the response from the LLM
+    mock_response = AIMessage(content=function_call)
+
+    # Patch the name where the tool looks it up (fuzzy_find imported it by name),
+    # otherwise the side_effect never fires and the real lister runs instead
+    with patch('ra_aid.tools.fuzzy_find.get_all_project_files', side_effect=FileListerError("Directory not found")):
+        result = agent._execute_tool(mock_response)
+        assert result == []
+
+
+def test_fallback_not_needed_for_fuzzy_find():
+    """Test that the fallback handler is not triggered when file listing fails,
+    because fuzzy_find_project_files reports the error and returns an empty list."""
+
+    # Create a mock agent with the fuzzy_find_project_files tool
+    mock_model = MagicMock()
+
+    # Create a predefined response for the model.invoke
+    function_call = 'fuzzy_find_project_files(search_term="bullet physics", repo_path="/nonexistent/path", threshold=60, max_results=10, include_paths=None, exclude_patterns=None)'
+    mock_model.invoke.return_value = AIMessage(content=function_call)
+
+    # Create the agent with fallback enabled
+    agent = CiaynAgent(
+        model=mock_model,
+        tools=[fuzzy_find_project_files],
+        max_history_messages=5,
+        config={"experimental_fallback_handler": True}
+    )
+
+    # Mock the fallback handler methods
+    agent.fallback_handler.handle_failure = MagicMock()
+    agent.handle_fallback_response = MagicMock()
+
+    # Patch the name bound inside fuzzy_find (it imports the function by name), so the error is really injected
+    with patch('ra_aid.tools.fuzzy_find.get_all_project_files', side_effect=FileListerError("Directory not found")):
+        # Call _execute_tool directly, it should not raise an exception
+        result = agent._execute_tool(mock_model.invoke.return_value)
+
+        # Verify the result is an empty list
+        assert result == []
+
+        # Verify that fallback_handler was not called since no exception was raised
+        agent.fallback_handler.handle_failure.assert_not_called()
--- a/tests/ra_aid/agent_backends/test_ciayn_tool_validation.py
+++ b/tests/ra_aid/agent_backends/test_ciayn_tool_validation.py
@ -0,0 +1,34 @@
+import pytest
+from ra_aid.agent_backends.ciayn_agent import validate_function_call_pattern
+
+def test_fuzzy_find_validation():
+    # A single well-formed tool call that passes None for its optional arguments
+    call_text = 'fuzzy_find_project_files(search_term="bullet physics", repo_path=".", threshold=60, max_results=10, include_paths=None, exclude_patterns=None)'
+
+    # The validator answers "is this INVALID?", so a well-formed call must yield False
+    is_invalid = validate_function_call_pattern(call_text)
+    assert is_invalid is False, "The fuzzy_find_project_files call should be considered valid"
+
+def test_validate_function_call_pattern_with_none_args():
+    # None must be accepted as an argument value in any position of the call
+    well_formed = (
+        'some_function(arg1="test", arg2=None)',
+        'some_function(arg1=None, arg2=123)',
+        'some_function(arg1=None, arg2=None, arg3="text")',
+        'fuzzy_find_project_files(search_term="bullet physics", repo_path=".", threshold=60, max_results=10, include_paths=None, exclude_patterns=None)',
+    )
+
+    for call in well_formed:
+        assert validate_function_call_pattern(call) is False, f"Call should be valid: {call}"
+
+def test_validate_function_call_pattern_invalid_syntax():
+    # Each entry breaks the "exactly one well-formed call" shape in a different way
+    malformed = (
+        'some_function(arg1="test"',  # Missing closing parenthesis
+        'some_function arg1="test")',  # Missing opening parenthesis
+        'some_function("test") another_function()',  # Multiple function calls
+        '= some_function(arg1="test")',  # Invalid start
+    )
+
+    for call in malformed:
+        assert validate_function_call_pattern(call) is True, f"Call should be invalid: {call}"
--- a/tests/ra_aid/tools/test_fuzzy_find.py
+++ b/tests/ra_aid/tools/test_fuzzy_find.py
@ -29,6 +29,30 @@ def git_repo(tmp_path):
    return tmp_path


+@pytest.fixture
+def non_git_repo(tmp_path):
+    """Build a plain (non-git) project tree under tmp_path and return its root."""
+    # Directories first; parents=True creates the intermediate levels as well
+    for rel_dir in ("lib/__pycache__", ".venv/lib/python3.9"):
+        (tmp_path / rel_dir).mkdir(parents=True)
+
+    # Visible project files, one bytecode cache entry, and one hidden file
+    # (the .venv tree above stays empty: it only contains directories)
+    file_contents = {
+        "main.py": "print('hello')",
+        "test_main.py": "def test_main(): pass",
+        "lib/utils.py": "def util(): pass",
+        "lib/__pycache__/utils.cpython-39.pyc": "cache",
+        "data.txt": "some data",
+        "config.py": "CONFIG = {'key': 'value'}",
+        ".hidden_file.txt": "hidden content",
+    }
+    for rel_file, text in file_contents.items():
+        (tmp_path / rel_file).write_text(text)
+
+    return tmp_path
+
+
 def test_basic_fuzzy_search(git_repo):
    """Test basic fuzzy matching functionality"""
    results = fuzzy_find_project_files.invoke(
@ -91,12 +115,30 @@ def test_invalid_threshold():
        fuzzy_find_project_files.invoke({"search_term": "test", "threshold": 101})


-def test_non_git_repo(tmp_path):
-    """Test error handling outside git repo"""
-    with pytest.raises(InvalidGitRepositoryError):
-        fuzzy_find_project_files.invoke(
-            {"search_term": "test", "repo_path": str(tmp_path)}
-        )
+def test_non_git_repo(non_git_repo):
+    """Fuzzy find must return matches for a directory that is not a git repo"""
+    # The fixture tree is created without any git initialisation
+    query = {"search_term": "main", "repo_path": str(non_git_repo)}
+    matches = fuzzy_find_project_files.invoke(query)
+    matched_paths = [m[0] for m in matches]
+    assert len(matched_paths) >= 1
+    assert any("main.py" in path for path in matched_paths)
+
+
+def test_hidden_files_inclusion(non_git_repo):
+    """Hidden files are skipped by default and found with include_hidden=True"""
+    base_query = {"search_term": "hidden", "repo_path": str(non_git_repo)}
+
+    # Default behaviour: .hidden_file.txt must not be reported
+    default_matches = fuzzy_find_project_files.invoke(dict(base_query))
+    assert len(default_matches) == 0
+
+    # Opting in makes the hidden file searchable
+    opted_in_matches = fuzzy_find_project_files.invoke(
+        {**base_query, "include_hidden": True}
+    )
+    assert len(opted_in_matches) >= 1
+    assert any(".hidden_file.txt" in m[0] for m in opted_in_matches)


 def test_exact_match(git_repo):
@ -131,3 +173,27 @@ def test_no_matches(git_repo):
        {"search_term": "nonexistentfile", "threshold": 80, "repo_path": str(git_repo)}
    )
    assert len(results) == 0
+
+
+def test_excluding_system_dirs(non_git_repo):
+    """Files under __pycache__ and .ra-aid must not show up in search results"""
+    # Populate two directories the search is expected to ignore
+    pycache_dir = non_git_repo / "__pycache__"
+    pycache_dir.mkdir(exist_ok=True)
+    (pycache_dir / "module.cpython-39.pyc").write_text("cache data")
+    ra_aid_dir = non_git_repo / ".ra-aid"
+    ra_aid_dir.mkdir(exist_ok=True)
+    (ra_aid_dir / "config.json").write_text('{"setting": "value"}')
+
+    # Search root as a string, shared by both queries below
+    repo = str(non_git_repo)
+
+    # "config" exists both as the visible config.py and as .ra-aid/config.json
+    config_hits = fuzzy_find_project_files.invoke({"search_term": "config", "repo_path": repo})
+    hit_paths = [hit[0] for hit in config_hits]
+    assert any("config.py" in path for path in hit_paths)
+    assert not any(".ra-aid/config.json" in path for path in hit_paths)
+
+    # The only "module" file created here lives inside __pycache__
+    module_hits = fuzzy_find_project_files.invoke({"search_term": "module", "repo_path": repo})
+    assert len(module_hits) == 0  # Should not find __pycache__ files
--- a/tests/ra_aid/tools/test_read_file.py
+++ b/tests/ra_aid/tools/test_read_file.py
@ -74,3 +74,19 @@ def test_empty_file(tmp_path):
    assert isinstance(result, dict)
    assert "content" in result
    assert result["content"] == ""
+
+
+def test_binary_file_detection(tmp_path):
+    """read_file_tool must refuse a file containing NUL bytes and return an error"""
+    # Payload with embedded NUL bytes, so it should be classified as binary
+    binary_path = tmp_path / "binary.bin"
+    binary_path.write_bytes(b"Some text with \x00 null bytes \x00 to make it binary")
+
+    # Ask the tool to read it
+    outcome = read_file_tool.invoke({"filepath": str(binary_path)})
+
+    # The tool must answer with its binary-file error payload, not file content
+    expected_error = "read_file failed because we cannot read binary files"
+    assert isinstance(outcome, dict)
+    assert "error" in outcome
+    assert outcome["error"] == expected_error