From 9cae0ef1fc07c8f42f68122c742962ee8b722b05 Mon Sep 17 00:00:00 2001 From: AI Christianson Date: Tue, 4 Mar 2025 00:49:48 -0500 Subject: [PATCH] project file and ciayn improvements --- ra_aid/file_listing.py | 192 +++++++++++------- ra_aid/project_state.py | 2 +- ra_aid/prompts/ciayn_prompts.py | 8 +- ra_aid/tools/fuzzy_find.py | 167 +++++++-------- ra_aid/tools/memory.py | 8 + ra_aid/tools/read_file.py | 14 ++ .../test_ciayn_tool_execution.py | 91 +++++++++ .../test_ciayn_tool_validation.py | 34 ++++ tests/ra_aid/tools/test_fuzzy_find.py | 78 ++++++- tests/ra_aid/tools/test_read_file.py | 16 ++ 10 files changed, 437 insertions(+), 173 deletions(-) create mode 100644 tests/ra_aid/agent_backends/test_ciayn_tool_execution.py create mode 100644 tests/ra_aid/agent_backends/test_ciayn_tool_validation.py diff --git a/ra_aid/file_listing.py b/ra_aid/file_listing.py index 9b5dfef..1aeeb50 100644 --- a/ra_aid/file_listing.py +++ b/ra_aid/file_listing.py @@ -4,6 +4,7 @@ import os import subprocess from pathlib import Path from typing import List, Optional, Tuple +import fnmatch class FileListerError(Exception): @@ -70,6 +71,117 @@ def is_git_repo(directory: str) -> bool: raise FileListerError(f"Error checking git repository: {e}") +def get_all_project_files( + directory: str, include_hidden: bool = False, exclude_patterns: Optional[List[str]] = None +) -> List[str]: + """ + Get a list of all files in a project directory, handling both git and non-git repositories. + + Args: + directory: Path to the directory + include_hidden: Whether to include hidden files (starting with .) in the results + exclude_patterns: Optional list of patterns to exclude from the results + + Returns: + List[str]: List of file paths relative to the directory + + Raises: + DirectoryNotFoundError: If directory does not exist + DirectoryAccessError: If directory cannot be accessed + GitCommandError: If git command fails + FileListerError: For other unexpected errors + """ + # Check if directory exists and is accessible + if not os.path.exists(directory): + raise DirectoryNotFoundError(f"Directory not found: {directory}") + if not os.path.isdir(directory): + raise DirectoryNotFoundError(f"Not a directory: {directory}") + + # Default excluded directories + excluded_dirs = {'.ra-aid', '.venv', '.git', '.aider', '__pycache__'} + + # Check if it's a git repository + try: + is_git = is_git_repo(directory) + except FileListerError: + # If checking fails, default to non-git approach + is_git = False + + all_files = [] + + if is_git: + # Get list of files from git ls-files + try: + # Get both tracked and untracked files + tracked_files_process = subprocess.run( + ["git", "ls-files"], + cwd=directory, + capture_output=True, + text=True, + check=True, + ) + untracked_files_process = subprocess.run( + ["git", "ls-files", "--others", "--exclude-standard"], + cwd=directory, + capture_output=True, + text=True, + check=True, + ) + except subprocess.CalledProcessError as e: + raise GitCommandError(f"Git command failed: {e}") + except PermissionError as e: + raise DirectoryAccessError(f"Permission denied: {e}") + + # Combine and process the files + for file in ( + tracked_files_process.stdout.splitlines() + + untracked_files_process.stdout.splitlines() + ): + file = file.strip() + if not file: + continue + # Skip hidden files unless explicitly included + if not include_hidden and ( + file.startswith(".") + or any(part.startswith(".") for part in file.split("/")) + ): + continue + # Skip .aider files + if ".aider" in file: + continue + all_files.append(file) + else: + # Not a git repository, use manual file listing + base_path = Path(directory) + + for root, dirs, files in os.walk(directory): + # Filter out excluded directories + dirs[:] = [d for d in dirs if d not in excluded_dirs and (include_hidden or not d.startswith('.'))] + + # Calculate relative path + rel_root = os.path.relpath(root, directory) + if rel_root == '.': + rel_root = '' + + # Process files + for file in files: + # Skip hidden files unless explicitly included + if not include_hidden and file.startswith('.'): + continue + + # Create relative path + rel_path = os.path.join(rel_root, file) if rel_root else file + all_files.append(rel_path) + + # Apply additional exclude patterns if specified + if exclude_patterns: + for pattern in exclude_patterns: + all_files = [f for f in all_files if not fnmatch.fnmatch(f, pattern)] + + # Remove duplicates and sort + return sorted(set(all_files)) + + def get_file_listing( directory: str, limit: Optional[int] = None, include_hidden: bool = False ) -> Tuple[List[str], int]: @@ -98,84 +210,10 @@ def get_file_listing( FileListerError: For other unexpected errors """ try: - # Check if directory exists and is accessible - if not os.path.exists(directory): - raise DirectoryNotFoundError(f"Directory not found: {directory}") - if not os.path.isdir(directory): - raise DirectoryNotFoundError(f"Not a directory: {directory}") - - # Check if it's a git repository - is_git = is_git_repo(directory) + # Use the common function to get all files + all_files = get_all_project_files(directory, include_hidden) - all_files = [] - - if is_git: - # Get list of files from git ls-files - try: - # Get both tracked and untracked files - tracked_files_process = subprocess.run( - ["git", "ls-files"], - cwd=directory, - capture_output=True, - text=True, - check=True, - ) - untracked_files_process = subprocess.run( - ["git", "ls-files", "--others", "--exclude-standard"], - cwd=directory, - capture_output=True, - text=True, - check=True, - ) - except subprocess.CalledProcessError as e: - raise GitCommandError(f"Git command failed: {e}") - except PermissionError as e: - raise DirectoryAccessError(f"Permission denied: {e}") - - # Combine and process the files - for file in ( - tracked_files_process.stdout.splitlines() - + untracked_files_process.stdout.splitlines() - ): - file = file.strip() - if not file: - continue - # Skip hidden files unless explicitly included - if not include_hidden and ( - file.startswith(".") - or any(part.startswith(".") for part in file.split("/")) - ): - continue - # Skip .aider files - if ".aider" in file: - continue - all_files.append(file) - else: - # Not a git repository, use manual file listing - base_path = Path(directory) - excluded_dirs = {'.ra-aid', '.venv', '.git', '.aider', '__pycache__'} - - for root, dirs, files in os.walk(directory): - # Filter out excluded directories - dirs[:] = [d for d in dirs if d not in excluded_dirs and (include_hidden or not d.startswith('.'))] - - # Calculate relative path - rel_root = os.path.relpath(root, directory) - if rel_root == '.': - rel_root = '' - - # Process files - for file in files: - # Skip hidden files unless explicitly included - if not include_hidden and file.startswith('.'): - continue - - # Create relative path - rel_path = os.path.join(rel_root, file) if rel_root else file - all_files.append(rel_path) - - # Remove duplicates and sort - all_files = sorted(set(all_files)) + # Get total count before truncation total_count = len(all_files) # Apply limit if specified diff --git a/ra_aid/project_state.py b/ra_aid/project_state.py index 9f62fd5..b71ac1e 100644 --- a/ra_aid/project_state.py +++ b/ra_aid/project_state.py @@ -50,7 +50,7 @@ def is_new_project(directory: str) -> bool: raise DirectoryNotFoundError(f"Path is not a directory: {directory}") # Get all files/dirs in the directory, excluding allowed items - _allowed_items: Set[str] = {".git", ".gitignore", ".ra-aid"} + _allowed_items: Set[str] = {".git", ".gitignore", ".ra-aid", ".venv"} try: contents = set() for item in path.iterdir(): diff --git a/ra_aid/prompts/ciayn_prompts.py b/ra_aid/prompts/ciayn_prompts.py index cd7589f..aca6c75 100644 --- a/ra_aid/prompts/ciayn_prompts.py +++ b/ra_aid/prompts/ciayn_prompts.py @@ -27,7 +27,7 @@ The user cannot see the results of function calls, so you have to explicitly use YOU MUST ALWAYS RESPOND WITH A SINGLE LINE OF PYTHON THAT CALLS ONE OF THE AVAILABLE TOOLS. NEVER RETURN AN EMPTY MESSAGE. -NEVER RETURN PLAIN TEXT - ONLY RETURN A TOOL CALL. +NEVER RETURN PLAIN TEXT - ONLY RETURN A SINGLE TOOL CALL. IF UNSURE WHAT TO DO, JUST YEET IT AND CALL THE BEST FUNCTION YOU CAN THINK OF. Use as many steps as you need to in order to fully complete the task. @@ -74,12 +74,6 @@ You typically don't want to keep calling the same function over and over with th def example_function(): print("Hello world") \"\"\") - -- Example of INCORRECT format (DO NOT DO THIS): - put_complete_file_contents("/path/to/file.py", \"\"\" - def example_function(): - print("Hello world") - \"\"\") As an agent, you will carefully plan ahead, carefully analyze tool call responses, and adapt to circumstances in order to accomplish your goal. diff --git a/ra_aid/tools/fuzzy_find.py b/ra_aid/tools/fuzzy_find.py index 0b0deee..f1b50ec 100644 --- a/ra_aid/tools/fuzzy_find.py +++ b/ra_aid/tools/fuzzy_find.py @@ -2,12 +2,14 @@ import fnmatch from typing import List, Tuple from fuzzywuzzy import process -from git import Repo +from git import Repo, exc from langchain_core.tools import tool from rich.console import Console from rich.markdown import Markdown from rich.panel import Panel +from ra_aid.file_listing import get_all_project_files, FileListerError + console = Console() DEFAULT_EXCLUDE_PATTERNS = [ @@ -29,27 +31,29 @@ def fuzzy_find_project_files( max_results: int = 10, include_paths: List[str] = None, exclude_patterns: List[str] = None, + include_hidden: bool = False, ) -> List[Tuple[str, int]]: - """Fuzzy find files in a git repository matching the search term. + """Fuzzy find files in a project matching the search term. - This tool searches for files within a git repository using fuzzy string matching, + This tool searches for files within a project directory using fuzzy string matching, allowing for approximate matches to the search term. It returns a list of matched - files along with their match scores. + files along with their match scores. Works with both git and non-git repositories. Args: search_term: String to match against file paths - repo_path: Path to git repository (defaults to current directory) + repo_path: Path to project directory (defaults to current directory) threshold: Minimum similarity score (0-100) for matches (default: 60) max_results: Maximum number of results to return (default: 10) include_paths: Optional list of path patterns to include in search exclude_patterns: Optional list of path patterns to exclude from search + include_hidden: Whether to include hidden files in search (default: False) Returns: List of tuples containing (file_path, match_score) Raises: - InvalidGitRepositoryError: If repo_path is not a git repository ValueError: If threshold is not between 0 and 100 + FileListerError: If there's an error accessing or listing files """ # Validate threshold if not 0 <= threshold <= 100: @@ -59,81 +63,80 @@ def fuzzy_find_project_files( if not search_term: return [] - # Initialize repo for normal search - repo = Repo(repo_path) - - # Get all tracked files - tracked_files = repo.git.ls_files().splitlines() - - # Get all untracked files - untracked_files = repo.untracked_files - - # Combine file lists - all_files = tracked_files + untracked_files - - # Apply include patterns if specified - if include_paths: - filtered_files = [] - for pattern in include_paths: - filtered_files.extend(f for f in all_files if fnmatch.fnmatch(f, pattern)) - all_files = filtered_files - - # Apply exclude patterns - patterns = DEFAULT_EXCLUDE_PATTERNS + (exclude_patterns or []) - for pattern in patterns: - all_files = [f for f in all_files if not fnmatch.fnmatch(f, pattern)] - - # Perform fuzzy matching - matches = process.extract(search_term, all_files, limit=max_results) - - # Filter by threshold - filtered_matches = [(path, score) for path, score in matches if score >= threshold] - - # Build info panel content - info_sections = [] - - # Search parameters section - params_section = [ - "## Search Parameters", - f"**Search Term**: `{search_term}`", - f"**Repository**: `{repo_path}`", - f"**Threshold**: {threshold}", - f"**Max Results**: {max_results}", - ] - if include_paths: - params_section.append("\n**Include Patterns**:") - for pattern in include_paths: - params_section.append(f"- `{pattern}`") - if exclude_patterns: - params_section.append("\n**Exclude Patterns**:") - for pattern in exclude_patterns: - params_section.append(f"- `{pattern}`") - info_sections.append("\n".join(params_section)) - - # Results statistics section - stats_section = [ - "## Results Statistics", - f"**Total Files Scanned**: {len(all_files)}", - f"**Matches Found**: {len(filtered_matches)}", - ] - info_sections.append("\n".join(stats_section)) - - # Top results section - if filtered_matches: - results_section = ["## Top Matches"] - for path, score in filtered_matches[:5]: # Show top 5 matches - results_section.append(f"- `{path}` (score: {score})") - info_sections.append("\n".join(results_section)) - else: - info_sections.append("## Results\n*No matches found*") - - # Display the panel - console.print( - Panel( - Markdown("\n\n".join(info_sections)), - title="🔍 Fuzzy Find Results", - border_style="bright_blue", + # Combine default and user-provided exclude patterns + all_exclude_patterns = DEFAULT_EXCLUDE_PATTERNS + (exclude_patterns or []) + + try: + # Get all project files using the common utility function + all_files = get_all_project_files( + repo_path, + include_hidden=include_hidden, + exclude_patterns=all_exclude_patterns ) - ) + + # Apply include patterns if specified + if include_paths: + filtered_files = [] + for pattern in include_paths: + filtered_files.extend(f for f in all_files if fnmatch.fnmatch(f, pattern)) + all_files = filtered_files - return filtered_matches + # Perform fuzzy matching + matches = process.extract(search_term, all_files, limit=max_results) + + # Filter by threshold + filtered_matches = [(path, score) for path, score in matches if score >= threshold] + + # Build info panel content + info_sections = [] + + # Search parameters section + params_section = [ + "## Search Parameters", + f"**Search Term**: `{search_term}`", + f"**Directory**: `{repo_path}`", + f"**Threshold**: {threshold}", + f"**Max Results**: {max_results}", + f"**Include Hidden Files**: {include_hidden}", + ] + if include_paths: + params_section.append("\n**Include Patterns**:") + for pattern in include_paths: + params_section.append(f"- `{pattern}`") + if exclude_patterns: + params_section.append("\n**Exclude Patterns**:") + for pattern in exclude_patterns: + params_section.append(f"- `{pattern}`") + info_sections.append("\n".join(params_section)) + + # Results statistics section + stats_section = [ + "## Results Statistics", + f"**Total Files Scanned**: {len(all_files)}", + f"**Matches Found**: {len(filtered_matches)}", + ] + info_sections.append("\n".join(stats_section)) + + # Top results section + if filtered_matches: + results_section = ["## Top Matches"] + for path, score in filtered_matches[:5]: # Show top 5 matches + results_section.append(f"- `{path}` (score: {score})") + info_sections.append("\n".join(results_section)) + else: + info_sections.append("## Results\n*No matches found*") + + # Display the panel + console.print( + Panel( + Markdown("\n\n".join(info_sections)), + title="🔍 Fuzzy Find Results", + border_style="bright_blue", + ) + ) + + return filtered_matches + + except FileListerError as e: + console.print(f"[bold red]Error listing files: {e}[/bold red]") + return [] diff --git a/ra_aid/tools/memory.py b/ra_aid/tools/memory.py index c729606..c316e04 100644 --- a/ra_aid/tools/memory.py +++ b/ra_aid/tools/memory.py @@ -536,6 +536,10 @@ def log_work_event(event: str) -> str: def is_binary_file(filepath): """Check if a file is binary using magic library if available.""" + # First check if file is empty + if os.path.getsize(filepath) == 0: + return False # Empty files are not binary + if magic: try: mime = magic.from_file(filepath, mime=True) @@ -565,6 +569,10 @@ def is_binary_file(filepath): def _is_binary_fallback(filepath): """Fallback method to detect binary files without using magic.""" try: + # First check if file is empty + if os.path.getsize(filepath) == 0: + return False # Empty files are not binary + with open(filepath, "r", encoding="utf-8") as f: chunk = f.read(1024) diff --git a/ra_aid/tools/read_file.py b/ra_aid/tools/read_file.py index 564bc79..66207e9 100644 --- a/ra_aid/tools/read_file.py +++ b/ra_aid/tools/read_file.py @@ -8,6 +8,7 @@ from rich.console import Console from rich.panel import Panel from ra_aid.text.processing import truncate_output +from ra_aid.tools.memory import is_binary_file console = Console() @@ -22,12 +23,25 @@ def read_file_tool(filepath: str, encoding: str = "utf-8") -> Dict[str, str]: Args: filepath: Path to the file to read encoding: File encoding to use (default: utf-8) + + DO NOT ATTEMPT TO READ BINARY FILES """ start_time = time.time() try: if not os.path.exists(filepath): raise FileNotFoundError(f"File not found: {filepath}") + # Check if the file is binary + if is_binary_file(filepath): + console.print( + Panel( + f"Cannot read binary file: {filepath}", + title="⚠️ Binary File Detected", + border_style="bright_red", + ) + ) + return {"error": "read_file failed because we cannot read binary files"} + logging.debug(f"Starting to read file: {filepath}") content = [] line_count = 0 diff --git a/tests/ra_aid/agent_backends/test_ciayn_tool_execution.py b/tests/ra_aid/agent_backends/test_ciayn_tool_execution.py new file mode 100644 index 0000000..d424329 --- /dev/null +++ b/tests/ra_aid/agent_backends/test_ciayn_tool_execution.py @@ -0,0 +1,91 @@ +import pytest +from unittest.mock import MagicMock, patch + +from langchain_core.messages import AIMessage + +from ra_aid.agent_backends.ciayn_agent import CiaynAgent +from ra_aid.tools import fuzzy_find_project_files +from ra_aid.exceptions import ToolExecutionError +from ra_aid.file_listing import FileListerError + + +def test_fuzzy_find_project_files_none_args_execution(): + """Test that the CiaynAgent can correctly execute fuzzy_find_project_files + with None arguments as seen in the failing case.""" + + # Create a mock agent with the fuzzy_find_project_files tool + mock_model = MagicMock() + agent = CiaynAgent( + model=mock_model, + tools=[fuzzy_find_project_files], + max_history_messages=5 + ) + + # This is the exact function call from the error message + function_call = 'fuzzy_find_project_files(search_term="nonexistent_term", repo_path=".", threshold=60, max_results=10, include_paths=None, exclude_patterns=None)' + + # Mock the response from the LLM + mock_response = AIMessage(content=function_call) + + # Patch process.extract to return empty results for any search + with patch('ra_aid.tools.fuzzy_find.process.extract', return_value=[]): + result = agent._execute_tool(mock_response) + assert result == [] + + +def test_error_handling_with_nonexistent_path(): + """Test that we handle errors gracefully with nonexistent paths.""" + + # Create a mock agent with the fuzzy_find_project_files tool + mock_model = MagicMock() + agent = CiaynAgent( + model=mock_model, + tools=[fuzzy_find_project_files], + max_history_messages=5 + ) + + function_call = 'fuzzy_find_project_files(search_term="test", repo_path="/nonexistent/path", threshold=60, max_results=10)' + + # Mock the response from the LLM + mock_response = AIMessage(content=function_call) + + # Patch get_all_project_files to raise a FileListerError + with patch('ra_aid.file_listing.get_all_project_files', side_effect=FileListerError("Directory not found")): + # The function should now return an empty list and log the error rather than raising an exception + result = agent._execute_tool(mock_response) + assert result == [] + + +def test_fallback_not_needed_for_fuzzy_find(): + """Test that fallback handling is not needed for fuzzy_find_project_files + since it now handles errors gracefully.""" + + # Create a mock agent with the fuzzy_find_project_files tool + mock_model = MagicMock() + + # Create a predefined response for the model.invoke + function_call = 'fuzzy_find_project_files(search_term="bullet physics", repo_path="/nonexistent/path", threshold=60, max_results=10, include_paths=None, exclude_patterns=None)' + mock_model.invoke.return_value = AIMessage(content=function_call) + + # Create the agent with fallback enabled + agent = CiaynAgent( + model=mock_model, + tools=[fuzzy_find_project_files], + max_history_messages=5, + config={"experimental_fallback_handler": True} + ) + + # Mock the fallback handler methods + agent.fallback_handler.handle_failure = MagicMock() + agent.handle_fallback_response = MagicMock() + + # Patch get_all_project_files to raise a FileListerError + with patch('ra_aid.file_listing.get_all_project_files', side_effect=FileListerError("Directory not found")): + # Call _execute_tool directly, it should not raise an exception + result = agent._execute_tool(mock_model.invoke.return_value) + + # Verify the result is an empty list + assert result == [] + + # Verify that fallback_handler was not called since no exception was raised + agent.fallback_handler.handle_failure.assert_not_called() diff --git a/tests/ra_aid/agent_backends/test_ciayn_tool_validation.py b/tests/ra_aid/agent_backends/test_ciayn_tool_validation.py new file mode 100644 index 0000000..3d6c3f8 --- /dev/null +++ b/tests/ra_aid/agent_backends/test_ciayn_tool_validation.py @@ -0,0 +1,34 @@ +import pytest +from ra_aid.agent_backends.ciayn_agent import validate_function_call_pattern + +def test_fuzzy_find_validation(): + # This is the exact function call from the error message + function_call = 'fuzzy_find_project_files(search_term="bullet physics", repo_path=".", threshold=60, max_results=10, include_paths=None, exclude_patterns=None)' + + # The validate_function_call_pattern should return False for valid function calls + # (False means "not invalid" in this function's logic) + assert validate_function_call_pattern(function_call) is False, "The fuzzy_find_project_files call should be considered valid" + +def test_validate_function_call_pattern_with_none_args(): + # Test with None as arguments for various parameter types + valid_calls = [ + 'some_function(arg1="test", arg2=None)', + 'some_function(arg1=None, arg2=123)', + 'some_function(arg1=None, arg2=None, arg3="text")', + 'fuzzy_find_project_files(search_term="bullet physics", repo_path=".", threshold=60, max_results=10, include_paths=None, exclude_patterns=None)', + ] + + for call in valid_calls: + assert validate_function_call_pattern(call) is False, f"Call should be valid: {call}" + +def test_validate_function_call_pattern_invalid_syntax(): + # Test with invalid syntax + invalid_calls = [ + 'some_function(arg1="test"', # Missing closing parenthesis + 'some_function arg1="test")', # Missing opening parenthesis + 'some_function("test") another_function()', # Multiple function calls + '= some_function(arg1="test")', # Invalid start + ] + + for call in invalid_calls: + assert validate_function_call_pattern(call) is True, f"Call should be invalid: {call}" diff --git a/tests/ra_aid/tools/test_fuzzy_find.py b/tests/ra_aid/tools/test_fuzzy_find.py index b3c99d8..043e200 100644 --- a/tests/ra_aid/tools/test_fuzzy_find.py +++ b/tests/ra_aid/tools/test_fuzzy_find.py @@ -29,6 +29,30 @@ def git_repo(tmp_path): return tmp_path +@pytest.fixture +def non_git_repo(tmp_path): + """Create a temporary directory with some test files but not a git repository""" + # Create some files + (tmp_path / "main.py").write_text("print('hello')") + (tmp_path / "test_main.py").write_text("def test_main(): pass") + (tmp_path / "lib").mkdir() + (tmp_path / "lib/utils.py").write_text("def util(): pass") + (tmp_path / "lib/__pycache__").mkdir() + (tmp_path / "lib/__pycache__/utils.cpython-39.pyc").write_text("cache") + + # Create some additional files + (tmp_path / "data.txt").write_text("some data") + (tmp_path / "config.py").write_text("CONFIG = {'key': 'value'}") + + # Create hidden files/directories that should be excluded by default + (tmp_path / ".venv").mkdir() + (tmp_path / ".venv/lib").mkdir() + (tmp_path / ".venv/lib/python3.9").mkdir() + (tmp_path / ".hidden_file.txt").write_text("hidden content") + + return tmp_path + + def test_basic_fuzzy_search(git_repo): """Test basic fuzzy matching functionality""" results = fuzzy_find_project_files.invoke( @@ -91,12 +115,30 @@ def test_invalid_threshold(): fuzzy_find_project_files.invoke({"search_term": "test", "threshold": 101}) -def test_non_git_repo(tmp_path): - """Test error handling outside git repo""" - with pytest.raises(InvalidGitRepositoryError): - fuzzy_find_project_files.invoke( - {"search_term": "test", "repo_path": str(tmp_path)} - ) +def test_non_git_repo(non_git_repo): + """Test fuzzy find works in non-git directories""" + # Now the function should work with non-git repositories + results = fuzzy_find_project_files.invoke( + {"search_term": "main", "repo_path": str(non_git_repo)} + ) + assert len(results) >= 1 + assert any("main.py" in match[0] for match in results) + + +def test_hidden_files_inclusion(non_git_repo): + """Test include_hidden parameter works correctly""" + # Without include_hidden parameter (default False) + results_without_hidden = fuzzy_find_project_files.invoke( + {"search_term": "hidden", "repo_path": str(non_git_repo)} + ) + assert len(results_without_hidden) == 0 + + # With include_hidden=True + results_with_hidden = fuzzy_find_project_files.invoke( + {"search_term": "hidden", "repo_path": str(non_git_repo), "include_hidden": True} + ) + assert len(results_with_hidden) >= 1 + assert any(".hidden_file.txt" in match[0] for match in results_with_hidden) def test_exact_match(git_repo): @@ -131,3 +173,27 @@ def test_no_matches(git_repo): {"search_term": "nonexistentfile", "threshold": 80, "repo_path": str(git_repo)} ) assert len(results) == 0 + + +def test_excluding_system_dirs(non_git_repo): + """Test that system directories are excluded by default""" + # Create files in directories that should be excluded by default + (non_git_repo / "__pycache__").mkdir(exist_ok=True) + (non_git_repo / "__pycache__/module.cpython-39.pyc").write_text("cache data") + (non_git_repo / ".ra-aid").mkdir(exist_ok=True) + (non_git_repo / ".ra-aid/config.json").write_text('{"setting": "value"}') + + # Run search for files that should be excluded + results = fuzzy_find_project_files.invoke( + {"search_term": "config", "repo_path": str(non_git_repo)} + ) + + # Should find config.py but not .ra-aid/config.json + assert any("config.py" in match[0] for match in results) + assert not any(".ra-aid/config.json" in match[0] for match in results) + + # Similarly for __pycache__ + results_cache = fuzzy_find_project_files.invoke( + {"search_term": "module", "repo_path": str(non_git_repo)} + ) + assert len(results_cache) == 0 # Should not find __pycache__ files diff --git a/tests/ra_aid/tools/test_read_file.py b/tests/ra_aid/tools/test_read_file.py index c32b7fd..c9e79d7 100644 --- a/tests/ra_aid/tools/test_read_file.py +++ b/tests/ra_aid/tools/test_read_file.py @@ -74,3 +74,19 @@ def test_empty_file(tmp_path): assert isinstance(result, dict) assert "content" in result assert result["content"] == "" + + +def test_binary_file_detection(tmp_path): + """Test that binary files are detected and not read""" + # Create a binary file with null bytes + test_file = tmp_path / "binary.bin" + with open(test_file, "wb") as f: + f.write(b"Some text with \x00 null bytes \x00 to make it binary") + + # Try to read the binary file + result = read_file_tool.invoke({"filepath": str(test_file)}) + + # Verify that the tool detected it as a binary file + assert isinstance(result, dict) + assert "error" in result + assert "read_file failed because we cannot read binary files" == result["error"]