project file and ciayn improvements

This commit is contained in:
AI Christianson 2025-03-04 00:49:48 -05:00
parent bd02bffc55
commit 9cae0ef1fc
10 changed files with 437 additions and 173 deletions

View File

@ -4,6 +4,7 @@ import os
import subprocess import subprocess
from pathlib import Path from pathlib import Path
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
import fnmatch
class FileListerError(Exception): class FileListerError(Exception):
@ -70,6 +71,117 @@ def is_git_repo(directory: str) -> bool:
raise FileListerError(f"Error checking git repository: {e}") raise FileListerError(f"Error checking git repository: {e}")
def get_all_project_files(
    directory: str, include_hidden: bool = False, exclude_patterns: Optional[List[str]] = None
) -> List[str]:
    """
    Get a list of all files in a project directory, handling both git and non-git repositories.

    For git repositories the listing comes from ``git ls-files`` (tracked files
    plus untracked files that are not ignored); otherwise the directory tree is
    walked manually. In both modes files located under one of the default
    excluded directories (``.ra-aid``, ``.venv``, ``.git``, ``.aider``,
    ``__pycache__``) are dropped, even when ``include_hidden`` is True.

    Args:
        directory: Path to the directory
        include_hidden: Whether to include hidden files (starting with .) in the results
        exclude_patterns: Optional list of fnmatch-style patterns; any file whose
            relative path matches at least one pattern is removed from the results

    Returns:
        List[str]: Sorted, de-duplicated list of file paths relative to the directory

    Raises:
        DirectoryNotFoundError: If directory does not exist or is not a directory
        DirectoryAccessError: If a git command cannot be run due to permissions
        GitCommandError: If git command fails
        FileListerError: For other unexpected errors
    """
    # Check if directory exists and is accessible
    if not os.path.exists(directory):
        raise DirectoryNotFoundError(f"Directory not found: {directory}")
    if not os.path.isdir(directory):
        raise DirectoryNotFoundError(f"Not a directory: {directory}")

    # Default excluded directories (applied in both the git and non-git paths)
    excluded_dirs = {'.ra-aid', '.venv', '.git', '.aider', '__pycache__'}

    # Check if it's a git repository
    try:
        is_git = is_git_repo(directory)
    except FileListerError:
        # If checking fails, default to non-git approach
        is_git = False

    all_files = []
    if is_git:
        # Get both tracked and untracked (but not ignored) files from git.
        # NOTE(review): without -z, git may emit quoted/escaped names for paths
        # with unusual characters (core.quotePath); those are passed through as-is.
        try:
            tracked_files_process = subprocess.run(
                ["git", "ls-files"],
                cwd=directory,
                capture_output=True,
                text=True,
                check=True,
            )
            untracked_files_process = subprocess.run(
                ["git", "ls-files", "--others", "--exclude-standard"],
                cwd=directory,
                capture_output=True,
                text=True,
                check=True,
            )
        except subprocess.CalledProcessError as e:
            raise GitCommandError(f"Git command failed: {e}") from e
        except PermissionError as e:
            raise DirectoryAccessError(f"Permission denied: {e}") from e

        # Combine and process the files
        for file in (
            tracked_files_process.stdout.splitlines()
            + untracked_files_process.stdout.splitlines()
        ):
            file = file.strip()
            if not file:
                continue

            # git always reports paths with forward slashes
            parts = file.split("/")

            # Skip hidden files unless explicitly included; a path is hidden
            # when any of its components starts with a dot
            if not include_hidden and any(part.startswith(".") for part in parts):
                continue

            # Skip .aider files
            if ".aider" in file:
                continue

            # Skip files that live under an excluded directory, mirroring the
            # pruning done by the non-git walk below
            if excluded_dirs.intersection(parts[:-1]):
                continue

            all_files.append(file)
    else:
        # Not a git repository, use manual file listing
        for root, dirs, files in os.walk(directory):
            # Prune in place so os.walk does not descend into excluded directories
            dirs[:] = [
                d
                for d in dirs
                if d not in excluded_dirs and (include_hidden or not d.startswith('.'))
            ]

            # Calculate relative path
            rel_root = os.path.relpath(root, directory)
            if rel_root == '.':
                rel_root = ''

            # Process files
            for file in files:
                # Skip hidden files unless explicitly included
                if not include_hidden and file.startswith('.'):
                    continue

                # Create relative path
                rel_path = os.path.join(rel_root, file) if rel_root else file
                all_files.append(rel_path)

    # Apply additional exclude patterns if specified (single pass over the files)
    if exclude_patterns:
        all_files = [
            f
            for f in all_files
            if not any(fnmatch.fnmatch(f, pattern) for pattern in exclude_patterns)
        ]

    # Remove duplicates and sort
    return sorted(set(all_files))
def get_file_listing( def get_file_listing(
directory: str, limit: Optional[int] = None, include_hidden: bool = False directory: str, limit: Optional[int] = None, include_hidden: bool = False
) -> Tuple[List[str], int]: ) -> Tuple[List[str], int]:
@ -98,84 +210,10 @@ def get_file_listing(
FileListerError: For other unexpected errors FileListerError: For other unexpected errors
""" """
try: try:
# Check if directory exists and is accessible # Use the common function to get all files
if not os.path.exists(directory): all_files = get_all_project_files(directory, include_hidden)
raise DirectoryNotFoundError(f"Directory not found: {directory}")
if not os.path.isdir(directory):
raise DirectoryNotFoundError(f"Not a directory: {directory}")
# Check if it's a git repository # Get total count before truncation
is_git = is_git_repo(directory)
all_files = []
if is_git:
# Get list of files from git ls-files
try:
# Get both tracked and untracked files
tracked_files_process = subprocess.run(
["git", "ls-files"],
cwd=directory,
capture_output=True,
text=True,
check=True,
)
untracked_files_process = subprocess.run(
["git", "ls-files", "--others", "--exclude-standard"],
cwd=directory,
capture_output=True,
text=True,
check=True,
)
except subprocess.CalledProcessError as e:
raise GitCommandError(f"Git command failed: {e}")
except PermissionError as e:
raise DirectoryAccessError(f"Permission denied: {e}")
# Combine and process the files
for file in (
tracked_files_process.stdout.splitlines()
+ untracked_files_process.stdout.splitlines()
):
file = file.strip()
if not file:
continue
# Skip hidden files unless explicitly included
if not include_hidden and (
file.startswith(".")
or any(part.startswith(".") for part in file.split("/"))
):
continue
# Skip .aider files
if ".aider" in file:
continue
all_files.append(file)
else:
# Not a git repository, use manual file listing
base_path = Path(directory)
excluded_dirs = {'.ra-aid', '.venv', '.git', '.aider', '__pycache__'}
for root, dirs, files in os.walk(directory):
# Filter out excluded directories
dirs[:] = [d for d in dirs if d not in excluded_dirs and (include_hidden or not d.startswith('.'))]
# Calculate relative path
rel_root = os.path.relpath(root, directory)
if rel_root == '.':
rel_root = ''
# Process files
for file in files:
# Skip hidden files unless explicitly included
if not include_hidden and file.startswith('.'):
continue
# Create relative path
rel_path = os.path.join(rel_root, file) if rel_root else file
all_files.append(rel_path)
# Remove duplicates and sort
all_files = sorted(set(all_files))
total_count = len(all_files) total_count = len(all_files)
# Apply limit if specified # Apply limit if specified

View File

@ -50,7 +50,7 @@ def is_new_project(directory: str) -> bool:
raise DirectoryNotFoundError(f"Path is not a directory: {directory}") raise DirectoryNotFoundError(f"Path is not a directory: {directory}")
# Get all files/dirs in the directory, excluding allowed items # Get all files/dirs in the directory, excluding allowed items
_allowed_items: Set[str] = {".git", ".gitignore", ".ra-aid"} _allowed_items: Set[str] = {".git", ".gitignore", ".ra-aid", ".venv"}
try: try:
contents = set() contents = set()
for item in path.iterdir(): for item in path.iterdir():

View File

@ -27,7 +27,7 @@ The user cannot see the results of function calls, so you have to explicitly use
YOU MUST ALWAYS RESPOND WITH A SINGLE LINE OF PYTHON THAT CALLS ONE OF THE AVAILABLE TOOLS. YOU MUST ALWAYS RESPOND WITH A SINGLE LINE OF PYTHON THAT CALLS ONE OF THE AVAILABLE TOOLS.
NEVER RETURN AN EMPTY MESSAGE. NEVER RETURN AN EMPTY MESSAGE.
NEVER RETURN PLAIN TEXT - ONLY RETURN A TOOL CALL. NEVER RETURN PLAIN TEXT - ONLY RETURN A SINGLE TOOL CALL.
IF UNSURE WHAT TO DO, JUST YEET IT AND CALL THE BEST FUNCTION YOU CAN THINK OF. IF UNSURE WHAT TO DO, JUST YEET IT AND CALL THE BEST FUNCTION YOU CAN THINK OF.
Use as many steps as you need to in order to fully complete the task. Use as many steps as you need to in order to fully complete the task.
@ -74,12 +74,6 @@ You typically don't want to keep calling the same function over and over with th
def example_function(): def example_function():
print("Hello world") print("Hello world")
\"\"\") \"\"\")
- Example of INCORRECT format (DO NOT DO THIS):
put_complete_file_contents("/path/to/file.py", \"\"\"
def example_function():
print("Hello world")
\"\"\")
</function call guidelines> </function call guidelines>
As an agent, you will carefully plan ahead, carefully analyze tool call responses, and adapt to circumstances in order to accomplish your goal. As an agent, you will carefully plan ahead, carefully analyze tool call responses, and adapt to circumstances in order to accomplish your goal.

View File

@ -2,12 +2,14 @@ import fnmatch
from typing import List, Tuple from typing import List, Tuple
from fuzzywuzzy import process from fuzzywuzzy import process
from git import Repo from git import Repo, exc
from langchain_core.tools import tool from langchain_core.tools import tool
from rich.console import Console from rich.console import Console
from rich.markdown import Markdown from rich.markdown import Markdown
from rich.panel import Panel from rich.panel import Panel
from ra_aid.file_listing import get_all_project_files, FileListerError
console = Console() console = Console()
DEFAULT_EXCLUDE_PATTERNS = [ DEFAULT_EXCLUDE_PATTERNS = [
@ -29,27 +31,29 @@ def fuzzy_find_project_files(
max_results: int = 10, max_results: int = 10,
include_paths: List[str] = None, include_paths: List[str] = None,
exclude_patterns: List[str] = None, exclude_patterns: List[str] = None,
include_hidden: bool = False,
) -> List[Tuple[str, int]]: ) -> List[Tuple[str, int]]:
"""Fuzzy find files in a git repository matching the search term. """Fuzzy find files in a project matching the search term.
This tool searches for files within a git repository using fuzzy string matching, This tool searches for files within a project directory using fuzzy string matching,
allowing for approximate matches to the search term. It returns a list of matched allowing for approximate matches to the search term. It returns a list of matched
files along with their match scores. files along with their match scores. Works with both git and non-git repositories.
Args: Args:
search_term: String to match against file paths search_term: String to match against file paths
repo_path: Path to git repository (defaults to current directory) repo_path: Path to project directory (defaults to current directory)
threshold: Minimum similarity score (0-100) for matches (default: 60) threshold: Minimum similarity score (0-100) for matches (default: 60)
max_results: Maximum number of results to return (default: 10) max_results: Maximum number of results to return (default: 10)
include_paths: Optional list of path patterns to include in search include_paths: Optional list of path patterns to include in search
exclude_patterns: Optional list of path patterns to exclude from search exclude_patterns: Optional list of path patterns to exclude from search
include_hidden: Whether to include hidden files in search (default: False)
Returns: Returns:
List of tuples containing (file_path, match_score) List of tuples containing (file_path, match_score)
Raises: Raises:
InvalidGitRepositoryError: If repo_path is not a git repository
ValueError: If threshold is not between 0 and 100 ValueError: If threshold is not between 0 and 100
FileListerError: If there's an error accessing or listing files
""" """
# Validate threshold # Validate threshold
if not 0 <= threshold <= 100: if not 0 <= threshold <= 100:
@ -59,81 +63,80 @@ def fuzzy_find_project_files(
if not search_term: if not search_term:
return [] return []
# Initialize repo for normal search # Combine default and user-provided exclude patterns
repo = Repo(repo_path) all_exclude_patterns = DEFAULT_EXCLUDE_PATTERNS + (exclude_patterns or [])
# Get all tracked files try:
tracked_files = repo.git.ls_files().splitlines() # Get all project files using the common utility function
all_files = get_all_project_files(
# Get all untracked files repo_path,
untracked_files = repo.untracked_files include_hidden=include_hidden,
exclude_patterns=all_exclude_patterns
# Combine file lists
all_files = tracked_files + untracked_files
# Apply include patterns if specified
if include_paths:
filtered_files = []
for pattern in include_paths:
filtered_files.extend(f for f in all_files if fnmatch.fnmatch(f, pattern))
all_files = filtered_files
# Apply exclude patterns
patterns = DEFAULT_EXCLUDE_PATTERNS + (exclude_patterns or [])
for pattern in patterns:
all_files = [f for f in all_files if not fnmatch.fnmatch(f, pattern)]
# Perform fuzzy matching
matches = process.extract(search_term, all_files, limit=max_results)
# Filter by threshold
filtered_matches = [(path, score) for path, score in matches if score >= threshold]
# Build info panel content
info_sections = []
# Search parameters section
params_section = [
"## Search Parameters",
f"**Search Term**: `{search_term}`",
f"**Repository**: `{repo_path}`",
f"**Threshold**: {threshold}",
f"**Max Results**: {max_results}",
]
if include_paths:
params_section.append("\n**Include Patterns**:")
for pattern in include_paths:
params_section.append(f"- `{pattern}`")
if exclude_patterns:
params_section.append("\n**Exclude Patterns**:")
for pattern in exclude_patterns:
params_section.append(f"- `{pattern}`")
info_sections.append("\n".join(params_section))
# Results statistics section
stats_section = [
"## Results Statistics",
f"**Total Files Scanned**: {len(all_files)}",
f"**Matches Found**: {len(filtered_matches)}",
]
info_sections.append("\n".join(stats_section))
# Top results section
if filtered_matches:
results_section = ["## Top Matches"]
for path, score in filtered_matches[:5]: # Show top 5 matches
results_section.append(f"- `{path}` (score: {score})")
info_sections.append("\n".join(results_section))
else:
info_sections.append("## Results\n*No matches found*")
# Display the panel
console.print(
Panel(
Markdown("\n\n".join(info_sections)),
title="🔍 Fuzzy Find Results",
border_style="bright_blue",
) )
)
return filtered_matches # Apply include patterns if specified
if include_paths:
filtered_files = []
for pattern in include_paths:
filtered_files.extend(f for f in all_files if fnmatch.fnmatch(f, pattern))
all_files = filtered_files
# Perform fuzzy matching
matches = process.extract(search_term, all_files, limit=max_results)
# Filter by threshold
filtered_matches = [(path, score) for path, score in matches if score >= threshold]
# Build info panel content
info_sections = []
# Search parameters section
params_section = [
"## Search Parameters",
f"**Search Term**: `{search_term}`",
f"**Directory**: `{repo_path}`",
f"**Threshold**: {threshold}",
f"**Max Results**: {max_results}",
f"**Include Hidden Files**: {include_hidden}",
]
if include_paths:
params_section.append("\n**Include Patterns**:")
for pattern in include_paths:
params_section.append(f"- `{pattern}`")
if exclude_patterns:
params_section.append("\n**Exclude Patterns**:")
for pattern in exclude_patterns:
params_section.append(f"- `{pattern}`")
info_sections.append("\n".join(params_section))
# Results statistics section
stats_section = [
"## Results Statistics",
f"**Total Files Scanned**: {len(all_files)}",
f"**Matches Found**: {len(filtered_matches)}",
]
info_sections.append("\n".join(stats_section))
# Top results section
if filtered_matches:
results_section = ["## Top Matches"]
for path, score in filtered_matches[:5]: # Show top 5 matches
results_section.append(f"- `{path}` (score: {score})")
info_sections.append("\n".join(results_section))
else:
info_sections.append("## Results\n*No matches found*")
# Display the panel
console.print(
Panel(
Markdown("\n\n".join(info_sections)),
title="🔍 Fuzzy Find Results",
border_style="bright_blue",
)
)
return filtered_matches
except FileListerError as e:
console.print(f"[bold red]Error listing files: {e}[/bold red]")
return []

View File

@ -536,6 +536,10 @@ def log_work_event(event: str) -> str:
def is_binary_file(filepath): def is_binary_file(filepath):
"""Check if a file is binary using magic library if available.""" """Check if a file is binary using magic library if available."""
# First check if file is empty
if os.path.getsize(filepath) == 0:
return False # Empty files are not binary
if magic: if magic:
try: try:
mime = magic.from_file(filepath, mime=True) mime = magic.from_file(filepath, mime=True)
@ -565,6 +569,10 @@ def is_binary_file(filepath):
def _is_binary_fallback(filepath): def _is_binary_fallback(filepath):
"""Fallback method to detect binary files without using magic.""" """Fallback method to detect binary files without using magic."""
try: try:
# First check if file is empty
if os.path.getsize(filepath) == 0:
return False # Empty files are not binary
with open(filepath, "r", encoding="utf-8") as f: with open(filepath, "r", encoding="utf-8") as f:
chunk = f.read(1024) chunk = f.read(1024)

View File

@ -8,6 +8,7 @@ from rich.console import Console
from rich.panel import Panel from rich.panel import Panel
from ra_aid.text.processing import truncate_output from ra_aid.text.processing import truncate_output
from ra_aid.tools.memory import is_binary_file
console = Console() console = Console()
@ -22,12 +23,25 @@ def read_file_tool(filepath: str, encoding: str = "utf-8") -> Dict[str, str]:
Args: Args:
filepath: Path to the file to read filepath: Path to the file to read
encoding: File encoding to use (default: utf-8) encoding: File encoding to use (default: utf-8)
DO NOT ATTEMPT TO READ BINARY FILES
""" """
start_time = time.time() start_time = time.time()
try: try:
if not os.path.exists(filepath): if not os.path.exists(filepath):
raise FileNotFoundError(f"File not found: {filepath}") raise FileNotFoundError(f"File not found: {filepath}")
# Check if the file is binary
if is_binary_file(filepath):
console.print(
Panel(
f"Cannot read binary file: {filepath}",
title="⚠️ Binary File Detected",
border_style="bright_red",
)
)
return {"error": "read_file failed because we cannot read binary files"}
logging.debug(f"Starting to read file: {filepath}") logging.debug(f"Starting to read file: {filepath}")
content = [] content = []
line_count = 0 line_count = 0

View File

@ -0,0 +1,91 @@
import pytest
from unittest.mock import MagicMock, patch
from langchain_core.messages import AIMessage
from ra_aid.agent_backends.ciayn_agent import CiaynAgent
from ra_aid.tools import fuzzy_find_project_files
from ra_aid.exceptions import ToolExecutionError
from ra_aid.file_listing import FileListerError
def test_fuzzy_find_project_files_none_args_execution():
    """Check that CiaynAgent executes a fuzzy_find_project_files call whose
    optional list arguments are passed explicitly as None."""
    # Agent wired to a stand-in model and only the tool under test
    agent = CiaynAgent(
        model=MagicMock(),
        tools=[fuzzy_find_project_files],
        max_history_messages=5,
    )

    # Tool call text as the model would emit it (taken from the failing case)
    call_text = (
        'fuzzy_find_project_files(search_term="nonexistent_term", repo_path=".", '
        'threshold=60, max_results=10, include_paths=None, exclude_patterns=None)'
    )
    llm_message = AIMessage(content=call_text)

    # Force the fuzzy matcher to report no candidates, whatever files exist
    with patch('ra_aid.tools.fuzzy_find.process.extract', return_value=[]):
        outcome = agent._execute_tool(llm_message)

    assert outcome == []
def test_error_handling_with_nonexistent_path():
    """Test that we handle errors gracefully with nonexistent paths.

    A FileListerError raised while listing files must be absorbed by the tool,
    which is expected to yield an empty result instead of propagating.
    """
    # Create a mock agent with the fuzzy_find_project_files tool
    mock_model = MagicMock()
    agent = CiaynAgent(
        model=mock_model,
        tools=[fuzzy_find_project_files],
        max_history_messages=5,
    )

    function_call = 'fuzzy_find_project_files(search_term="test", repo_path="/nonexistent/path", threshold=60, max_results=10)'

    # Mock the response from the LLM
    mock_response = AIMessage(content=function_call)

    # Patch the name where the tool looks it up. The fuzzy_find module binds
    # get_all_project_files into its own namespace with a from-import, so
    # patching ra_aid.file_listing would leave the tool's reference untouched
    # and the side effect would never fire.
    with patch(
        'ra_aid.tools.fuzzy_find.get_all_project_files',
        side_effect=FileListerError("Directory not found"),
    ):
        # The function should return an empty list and log the error rather than raising an exception
        result = agent._execute_tool(mock_response)

    assert result == []
def test_fallback_not_needed_for_fuzzy_find():
    """Test that fallback handling is not needed for fuzzy_find_project_files
    since it now handles errors gracefully."""
    # Create a mock agent with the fuzzy_find_project_files tool
    mock_model = MagicMock()

    # Create a predefined response for the model.invoke
    function_call = 'fuzzy_find_project_files(search_term="bullet physics", repo_path="/nonexistent/path", threshold=60, max_results=10, include_paths=None, exclude_patterns=None)'
    mock_model.invoke.return_value = AIMessage(content=function_call)

    # Create the agent with fallback enabled
    agent = CiaynAgent(
        model=mock_model,
        tools=[fuzzy_find_project_files],
        max_history_messages=5,
        config={"experimental_fallback_handler": True},
    )

    # Mock the fallback handler methods so any use of them can be detected
    agent.fallback_handler.handle_failure = MagicMock()
    agent.handle_fallback_response = MagicMock()

    # Patch the name inside the fuzzy_find module: the tool imported
    # get_all_project_files with a from-import, so replacing the attribute on
    # ra_aid.file_listing would not affect the reference the tool actually calls.
    with patch(
        'ra_aid.tools.fuzzy_find.get_all_project_files',
        side_effect=FileListerError("Directory not found"),
    ):
        # Call _execute_tool directly, it should not raise an exception
        result = agent._execute_tool(mock_model.invoke.return_value)

    # Verify the result is an empty list
    assert result == []

    # Verify that fallback_handler was not called since no exception was raised
    agent.fallback_handler.handle_failure.assert_not_called()

View File

@ -0,0 +1,34 @@
import pytest
from ra_aid.agent_backends.ciayn_agent import validate_function_call_pattern
def test_fuzzy_find_validation():
    """A well-formed fuzzy_find_project_files call must pass validation."""
    # Call text copied from the original error report
    call_text = (
        'fuzzy_find_project_files(search_term="bullet physics", repo_path=".", '
        'threshold=60, max_results=10, include_paths=None, exclude_patterns=None)'
    )

    # The validator answers "is this invalid?", so False means the call is accepted
    flagged_invalid = validate_function_call_pattern(call_text)
    assert flagged_invalid is False, "The fuzzy_find_project_files call should be considered valid"
def test_validate_function_call_pattern_with_none_args():
    """Calls that pass None for one or more keyword arguments are accepted."""
    # None appears in different positions and next to different argument types
    accepted_calls = (
        'some_function(arg1="test", arg2=None)',
        'some_function(arg1=None, arg2=123)',
        'some_function(arg1=None, arg2=None, arg3="text")',
        'fuzzy_find_project_files(search_term="bullet physics", repo_path=".", '
        'threshold=60, max_results=10, include_paths=None, exclude_patterns=None)',
    )

    for call in accepted_calls:
        # False from the validator means "not invalid"
        flagged_invalid = validate_function_call_pattern(call)
        assert flagged_invalid is False, f"Call should be valid: {call}"
def test_validate_function_call_pattern_invalid_syntax():
    """Syntactically malformed call strings are reported as invalid."""
    rejected_calls = (
        'some_function(arg1="test"',  # Missing closing parenthesis
        'some_function arg1="test")',  # Missing opening parenthesis
        'some_function("test") another_function()',  # Multiple function calls
        '= some_function(arg1="test")',  # Invalid start
    )

    for call in rejected_calls:
        # True from the validator means the call was flagged as invalid
        flagged_invalid = validate_function_call_pattern(call)
        assert flagged_invalid is True, f"Call should be invalid: {call}"

View File

@ -29,6 +29,30 @@ def git_repo(tmp_path):
return tmp_path return tmp_path
@pytest.fixture
def non_git_repo(tmp_path):
    """Populate a temporary directory with sample project files, without
    initialising a git repository, and return its path."""
    # Directory skeleton: a package with a bytecode cache, plus a (fileless)
    # virtualenv tree that is hidden and should be ignored by default
    for relative_dir in ("lib/__pycache__", ".venv/lib/python3.9"):
        (tmp_path / relative_dir).mkdir(parents=True)

    # Relative path -> file content
    sample_files = {
        "main.py": "print('hello')",
        "test_main.py": "def test_main(): pass",
        "lib/utils.py": "def util(): pass",
        "lib/__pycache__/utils.cpython-39.pyc": "cache",
        "data.txt": "some data",
        "config.py": "CONFIG = {'key': 'value'}",
        # Hidden file that should be excluded by default
        ".hidden_file.txt": "hidden content",
    }
    for relative_path, content in sample_files.items():
        (tmp_path / relative_path).write_text(content)

    return tmp_path
def test_basic_fuzzy_search(git_repo): def test_basic_fuzzy_search(git_repo):
"""Test basic fuzzy matching functionality""" """Test basic fuzzy matching functionality"""
results = fuzzy_find_project_files.invoke( results = fuzzy_find_project_files.invoke(
@ -91,12 +115,30 @@ def test_invalid_threshold():
fuzzy_find_project_files.invoke({"search_term": "test", "threshold": 101}) fuzzy_find_project_files.invoke({"search_term": "test", "threshold": 101})
def test_non_git_repo(tmp_path): def test_non_git_repo(non_git_repo):
"""Test error handling outside git repo""" """Test fuzzy find works in non-git directories"""
with pytest.raises(InvalidGitRepositoryError): # Now the function should work with non-git repositories
fuzzy_find_project_files.invoke( results = fuzzy_find_project_files.invoke(
{"search_term": "test", "repo_path": str(tmp_path)} {"search_term": "main", "repo_path": str(non_git_repo)}
) )
assert len(results) >= 1
assert any("main.py" in match[0] for match in results)
def test_hidden_files_inclusion(non_git_repo):
    """Hidden files only show up in results when include_hidden is requested."""
    base_args = {"search_term": "hidden", "repo_path": str(non_git_repo)}

    # Default behaviour (include_hidden left at False): nothing is found
    default_matches = fuzzy_find_project_files.invoke(dict(base_args))
    assert len(default_matches) == 0

    # Opting in exposes the dot-file created by the fixture
    opted_in_matches = fuzzy_find_project_files.invoke(
        {**base_args, "include_hidden": True}
    )
    assert len(opted_in_matches) >= 1
    matched_paths = [match[0] for match in opted_in_matches]
    assert any(".hidden_file.txt" in path for path in matched_paths)
def test_exact_match(git_repo): def test_exact_match(git_repo):
@ -131,3 +173,27 @@ def test_no_matches(git_repo):
{"search_term": "nonexistentfile", "threshold": 80, "repo_path": str(git_repo)} {"search_term": "nonexistentfile", "threshold": 80, "repo_path": str(git_repo)}
) )
assert len(results) == 0 assert len(results) == 0
def test_excluding_system_dirs(non_git_repo):
    """Files inside system directories are left out of search results by default."""
    # Add files under directories that are expected to be skipped
    pycache_dir = non_git_repo / "__pycache__"
    pycache_dir.mkdir(exist_ok=True)
    (pycache_dir / "module.cpython-39.pyc").write_text("cache data")

    ra_aid_dir = non_git_repo / ".ra-aid"
    ra_aid_dir.mkdir(exist_ok=True)
    (ra_aid_dir / "config.json").write_text('{"setting": "value"}')

    repo = str(non_git_repo)

    # "config" should hit the project's config.py but not .ra-aid/config.json
    config_paths = [
        match[0]
        for match in fuzzy_find_project_files.invoke(
            {"search_term": "config", "repo_path": repo}
        )
    ]
    assert any("config.py" in path for path in config_paths)
    assert not any(".ra-aid/config.json" in path for path in config_paths)

    # "module" only exists inside __pycache__, so nothing should come back
    cache_matches = fuzzy_find_project_files.invoke(
        {"search_term": "module", "repo_path": repo}
    )
    assert len(cache_matches) == 0  # Should not find __pycache__ files

View File

@ -74,3 +74,19 @@ def test_empty_file(tmp_path):
assert isinstance(result, dict) assert isinstance(result, dict)
assert "content" in result assert "content" in result
assert result["content"] == "" assert result["content"] == ""
def test_binary_file_detection(tmp_path):
    """The read tool refuses binary files and reports an error instead of content."""
    # Embedded null bytes make the payload binary
    binary_path = tmp_path / "binary.bin"
    binary_path.write_bytes(b"Some text with \x00 null bytes \x00 to make it binary")

    # Ask the tool to read the binary file
    outcome = read_file_tool.invoke({"filepath": str(binary_path)})

    # The tool must answer with its binary-file error payload
    assert isinstance(outcome, dict)
    assert "error" in outcome
    assert outcome["error"] == "read_file failed because we cannot read binary files"