project file and ciayn improvements

2025-03-04 00:49:48 -05:00 · 2025-03-04 00:49:48 -05:00 · 9cae0ef1fc
parent bd02bffc55
commit 9cae0ef1fc
10 changed files with 437 additions and 173 deletions
--- a/ra_aid/file_listing.py
+++ b/ra_aid/file_listing.py
@ -4,6 +4,7 @@ import os
 import subprocess
 from pathlib import Path
 from typing import List, Optional, Tuple
 import fnmatch
 class FileListerError(Exception):
@ -70,26 +71,19 @@ def is_git_repo(directory: str) -> bool:
        raise FileListerError(f"Error checking git repository: {e}")
-def get_file_listing(
+def get_all_project_files(
-    directory: str, limit: Optional[int] = None, include_hidden: bool = False
+    directory: str, include_hidden: bool = False, exclude_patterns: Optional[List[str]] = None
-) -> Tuple[List[str], int]:
+) -> List[str]:
    """
-    Get a list of files in a directory.
+    Get a list of all files in a project directory, handling both git and non-git repositories.
    For git repositories, uses `git ls-files` for efficient file listing that respects .gitignore rules.
    For non-git directories, falls back to manual file listing using Python's standard library.
    Returns a tuple containing the list of files (truncated if limit is specified)
    and the total count of files.
    Args:
        directory: Path to the directory
        limit: Optional maximum number of files to return
        include_hidden: Whether to include hidden files (starting with .) in the results
        exclude_patterns: Optional list of patterns to exclude from the results
    Returns:
-        Tuple[List[str], int]: Tuple containing:
+        List[str]: List of file paths relative to the directory
            - List of file paths (truncated to limit if specified)
            - Total number of files (before truncation)
    Raises:
        DirectoryNotFoundError: If directory does not exist
@ -97,15 +91,21 @@ def get_file_listing(
        GitCommandError: If git command fails
        FileListerError: For other unexpected errors
    """
    try:
    # Check if directory exists and is accessible
    if not os.path.exists(directory):
        raise DirectoryNotFoundError(f"Directory not found: {directory}")
    if not os.path.isdir(directory):
        raise DirectoryNotFoundError(f"Not a directory: {directory}")
    # Default excluded directories
    excluded_dirs = {'.ra-aid', '.venv', '.git', '.aider', '__pycache__'}
    # Check if it's a git repository
    try:
        is_git = is_git_repo(directory)
    except FileListerError:
        # If checking fails, default to non-git approach
        is_git = False
    all_files = []
@ -153,7 +153,6 @@ def get_file_listing(
    else:
        # Not a git repository, use manual file listing
        base_path = Path(directory)
            excluded_dirs = {'.ra-aid', '.venv', '.git', '.aider', '__pycache__'}
        for root, dirs, files in os.walk(directory):
            # Filter out excluded directories
@ -174,8 +173,47 @@ def get_file_listing(
                rel_path = os.path.join(rel_root, file) if rel_root else file
                all_files.append(rel_path)
    # Apply additional exclude patterns if specified
    if exclude_patterns:
        for pattern in exclude_patterns:
            all_files = [f for f in all_files if not fnmatch.fnmatch(f, pattern)]
    # Remove duplicates and sort
-        all_files = sorted(set(all_files))
+    return sorted(set(all_files))
 def get_file_listing(
    directory: str, limit: Optional[int] = None, include_hidden: bool = False
 ) -> Tuple[List[str], int]:
    """
    Get a list of files in a directory.
    For git repositories, uses `git ls-files` for efficient file listing that respects .gitignore rules.
    For non-git directories, falls back to manual file listing using Python's standard library.
    Returns a tuple containing the list of files (truncated if limit is specified)
    and the total count of files.
    Args:
        directory: Path to the directory
        limit: Optional maximum number of files to return
        include_hidden: Whether to include hidden files (starting with .) in the results
    Returns:
        Tuple[List[str], int]: Tuple containing:
            - List of file paths (truncated to limit if specified)
            - Total number of files (before truncation)
    Raises:
        DirectoryNotFoundError: If directory does not exist
        DirectoryAccessError: If directory cannot be accessed
        GitCommandError: If git command fails
        FileListerError: For other unexpected errors
    """
    try:
        # Use the common function to get all files
        all_files = get_all_project_files(directory, include_hidden)
        # Get total count before truncation
        total_count = len(all_files)
        # Apply limit if specified
--- a/ra_aid/project_state.py
+++ b/ra_aid/project_state.py
@ -50,7 +50,7 @@ def is_new_project(directory: str) -> bool:
            raise DirectoryNotFoundError(f"Path is not a directory: {directory}")
        # Get all files/dirs in the directory, excluding allowed items
-        _allowed_items: Set[str] = {".git", ".gitignore", ".ra-aid"}
+        _allowed_items: Set[str] = {".git", ".gitignore", ".ra-aid", ".venv"}
        try:
            contents = set()
            for item in path.iterdir():
--- a/ra_aid/prompts/ciayn_prompts.py
+++ b/ra_aid/prompts/ciayn_prompts.py
@ -27,7 +27,7 @@ The user cannot see the results of function calls, so you have to explicitly use
 YOU MUST ALWAYS RESPOND WITH A SINGLE LINE OF PYTHON THAT CALLS ONE OF THE AVAILABLE TOOLS.
 NEVER RETURN AN EMPTY MESSAGE.
-NEVER RETURN PLAIN TEXT - ONLY RETURN A TOOL CALL.
+NEVER RETURN PLAIN TEXT - ONLY RETURN A SINGLE TOOL CALL.
 IF UNSURE WHAT TO DO, JUST YEET IT AND CALL THE BEST FUNCTION YOU CAN THINK OF.
 Use as many steps as you need to in order to fully complete the task.
@ -74,12 +74,6 @@ You typically don't want to keep calling the same function over and over with th
 def example_function():
    print("Hello world")
 \"\"\")
 - Example of INCORRECT format (DO NOT DO THIS):
  put_complete_file_contents("/path/to/file.py", \"\"\"
  def example_function():
      print("Hello world")
  \"\"\")
 </function call guidelines>
 As an agent, you will carefully plan ahead, carefully analyze tool call responses, and adapt to circumstances in order to accomplish your goal.
--- a/ra_aid/tools/fuzzy_find.py
+++ b/ra_aid/tools/fuzzy_find.py
@ -2,12 +2,14 @@ import fnmatch
 from typing import List, Tuple
 from fuzzywuzzy import process
-from git import Repo
+from git import Repo, exc
 from langchain_core.tools import tool
 from rich.console import Console
 from rich.markdown import Markdown
 from rich.panel import Panel
 from ra_aid.file_listing import get_all_project_files, FileListerError
 console = Console()
 DEFAULT_EXCLUDE_PATTERNS = [
@ -29,27 +31,29 @@ def fuzzy_find_project_files(
    max_results: int = 10,
    include_paths: List[str] = None,
    exclude_patterns: List[str] = None,
    include_hidden: bool = False,
 ) -> List[Tuple[str, int]]:
-    """Fuzzy find files in a git repository matching the search term.
+    """Fuzzy find files in a project matching the search term.
-    This tool searches for files within a git repository using fuzzy string matching,
+    This tool searches for files within a project directory using fuzzy string matching,
    allowing for approximate matches to the search term. It returns a list of matched
-    files along with their match scores.
+    files along with their match scores. Works with both git and non-git repositories.
    Args:
        search_term: String to match against file paths
-        repo_path: Path to git repository (defaults to current directory)
+        repo_path: Path to project directory (defaults to current directory)
        threshold: Minimum similarity score (0-100) for matches (default: 60)
        max_results: Maximum number of results to return (default: 10)
        include_paths: Optional list of path patterns to include in search
        exclude_patterns: Optional list of path patterns to exclude from search
        include_hidden: Whether to include hidden files in search (default: False)
    Returns:
        List of tuples containing (file_path, match_score)
    Raises:
        InvalidGitRepositoryError: If repo_path is not a git repository
        ValueError: If threshold is not between 0 and 100
        FileListerError: If there's an error accessing or listing files
    """
    # Validate threshold
    if not 0 <= threshold <= 100:
@ -59,17 +63,16 @@ def fuzzy_find_project_files(
    if not search_term:
        return []
-    # Initialize repo for normal search
+    # Combine default and user-provided exclude patterns
-    repo = Repo(repo_path)
+    all_exclude_patterns = DEFAULT_EXCLUDE_PATTERNS + (exclude_patterns or [])
-    # Get all tracked files
+    try:
-    tracked_files = repo.git.ls_files().splitlines()
+        # Get all project files using the common utility function
-
+        all_files = get_all_project_files(
-    # Get all untracked files
+            repo_path, 
-    untracked_files = repo.untracked_files
+            include_hidden=include_hidden, 
-
+            exclude_patterns=all_exclude_patterns
-    # Combine file lists
+        )
    all_files = tracked_files + untracked_files
        # Apply include patterns if specified
        if include_paths:
@ -78,11 +81,6 @@ def fuzzy_find_project_files(
                filtered_files.extend(f for f in all_files if fnmatch.fnmatch(f, pattern))
            all_files = filtered_files
    # Apply exclude patterns
    patterns = DEFAULT_EXCLUDE_PATTERNS + (exclude_patterns or [])
    for pattern in patterns:
        all_files = [f for f in all_files if not fnmatch.fnmatch(f, pattern)]
        # Perform fuzzy matching
        matches = process.extract(search_term, all_files, limit=max_results)
@ -96,9 +94,10 @@ def fuzzy_find_project_files(
        params_section = [
            "## Search Parameters",
            f"**Search Term**: `{search_term}`",
-        f"**Repository**: `{repo_path}`",
+            f"**Directory**: `{repo_path}`",
            f"**Threshold**: {threshold}",
            f"**Max Results**: {max_results}",
            f"**Include Hidden Files**: {include_hidden}",
        ]
        if include_paths:
            params_section.append("\n**Include Patterns**:")
@ -137,3 +136,7 @@ def fuzzy_find_project_files(
        )
        return filtered_matches
    except FileListerError as e:
        console.print(f"[bold red]Error listing files: {e}[/bold red]")
        return []
--- a/ra_aid/tools/memory.py
+++ b/ra_aid/tools/memory.py
@ -536,6 +536,10 @@ def log_work_event(event: str) -> str:
 def is_binary_file(filepath):
    """Check if a file is binary using magic library if available."""
    # First check if file is empty
    if os.path.getsize(filepath) == 0:
        return False  # Empty files are not binary
    if magic:
        try:
            mime = magic.from_file(filepath, mime=True)
@ -565,6 +569,10 @@ def is_binary_file(filepath):
 def _is_binary_fallback(filepath):
    """Fallback method to detect binary files without using magic."""
    try:
        # First check if file is empty
        if os.path.getsize(filepath) == 0:
            return False  # Empty files are not binary
        with open(filepath, "r", encoding="utf-8") as f:
            chunk = f.read(1024)
--- a/ra_aid/tools/read_file.py
+++ b/ra_aid/tools/read_file.py
@ -8,6 +8,7 @@ from rich.console import Console
 from rich.panel import Panel
 from ra_aid.text.processing import truncate_output
 from ra_aid.tools.memory import is_binary_file
 console = Console()
@ -22,12 +23,25 @@ def read_file_tool(filepath: str, encoding: str = "utf-8") -> Dict[str, str]:
    Args:
        filepath: Path to the file to read
        encoding: File encoding to use (default: utf-8)
    DO NOT ATTEMPT TO READ BINARY FILES
    """
    start_time = time.time()
    try:
        if not os.path.exists(filepath):
            raise FileNotFoundError(f"File not found: {filepath}")
        # Check if the file is binary
        if is_binary_file(filepath):
            console.print(
                Panel(
                    f"Cannot read binary file: {filepath}",
                    title="⚠️ Binary File Detected",
                    border_style="bright_red",
                )
            )
            return {"error": "read_file failed because we cannot read binary files"}
        logging.debug(f"Starting to read file: {filepath}")
        content = []
        line_count = 0
--- a/tests/ra_aid/agent_backends/test_ciayn_tool_execution.py
+++ b/tests/ra_aid/agent_backends/test_ciayn_tool_execution.py
@ -0,0 +1,91 @@
 import pytest
 from unittest.mock import MagicMock, patch
 from langchain_core.messages import AIMessage
 from ra_aid.agent_backends.ciayn_agent import CiaynAgent
 from ra_aid.tools import fuzzy_find_project_files
 from ra_aid.exceptions import ToolExecutionError
 from ra_aid.file_listing import FileListerError
 def test_fuzzy_find_project_files_none_args_execution():
    """Run a fuzzy_find_project_files call with explicit None arguments through CiaynAgent.
    The call text passes include_paths=None and exclude_patterns=None; with
    process.extract patched to return no matches, _execute_tool must return []."""
    # The exact tool call text taken from the error message
    call_text = 'fuzzy_find_project_files(search_term="nonexistent_term", repo_path=".", threshold=60, max_results=10, include_paths=None, exclude_patterns=None)'
    # Stand-in for the message the LLM would have produced
    llm_message = AIMessage(content=call_text)
    # Agent under test: mocked model, fuzzy find as its only tool
    ciayn = CiaynAgent(
        model=MagicMock(),
        tools=[fuzzy_find_project_files],
        max_history_messages=5
    )
    # Make the fuzzy matcher report no matches regardless of the search term
    with patch('ra_aid.tools.fuzzy_find.process.extract', return_value=[]):
        outcome = ciayn._execute_tool(llm_message)
    assert outcome == []
 def test_error_handling_with_nonexistent_path():
    """Test that a FileListerError raised while listing files is handled gracefully.
    The file lister is replaced with a mock that raises FileListerError; the tool
    call executed through CiaynAgent._execute_tool is expected to return an empty
    list instead of propagating the exception."""
    # Create a mock agent with the fuzzy_find_project_files tool
    mock_model = MagicMock()
    agent = CiaynAgent(
        model=mock_model,
        tools=[fuzzy_find_project_files],
        max_history_messages=5
    )
    function_call = 'fuzzy_find_project_files(search_term="test", repo_path="/nonexistent/path", threshold=60, max_results=10)'
    # Mock the response from the LLM
    mock_response = AIMessage(content=function_call)
    # Patch the name where it is looked up, not where it is defined: fuzzy_find.py
    # does `from ra_aid.file_listing import get_all_project_files`, so patching
    # ra_aid.file_listing would leave the tool's own reference untouched and the
    # simulated FileListerError would never be raised.
    with patch('ra_aid.tools.fuzzy_find.get_all_project_files', side_effect=FileListerError("Directory not found")):
        # The function should return an empty list and log the error rather than raising an exception
        result = agent._execute_tool(mock_response)
        assert result == []
 def test_fallback_not_needed_for_fuzzy_find():
    """Test that fallback handling is not needed for fuzzy_find_project_files
    since it now handles errors gracefully.
    The file lister is mocked to raise FileListerError; the tool call must still
    return an empty list and the fallback handler must not be invoked."""
    # Create a mock agent with the fuzzy_find_project_files tool
    mock_model = MagicMock()
    # Create a predefined response for the model.invoke
    function_call = 'fuzzy_find_project_files(search_term="bullet physics", repo_path="/nonexistent/path", threshold=60, max_results=10, include_paths=None, exclude_patterns=None)'
    mock_model.invoke.return_value = AIMessage(content=function_call)
    # Create the agent with fallback enabled
    agent = CiaynAgent(
        model=mock_model,
        tools=[fuzzy_find_project_files],
        max_history_messages=5,
        config={"experimental_fallback_handler": True}
    )
    # Mock the fallback handler methods
    agent.fallback_handler.handle_failure = MagicMock()
    agent.handle_fallback_response = MagicMock()
    # Patch the name where it is looked up, not where it is defined: fuzzy_find.py
    # does `from ra_aid.file_listing import get_all_project_files`, so patching
    # ra_aid.file_listing would not affect the reference the tool actually calls.
    with patch('ra_aid.tools.fuzzy_find.get_all_project_files', side_effect=FileListerError("Directory not found")):
        # Call _execute_tool directly, it should not raise an exception
        result = agent._execute_tool(mock_model.invoke.return_value)
        # Verify the result is an empty list
        assert result == []
        # Verify that fallback_handler was not called since no exception was raised
        agent.fallback_handler.handle_failure.assert_not_called()
--- a/tests/ra_aid/agent_backends/test_ciayn_tool_validation.py
+++ b/tests/ra_aid/agent_backends/test_ciayn_tool_validation.py
@ -0,0 +1,34 @@
 import pytest
 from ra_aid.agent_backends.ciayn_agent import validate_function_call_pattern
 def test_fuzzy_find_validation():
    """Check that the fuzzy_find_project_files call from the error message passes validation."""
    # validate_function_call_pattern answers "is this call invalid?",
    # so a well-formed call is expected to produce False.
    call_text = 'fuzzy_find_project_files(search_term="bullet physics", repo_path=".", threshold=60, max_results=10, include_paths=None, exclude_patterns=None)'
    is_invalid = validate_function_call_pattern(call_text)
    assert is_invalid is False, "The fuzzy_find_project_files call should be considered valid"
 def test_validate_function_call_pattern_with_none_args():
    """Check that calls passing None for one or more keyword arguments are accepted."""
    # Each entry mixes None with string and integer arguments
    none_arg_calls = (
        'some_function(arg1="test", arg2=None)',
        'some_function(arg1=None, arg2=123)',
        'some_function(arg1=None, arg2=None, arg3="text")',
        'fuzzy_find_project_files(search_term="bullet physics", repo_path=".", threshold=60, max_results=10, include_paths=None, exclude_patterns=None)',
    )
    for call in none_arg_calls:
        # False means the validator did not flag the call as invalid
        flagged = validate_function_call_pattern(call)
        assert flagged is False, f"Call should be valid: {call}"
 def test_validate_function_call_pattern_invalid_syntax():
    """Check that syntactically malformed call strings are flagged as invalid."""
    malformed_calls = (
        'some_function(arg1="test"',  # closing parenthesis is missing
        'some_function arg1="test")',  # opening parenthesis is missing
        'some_function("test") another_function()',  # two calls on one line
        '= some_function(arg1="test")',  # does not start with a function name
    )
    for call in malformed_calls:
        # True means the validator flagged the call as invalid
        flagged = validate_function_call_pattern(call)
        assert flagged is True, f"Call should be invalid: {call}"
--- a/tests/ra_aid/tools/test_fuzzy_find.py
+++ b/tests/ra_aid/tools/test_fuzzy_find.py
@ -29,6 +29,30 @@ def git_repo(tmp_path):
    return tmp_path
@pytest.fixture
 def non_git_repo(tmp_path):
    """Create a temporary directory with some test files but not a git repository"""
    # Create some files
    (tmp_path / "main.py").write_text("print('hello')")
    (tmp_path / "test_main.py").write_text("def test_main(): pass")
    (tmp_path / "lib").mkdir()
    (tmp_path / "lib/utils.py").write_text("def util(): pass")
    (tmp_path / "lib/__pycache__").mkdir()
    (tmp_path / "lib/__pycache__/utils.cpython-39.pyc").write_text("cache")
    # Create some additional files
    (tmp_path / "data.txt").write_text("some data")
    (tmp_path / "config.py").write_text("CONFIG = {'key': 'value'}")
    # Create hidden files/directories that should be excluded by default
    (tmp_path / ".venv").mkdir()
    (tmp_path / ".venv/lib").mkdir()
    (tmp_path / ".venv/lib/python3.9").mkdir()
    (tmp_path / ".hidden_file.txt").write_text("hidden content")
    return tmp_path
 def test_basic_fuzzy_search(git_repo):
    """Test basic fuzzy matching functionality"""
    results = fuzzy_find_project_files.invoke(
@ -91,12 +115,30 @@ def test_invalid_threshold():
        fuzzy_find_project_files.invoke({"search_term": "test", "threshold": 101})
-def test_non_git_repo(tmp_path):
+def test_non_git_repo(non_git_repo):
-    """Test error handling outside git repo"""
+    """Test fuzzy find works in non-git directories"""
-    with pytest.raises(InvalidGitRepositoryError):
+    # Now the function should work with non-git repositories
-        fuzzy_find_project_files.invoke(
+    results = fuzzy_find_project_files.invoke(
-            {"search_term": "test", "repo_path": str(tmp_path)}
+        {"search_term": "main", "repo_path": str(non_git_repo)}
    )
    assert len(results) >= 1
    assert any("main.py" in match[0] for match in results)
 def test_hidden_files_inclusion(non_git_repo):
    """Check that hidden files are only returned when include_hidden is True."""
    base_query = {"search_term": "hidden", "repo_path": str(non_git_repo)}
    # Default behaviour (include_hidden not supplied): nothing should match
    default_hits = fuzzy_find_project_files.invoke(dict(base_query))
    assert len(default_hits) == 0
    # Explicit opt-in: the hidden file must now be among the matches
    opted_in_hits = fuzzy_find_project_files.invoke({**base_query, "include_hidden": True})
    assert len(opted_in_hits) >= 1
    matched_paths = [hit[0] for hit in opted_in_hits]
    assert any(".hidden_file.txt" in path for path in matched_paths)
 def test_exact_match(git_repo):
@ -131,3 +173,27 @@ def test_no_matches(git_repo):
        {"search_term": "nonexistentfile", "threshold": 80, "repo_path": str(git_repo)}
    )
    assert len(results) == 0
 def test_excluding_system_dirs(non_git_repo):
    """Check that files under __pycache__ and .ra-aid are left out of search results."""
    # Add one file to each directory that should be excluded by default
    skipped_entries = (
        ("__pycache__", "module.cpython-39.pyc", "cache data"),
        (".ra-aid", "config.json", '{"setting": "value"}'),
    )
    for dir_name, file_name, payload in skipped_entries:
        (non_git_repo / dir_name).mkdir(exist_ok=True)
        (non_git_repo / dir_name / file_name).write_text(payload)
    # "config" should surface config.py but never .ra-aid/config.json
    config_hits = fuzzy_find_project_files.invoke(
        {"search_term": "config", "repo_path": str(non_git_repo)}
    )
    config_paths = [hit[0] for hit in config_hits]
    assert any("config.py" in path for path in config_paths)
    assert not any(".ra-aid/config.json" in path for path in config_paths)
    # "module" only exists inside __pycache__, so nothing should be found
    module_hits = fuzzy_find_project_files.invoke(
        {"search_term": "module", "repo_path": str(non_git_repo)}
    )
    assert len(module_hits) == 0  # Should not find __pycache__ files
--- a/tests/ra_aid/tools/test_read_file.py
+++ b/tests/ra_aid/tools/test_read_file.py
@ -74,3 +74,19 @@ def test_empty_file(tmp_path):
    assert isinstance(result, dict)
    assert "content" in result
    assert result["content"] == ""
 def test_binary_file_detection(tmp_path):
    """Check that read_file_tool refuses a file containing null bytes and reports an error."""
    # Null bytes in the payload make the file binary
    binary_path = tmp_path / "binary.bin"
    binary_path.write_bytes(b"Some text with \x00 null bytes \x00 to make it binary")
    # Ask the tool to read it
    outcome = read_file_tool.invoke({"filepath": str(binary_path)})
    # The tool should answer with an error dict instead of file content
    assert isinstance(outcome, dict)
    assert "error" in outcome
    expected_error = "read_file failed because we cannot read binary files"
    assert expected_error == outcome["error"]