import fnmatch import logging from typing import List, Tuple, Dict, Optional, Any from fuzzywuzzy import process from git import Repo, exc from langchain_core.tools import tool from rich.console import Console from rich.markdown import Markdown from rich.panel import Panel from ra_aid.file_listing import get_all_project_files, FileListerError console = Console() def record_trajectory( tool_name: str, tool_parameters: Dict, step_data: Dict, record_type: str = "tool_execution", is_error: bool = False, error_message: Optional[str] = None, error_type: Optional[str] = None ) -> None: """ Helper function to record trajectory information, handling the case when repositories are not available. Args: tool_name: Name of the tool tool_parameters: Parameters passed to the tool step_data: UI rendering data record_type: Type of trajectory record is_error: Flag indicating if this record represents an error error_message: The error message error_type: The type/class of the error """ try: from ra_aid.database.repositories.trajectory_repository import get_trajectory_repository from ra_aid.database.repositories.human_input_repository import get_human_input_repository trajectory_repo = get_trajectory_repository() human_input_id = get_human_input_repository().get_most_recent_id() trajectory_repo.create( tool_name=tool_name, tool_parameters=tool_parameters, step_data=step_data, record_type=record_type, human_input_id=human_input_id, is_error=is_error, error_message=error_message, error_type=error_type ) except (ImportError, RuntimeError): # If either the repository modules can't be imported or no repository is available, # just log and continue without recording trajectory logging.debug("Skipping trajectory recording: repositories not available") DEFAULT_EXCLUDE_PATTERNS = [ "*.pyc", "__pycache__/*", ".git/*", "*.so", "*.o", "*.class", ] @tool def fuzzy_find_project_files( search_term: str, *, repo_path: str = ".", threshold: int = 60, max_results: int = 10, include_paths: List[str] = None, exclude_patterns: List[str] = None, include_hidden: bool = False, ) -> List[Tuple[str, int]]: """Fuzzy find files in a project matching the search term. This tool searches for files within a project directory using fuzzy string matching, allowing for approximate matches to the search term. It returns a list of matched files along with their match scores. Works with both git and non-git repositories. Args: search_term: String to match against file paths repo_path: Path to project directory (defaults to current directory) threshold: Minimum similarity score (0-100) for matches (default: 60) max_results: Maximum number of results to return (default: 10) include_paths: Optional list of path patterns to include in search exclude_patterns: Optional list of path patterns to exclude from search include_hidden: Whether to include hidden files in search (default: False) Returns: List of tuples containing (file_path, match_score) Raises: ValueError: If threshold is not between 0 and 100 FileListerError: If there's an error accessing or listing files """ # Validate threshold if not 0 <= threshold <= 100: error_msg = "Threshold must be between 0 and 100" # Record error in trajectory record_trajectory( tool_name="fuzzy_find_project_files", tool_parameters={ "search_term": search_term, "repo_path": repo_path, "threshold": threshold, "max_results": max_results, "include_paths": include_paths, "exclude_patterns": exclude_patterns, "include_hidden": include_hidden }, step_data={ "search_term": search_term, "display_title": "Invalid Threshold Value", "error_message": error_msg }, record_type="tool_execution", is_error=True, error_message=error_msg, error_type="ValueError" ) raise ValueError(error_msg) # Handle empty search term as special case if not search_term: return [] # Combine default and user-provided exclude patterns all_exclude_patterns = DEFAULT_EXCLUDE_PATTERNS + (exclude_patterns or []) try: # Get all project files using the common utility function all_files = get_all_project_files( repo_path, include_hidden=include_hidden, exclude_patterns=all_exclude_patterns ) # Apply include patterns if specified if include_paths: filtered_files = [] for pattern in include_paths: filtered_files.extend(f for f in all_files if fnmatch.fnmatch(f, pattern)) all_files = filtered_files # Perform fuzzy matching matches = process.extract(search_term, all_files, limit=max_results) # Filter by threshold filtered_matches = [(path, score) for path, score in matches if score >= threshold] # Build info panel content info_sections = [] # Search parameters section params_section = [ "## Search Parameters", f"**Search Term**: `{search_term}`", f"**Directory**: `{repo_path}`", f"**Threshold**: {threshold}", f"**Max Results**: {max_results}", f"**Include Hidden Files**: {include_hidden}", ] if include_paths: params_section.append("\n**Include Patterns**:") for pattern in include_paths: params_section.append(f"- `{pattern}`") if exclude_patterns: params_section.append("\n**Exclude Patterns**:") for pattern in exclude_patterns: params_section.append(f"- `{pattern}`") info_sections.append("\n".join(params_section)) # Results statistics section stats_section = [ "## Results Statistics", f"**Total Files Scanned**: {len(all_files)}", f"**Matches Found**: {len(filtered_matches)}", ] info_sections.append("\n".join(stats_section)) # Top results section if filtered_matches: results_section = ["## Top Matches"] for path, score in filtered_matches[:5]: # Show top 5 matches results_section.append(f"- `{path}` (score: {score})") info_sections.append("\n".join(results_section)) else: info_sections.append("## Results\n*No matches found*") # Record fuzzy find in trajectory record_trajectory( tool_name="fuzzy_find_project_files", tool_parameters={ "search_term": search_term, "repo_path": repo_path, "threshold": threshold, "max_results": max_results, "include_paths": include_paths, "exclude_patterns": exclude_patterns, "include_hidden": include_hidden }, step_data={ "search_term": search_term, "display_title": "Fuzzy Find Results", "total_files": len(all_files), "matches_found": len(filtered_matches) }, record_type="tool_execution" ) # Display the panel console.print( Panel( Markdown("\n\n".join(info_sections)), title="🔍 Fuzzy Find Results", border_style="bright_blue", ) ) return filtered_matches except FileListerError as e: error_msg = f"Error listing files: {e}" # Record error in trajectory record_trajectory( tool_name="fuzzy_find_project_files", tool_parameters={ "search_term": search_term, "repo_path": repo_path, "threshold": threshold, "max_results": max_results, "include_paths": include_paths, "exclude_patterns": exclude_patterns, "include_hidden": include_hidden }, step_data={ "search_term": search_term, "display_title": "Fuzzy Find Error", "error_message": error_msg }, record_type="tool_execution", is_error=True, error_message=error_msg, error_type=type(e).__name__ ) console.print(f"[bold red]{error_msg}[/bold red]") return []