RA.Aid/ra_aid/tools/read_file.py

197 lines
6.5 KiB
Python

import logging
import os.path
import time
from typing import Dict, Optional
from langchain_core.tools import tool
from rich.console import Console
from rich.panel import Panel
from ra_aid.text.processing import truncate_output
from ra_aid.tools.memory import is_binary_file
# Module-level rich console; read_file_tool prints its status panels through it.
console = Console()
# Upper bound passed to each f.read() call in read_file_tool. The file is opened
# in text mode there, so this limits characters per read rather than raw bytes.
CHUNK_SIZE = 8192
def record_trajectory(
    tool_name: str,
    tool_parameters: Dict,
    step_data: Dict,
    record_type: str = "tool_execution",
    is_error: bool = False,
    error_message: Optional[str] = None,
    error_type: Optional[str] = None
) -> None:
    """
    Store one trajectory entry for a tool call, on a best-effort basis.

    The entry is linked to the most recent human input id. When the
    repository modules cannot be imported, or a RuntimeError is raised while
    obtaining or using them, the entry is dropped and a debug message is
    logged instead of propagating the failure to the caller.

    Args:
        tool_name: Name of the tool
        tool_parameters: Parameters passed to the tool
        step_data: UI rendering data
        record_type: Type of trajectory record
        is_error: Flag indicating if this record represents an error
        error_message: The error message
        error_type: The type/class of the error
    """
    try:
        # Imported inside the function so that a missing database layer shows
        # up as an ImportError here, where it is handled, not at module load.
        from ra_aid.database.repositories.trajectory_repository import get_trajectory_repository
        from ra_aid.database.repositories.human_input_repository import get_human_input_repository

        repository = get_trajectory_repository()
        latest_input_id = get_human_input_repository().get_most_recent_id()

        entry = {
            "tool_name": tool_name,
            "tool_parameters": tool_parameters,
            "step_data": step_data,
            "record_type": record_type,
            "human_input_id": latest_input_id,
            "is_error": is_error,
            "error_message": error_message,
            "error_type": error_type,
        }
        repository.create(**entry)
    except (ImportError, RuntimeError):
        # Recording is optional: without usable repositories we only note the
        # skip and let the calling tool carry on.
        logging.debug("Skipping trajectory recording: repositories not available")
def _record_read_event(filepath: str, encoding: str, step_data: Dict, **error_fields) -> None:
    """Record a read_file_tool trajectory entry with the tool's standard parameters."""
    record_trajectory(
        tool_name="read_file_tool",
        tool_parameters={"filepath": filepath, "encoding": encoding},
        # "filepath" stays the first key, matching the layout used for every entry.
        step_data={"filepath": filepath, **step_data},
        **error_fields,
    )


# NOTE: the docstring below is what the @tool decorator exposes as the tool's
# description, so its wording is intentionally left as-is; maintainer-facing
# details live in the comments inside the function.
@tool
def read_file_tool(filepath: str, encoding: str = "utf-8") -> Dict[str, str]:
    """Read and return the contents of a text file.

    Args:
        filepath: Path to the file to read
        encoding: File encoding to use (default: utf-8)

    DO NOT ATTEMPT TO READ BINARY FILES
    """
    # Returns {"content": <text after truncate_output>} on success, or
    # {"error": ...} when is_binary_file() flags the path.
    # Raises FileNotFoundError when the path does not exist; any other
    # exception raised while reading is recorded in the trajectory and re-raised.
    start_time = time.time()
    try:
        if not os.path.exists(filepath):
            not_found_msg = f"File not found: {filepath}"
            # Record error in trajectory before raising
            _record_read_event(
                filepath,
                encoding,
                {
                    "display_title": "File Not Found",
                    "error_message": not_found_msg,
                },
                is_error=True,
                error_message=not_found_msg,
                error_type="FileNotFoundError",
            )
            raise FileNotFoundError(not_found_msg)

        # Binary files are reported through the return value rather than raised.
        if is_binary_file(filepath):
            _record_read_event(
                filepath,
                encoding,
                {
                    "display_title": "Binary File Detected",
                    "error_message": f"Cannot read binary file: {filepath}",
                },
                is_error=True,
                error_message="Cannot read binary file",
                error_type="BinaryFileError",
            )
            console.print(
                Panel(
                    f"Cannot read binary file: {filepath}",
                    title="⚠ Binary File Detected",
                    border_style="bright_red",
                )
            )
            return {"error": "read_file failed because we cannot read binary files"}

        logging.debug(f"Starting to read file: {filepath}")
        chunks = []
        line_count = 0  # number of "\n" characters seen in the decoded text
        total_chars = 0
        with open(filepath, "r", encoding=encoding) as f:
            while True:
                chunk = f.read(CHUNK_SIZE)
                if not chunk:
                    break
                chunks.append(chunk)
                # In text mode read() yields decoded characters, so these
                # running figures are character counts, not byte counts.
                total_chars += len(chunk)
                line_count += chunk.count("\n")
                logging.debug(
                    f"Read chunk: {len(chunk)} chars, running total: {total_chars} chars"
                )
            # Take the byte size from the open handle: it stays correct for
            # multi-byte encodings and CRLF files, where characters != bytes.
            total_bytes = os.fstat(f.fileno()).st_size

        full_content = "".join(chunks)
        elapsed = time.time() - start_time
        logging.debug(f"File read complete: {total_bytes} bytes in {elapsed:.2f}s")
        logging.debug(f"Pre-truncation stats: {total_bytes} bytes, {line_count} lines")

        # Record successful file read in trajectory
        _record_read_event(
            filepath,
            encoding,
            {
                "display_title": "File Read",
                "line_count": line_count,
                "total_bytes": total_bytes,
                "elapsed_time": elapsed,
            },
        )
        console.print(
            Panel(
                f"Read {line_count} lines ({total_bytes} bytes) from {filepath} in {elapsed:.2f}s",
                title="📄 File Read",
                border_style="bright_blue",
            )
        )

        # Truncate if needed
        truncated = truncate_output(full_content) if full_content else ""
        return {"content": truncated}
    except FileNotFoundError:
        # The missing-path case was recorded before it was raised above, so it
        # is passed through without a second trajectory entry.
        raise
    except Exception as e:
        _record_read_event(
            filepath,
            encoding,
            {
                "display_title": "File Read Error",
                "error_message": str(e),
            },
            is_error=True,
            error_message=str(e),
            error_type=type(e).__name__,
        )
        raise