skip binary files; update deps

This commit is contained in:
AI Christianson 2025-02-25 18:32:29 -05:00
parent 8d1e4a96bd
commit 94d655ce91
5 changed files with 108 additions and 2 deletions

View File

@ -43,6 +43,7 @@ dependencies = [
"websockets>=12.0",
"jinja2>=3.1.2",
"python-Levenshtein>=0.26.1",
"python-magic>=0.4.27",
]
[project.optional-dependencies]

View File

@ -229,6 +229,7 @@ def create_llm_client(
model_config["supports_temperature"] = provider in known_temp_providers
supports_temperature = model_config["supports_temperature"]
supports_thinking = model_config.get("supports_thinking", False)
# Handle temperature settings
if is_expert:
@ -242,6 +243,12 @@ def create_llm_client(
temp_kwargs = {"temperature": temperature}
else:
temp_kwargs = {}
if supports_thinking:
temp_kwargs = {"thinking": {
"type": "enabled",
"budget_tokens": 8000
}}
if provider == "deepseek":
return create_deepseek_client(
@ -280,6 +287,7 @@ def create_llm_client(
model_name=model_name,
timeout=LLM_REQUEST_TIMEOUT,
max_retries=LLM_MAX_RETRIES,
max_tokens=model_config.get("max_tokens", 64000),
**temp_kwargs,
)
elif provider == "openai-compatible":

View File

@ -939,6 +939,8 @@ models_params = {
"claude-3-7-sonnet-20250219": {
"token_limit": 200000,
"supports_temperature": True,
"supports_thinking": True,
"max_tokens": 64000,
"default_temperature": 1.0,
"latency_coefficient": DEFAULT_BASE_LATENCY,
},

View File

@ -1,6 +1,11 @@
import os
from typing import Dict, List, Optional, Set, Union
try:
import magic
except ImportError:
magic = None
from langchain_core.tools import tool
from rich.console import Console
from rich.markdown import Markdown
@ -380,6 +385,7 @@ def emit_related_files(files: List[str]) -> str:
results = []
added_files = []
invalid_paths = []
binary_files = []
# Process files
for file in files:
@ -400,6 +406,12 @@ def emit_related_files(files: List[str]) -> str:
invalid_paths.append(file)
results.append(f"Error: Path '{file}' exists but is not a regular file")
continue
# Check if it's a binary file
if is_binary_file(file):
binary_files.append(file)
results.append(f"Skipped binary file: '{file}'")
continue
# Normalize the path
normalized_path = os.path.abspath(file)
@ -424,7 +436,7 @@ def emit_related_files(files: List[str]) -> str:
added_files.append((file_id, file)) # Keep original path for display
results.append(f"File ID #{file_id}: {file}")
# Rich output - single consolidated panel
# Rich output - single consolidated panel for added files
if added_files:
files_added_md = "\n".join(f"- `{file}`" for id, file in added_files)
md_content = f"**Files Noted:**\n{files_added_md}"
@ -435,8 +447,24 @@ def emit_related_files(files: List[str]) -> str:
border_style="green",
)
)
# Display skipped binary files
if binary_files:
binary_files_md = "\n".join(f"- `{file}`" for file in binary_files)
md_content = f"**Binary Files Skipped:**\n{binary_files_md}"
console.print(
Panel(
Markdown(md_content),
title="⚠️ Binary Files Not Added",
border_style="yellow",
)
)
return "Files noted."
# Return summary message
if binary_files:
return f"Files noted. {len(binary_files)} binary files were skipped."
else:
return "Files noted."
def log_work_event(event: str) -> str:
@ -461,6 +489,25 @@ def log_work_event(event: str) -> str:
return f"Event logged: {event}"
# MIME types outside ``text/*`` that libmagic can report for files whose
# content is nevertheless plain text (or empty) and therefore safe to read.
_TEXT_LIKE_MIME_TYPES = frozenset(
    {
        "application/json",
        "application/javascript",
        "application/x-javascript",
        "application/x-ndjson",
        "application/x-sh",
        "application/x-shellscript",
        "application/xml",
        "application/x-empty",
        "inode/x-empty",
    }
)

# Structured-syntax suffixes (e.g. ``image/svg+xml``) that denote textual formats.
_TEXT_LIKE_MIME_SUFFIXES = ("+json", "+xml")

# Number of leading bytes inspected by the content-based heuristic.
_BINARY_SNIFF_BYTES = 8192


def _mime_is_text(mime):
    """Return True when a MIME type denotes textual (or empty) content."""
    return (
        mime.startswith("text/")
        or mime in _TEXT_LIKE_MIME_TYPES
        or mime.endswith(_TEXT_LIKE_MIME_SUFFIXES)
    )


def _content_looks_binary(filepath):
    """Classify a file as binary by inspecting its leading bytes.

    A file is considered binary when the sample contains a NUL byte or is
    not valid UTF-8. An empty file is considered text.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Local import keeps this block independent of the file's import list.
    import codecs

    with open(filepath, "rb") as handle:
        sample = handle.read(_BINARY_SNIFF_BYTES)

    # NUL bytes are valid UTF-8 but practically never occur in text files.
    if b"\x00" in sample:
        return True

    try:
        # final=False tolerates a multi-byte character that was cut in half
        # by the sample boundary instead of reporting it as invalid.
        codecs.getincrementaldecoder("utf-8")().decode(sample, final=False)
    except UnicodeDecodeError:
        return True
    return False


def is_binary_file(filepath):
    """Check whether a file is binary, using the magic library if available.

    When the module-level ``magic`` is usable, its MIME type decides: ``text/*``
    plus a small set of textual ``application/*`` types and empty-file types
    count as text, everything else as binary. If ``magic`` is unavailable,
    raises, or returns nothing, the leading bytes of the file are inspected
    instead (NUL byte or invalid UTF-8 means binary).

    Args:
        filepath: Path of the file to classify.

    Returns:
        True if the file looks binary, False if it looks like text or if it
        cannot be read at all (best effort: the caller is left to deal with
        the unreadable file).
    """
    if magic:
        try:
            mime = magic.from_file(filepath, mime=True)
        except Exception:
            # libmagic can fail for many reasons (missing database, odd
            # files); fall through to the content heuristic rather than
            # blindly assuming text.
            mime = None
        if mime:
            return not _mime_is_text(mime)

    try:
        return _content_looks_binary(filepath)
    except OSError:
        # Mirror the best-effort behaviour of the magic path: never raise.
        return False
def get_work_log() -> str:
"""Return formatted markdown of work log entries.

View File

@ -714,3 +714,51 @@ def test_swap_task_order_after_delete(reset_memory):
# Verify swap worked
assert _global_memory["tasks"][0] == "Task 3"
assert _global_memory["tasks"][2] == "Task 1"
def test_emit_related_files_binary_filtering(reset_memory, tmp_path, monkeypatch):
    """Binary files passed to emit_related_files are skipped, text files are kept."""
    import ra_aid.tools.memory

    # File name -> content, listed in the order the files are handed to the tool.
    # The ".bin" files hold plain text; only their name marks them as "binary".
    fixtures = {
        "text1.txt": "Text file 1 content",
        "binary1.bin": "Binary file 1 content",
        "text2.txt": "Text file 2 content",
        "binary2.bin": "Binary file 2 content",
    }
    paths = {}
    for name, content in fixtures.items():
        target = tmp_path / name
        target.write_text(content)
        paths[name] = str(target)

    # Replace the real detector with one that flags anything containing ".bin".
    monkeypatch.setattr(
        ra_aid.tools.memory,
        "is_binary_file",
        lambda filepath: ".bin" in str(filepath),
    )

    # Feed the tool the interleaved text/binary paths.
    result = emit_related_files.invoke({"files": list(paths.values())})

    # The summary reports both the success and the number of skipped files.
    assert "Files noted." in result
    assert "2 binary files were skipped" in result

    # Only the two text files end up in the related-files registry.
    related = _global_memory["related_files"]
    assert len(related) == 2
    stored_paths = list(related.values())
    assert paths["text1.txt"] in stored_paths
    assert paths["text2.txt"] in stored_paths
    assert paths["binary1.bin"] not in stored_paths
    assert paths["binary2.bin"] not in stored_paths

    # Skipped files must not consume file IDs.
    assert _global_memory["related_file_id_counter"] == 2