skip binary files; update deps
This commit is contained in:
parent
8d1e4a96bd
commit
94d655ce91
|
|
@ -43,6 +43,7 @@ dependencies = [
|
||||||
"websockets>=12.0",
|
"websockets>=12.0",
|
||||||
"jinja2>=3.1.2",
|
"jinja2>=3.1.2",
|
||||||
"python-Levenshtein>=0.26.1",
|
"python-Levenshtein>=0.26.1",
|
||||||
|
"python-magic>=0.4.27",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
|
|
|
||||||
|
|
@ -229,6 +229,7 @@ def create_llm_client(
|
||||||
model_config["supports_temperature"] = provider in known_temp_providers
|
model_config["supports_temperature"] = provider in known_temp_providers
|
||||||
|
|
||||||
supports_temperature = model_config["supports_temperature"]
|
supports_temperature = model_config["supports_temperature"]
|
||||||
|
supports_thinking = model_config.get("supports_thinking", False)
|
||||||
|
|
||||||
# Handle temperature settings
|
# Handle temperature settings
|
||||||
if is_expert:
|
if is_expert:
|
||||||
|
|
@ -242,6 +243,12 @@ def create_llm_client(
|
||||||
temp_kwargs = {"temperature": temperature}
|
temp_kwargs = {"temperature": temperature}
|
||||||
else:
|
else:
|
||||||
temp_kwargs = {}
|
temp_kwargs = {}
|
||||||
|
|
||||||
|
if supports_thinking:
|
||||||
|
temp_kwargs = {"thinking": {
|
||||||
|
"type": "enabled",
|
||||||
|
"budget_tokens": 8000
|
||||||
|
}}
|
||||||
|
|
||||||
if provider == "deepseek":
|
if provider == "deepseek":
|
||||||
return create_deepseek_client(
|
return create_deepseek_client(
|
||||||
|
|
@ -280,6 +287,7 @@ def create_llm_client(
|
||||||
model_name=model_name,
|
model_name=model_name,
|
||||||
timeout=LLM_REQUEST_TIMEOUT,
|
timeout=LLM_REQUEST_TIMEOUT,
|
||||||
max_retries=LLM_MAX_RETRIES,
|
max_retries=LLM_MAX_RETRIES,
|
||||||
|
max_tokens=model_config.get("max_tokens", 64000),
|
||||||
**temp_kwargs,
|
**temp_kwargs,
|
||||||
)
|
)
|
||||||
elif provider == "openai-compatible":
|
elif provider == "openai-compatible":
|
||||||
|
|
|
||||||
|
|
@ -939,6 +939,8 @@ models_params = {
|
||||||
"claude-3-7-sonnet-20250219": {
|
"claude-3-7-sonnet-20250219": {
|
||||||
"token_limit": 200000,
|
"token_limit": 200000,
|
||||||
"supports_temperature": True,
|
"supports_temperature": True,
|
||||||
|
"supports_thinking": True,
|
||||||
|
"max_tokens": 64000,
|
||||||
"default_temperature": 1.0,
|
"default_temperature": 1.0,
|
||||||
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
"latency_coefficient": DEFAULT_BASE_LATENCY,
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,11 @@
|
||||||
import os
|
import os
|
||||||
from typing import Dict, List, Optional, Set, Union
|
from typing import Dict, List, Optional, Set, Union
|
||||||
|
|
||||||
|
try:
|
||||||
|
import magic
|
||||||
|
except ImportError:
|
||||||
|
magic = None
|
||||||
|
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
from rich.console import Console
|
from rich.console import Console
|
||||||
from rich.markdown import Markdown
|
from rich.markdown import Markdown
|
||||||
|
|
@ -380,6 +385,7 @@ def emit_related_files(files: List[str]) -> str:
|
||||||
results = []
|
results = []
|
||||||
added_files = []
|
added_files = []
|
||||||
invalid_paths = []
|
invalid_paths = []
|
||||||
|
binary_files = []
|
||||||
|
|
||||||
# Process files
|
# Process files
|
||||||
for file in files:
|
for file in files:
|
||||||
|
|
@ -400,6 +406,12 @@ def emit_related_files(files: List[str]) -> str:
|
||||||
invalid_paths.append(file)
|
invalid_paths.append(file)
|
||||||
results.append(f"Error: Path '{file}' exists but is not a regular file")
|
results.append(f"Error: Path '{file}' exists but is not a regular file")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Check if it's a binary file
|
||||||
|
if is_binary_file(file):
|
||||||
|
binary_files.append(file)
|
||||||
|
results.append(f"Skipped binary file: '{file}'")
|
||||||
|
continue
|
||||||
|
|
||||||
# Normalize the path
|
# Normalize the path
|
||||||
normalized_path = os.path.abspath(file)
|
normalized_path = os.path.abspath(file)
|
||||||
|
|
@ -424,7 +436,7 @@ def emit_related_files(files: List[str]) -> str:
|
||||||
added_files.append((file_id, file)) # Keep original path for display
|
added_files.append((file_id, file)) # Keep original path for display
|
||||||
results.append(f"File ID #{file_id}: {file}")
|
results.append(f"File ID #{file_id}: {file}")
|
||||||
|
|
||||||
# Rich output - single consolidated panel
|
# Rich output - single consolidated panel for added files
|
||||||
if added_files:
|
if added_files:
|
||||||
files_added_md = "\n".join(f"- `{file}`" for id, file in added_files)
|
files_added_md = "\n".join(f"- `{file}`" for id, file in added_files)
|
||||||
md_content = f"**Files Noted:**\n{files_added_md}"
|
md_content = f"**Files Noted:**\n{files_added_md}"
|
||||||
|
|
@ -435,8 +447,24 @@ def emit_related_files(files: List[str]) -> str:
|
||||||
border_style="green",
|
border_style="green",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Display skipped binary files
|
||||||
|
if binary_files:
|
||||||
|
binary_files_md = "\n".join(f"- `{file}`" for file in binary_files)
|
||||||
|
md_content = f"**Binary Files Skipped:**\n{binary_files_md}"
|
||||||
|
console.print(
|
||||||
|
Panel(
|
||||||
|
Markdown(md_content),
|
||||||
|
title="⚠️ Binary Files Not Added",
|
||||||
|
border_style="yellow",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
return "Files noted."
|
# Return summary message
|
||||||
|
if binary_files:
|
||||||
|
return f"Files noted. {len(binary_files)} binary files were skipped."
|
||||||
|
else:
|
||||||
|
return "Files noted."
|
||||||
|
|
||||||
|
|
||||||
def log_work_event(event: str) -> str:
|
def log_work_event(event: str) -> str:
|
||||||
|
|
@ -461,6 +489,25 @@ def log_work_event(event: str) -> str:
|
||||||
return f"Event logged: {event}"
|
return f"Event logged: {event}"
|
||||||
|
|
||||||
|
|
||||||
|
# MIME types that libmagic may report for content that is still plain text
# even though the type does not start with "text/".
_TEXTUAL_MIME_TYPES = frozenset(
    {
        "application/json",
        "application/javascript",
        "application/xml",
        "application/x-empty",
        "inode/x-empty",
    }
)
# Structured-syntax suffixes that mark a textual payload (e.g. image/svg+xml).
_TEXTUAL_MIME_SUFFIXES = ("+json", "+xml")
# How many leading bytes the fallback heuristic inspects.
_SNIFF_BYTES = 1024


def is_binary_file(filepath):
    """Return True if *filepath* looks like a binary (non-text) file.

    When the optional ``magic`` module is importable, the decision is based
    on the MIME type it reports: ``text/*`` plus a small allow-list of
    textual ``application/*`` types (and empty files) count as text,
    everything else as binary.

    Without ``magic`` the first ``_SNIFF_BYTES`` bytes are inspected: a NUL
    byte or a sequence that is not valid UTF-8 marks the file as binary.

    Detection is best effort. If the file cannot be inspected (libmagic
    error, missing file, permission problem) the function returns False so
    the caller's own path handling decides what happens next.

    Args:
        filepath: Path of the file to inspect.

    Returns:
        bool: True when the file is considered binary, False otherwise.
    """
    if magic:
        try:
            mime = magic.from_file(filepath, mime=True)
        except Exception:
            # Deliberate best effort: any libmagic failure means "not binary".
            return False
        is_textual = (
            mime.startswith("text/")
            or mime in _TEXTUAL_MIME_TYPES
            or mime.endswith(_TEXTUAL_MIME_SUFFIXES)
        )
        return not is_textual

    # Basic detection when magic is not available.
    import codecs

    try:
        with open(filepath, "rb") as f:
            chunk = f.read(_SNIFF_BYTES)
    except OSError:
        # Mirror the magic branch: an unreadable file is not reported as binary.
        return False

    # NUL bytes are valid UTF-8 but essentially never appear in text files.
    if b"\x00" in chunk:
        return True

    try:
        # The incremental decoder buffers a multi-byte character that is cut
        # off at the end of the chunk instead of raising for it.
        codecs.getincrementaldecoder("utf-8")().decode(chunk)
    except UnicodeDecodeError:
        return True
    return False
|
||||||
|
|
||||||
|
|
||||||
def get_work_log() -> str:
|
def get_work_log() -> str:
|
||||||
"""Return formatted markdown of work log entries.
|
"""Return formatted markdown of work log entries.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -714,3 +714,51 @@ def test_swap_task_order_after_delete(reset_memory):
|
||||||
# Verify swap worked
|
# Verify swap worked
|
||||||
assert _global_memory["tasks"][0] == "Task 3"
|
assert _global_memory["tasks"][0] == "Task 3"
|
||||||
assert _global_memory["tasks"][2] == "Task 1"
|
assert _global_memory["tasks"][2] == "Task 1"
|
||||||
|
|
||||||
|
|
||||||
|
def test_emit_related_files_binary_filtering(reset_memory, tmp_path, monkeypatch):
    """emit_related_files records text files and skips files flagged as binary."""
    import ra_aid.tools.memory

    def make_file(name, content):
        # Write a small fixture file under tmp_path and hand back its path.
        target = tmp_path / name
        target.write_text(content)
        return target

    # Plain text fixtures
    text_file1 = make_file("text1.txt", "Text file 1 content")
    text_file2 = make_file("text2.txt", "Text file 2 content")

    # Stand-ins for binary files; only the ".bin" suffix matters here because
    # detection itself is replaced below.
    binary_file1 = make_file("binary1.bin", "Binary file 1 content")
    binary_file2 = make_file("binary2.bin", "Binary file 2 content")

    # Swap the real detector for one that flags any path containing ".bin"
    monkeypatch.setattr(
        ra_aid.tools.memory,
        "is_binary_file",
        lambda filepath: ".bin" in str(filepath),
    )

    # Interleave text and binary paths in a single call
    requested = [str(text_file1), str(binary_file1), str(text_file2), str(binary_file2)]
    result = emit_related_files.invoke({"files": requested})

    # The summary message reports the skipped binaries
    assert "Files noted." in result
    assert "2 binary files were skipped" in result

    # Only the text files ended up in related_files
    assert len(_global_memory["related_files"]) == 2
    file_values = list(_global_memory["related_files"].values())
    assert str(text_file1) in file_values
    assert str(text_file2) in file_values
    assert str(binary_file1) not in file_values
    assert str(binary_file2) not in file_values

    # The ID counter advanced once per text file and never for a binary file
    assert _global_memory["related_file_id_counter"] == 2
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue