skip binary files; update deps

2025-02-25 18:32:29 -05:00 · 2025-02-25 18:32:29 -05:00 · 94d655ce91
parent 8d1e4a96bd
commit 94d655ce91
5 changed files with 108 additions and 2 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@ -43,6 +43,7 @@ dependencies = [
    "websockets>=12.0",
    "jinja2>=3.1.2",
    "python-Levenshtein>=0.26.1",
    "python-magic>=0.4.27",
 ]
 [project.optional-dependencies]
--- a/ra_aid/llm.py
+++ b/ra_aid/llm.py
@ -229,6 +229,7 @@ def create_llm_client(
        model_config["supports_temperature"] = provider in known_temp_providers
    supports_temperature = model_config["supports_temperature"]
    supports_thinking = model_config.get("supports_thinking", False)
    # Handle temperature settings
    if is_expert:
@ -242,6 +243,12 @@ def create_llm_client(
        temp_kwargs = {"temperature": temperature}
    else:
        temp_kwargs = {}
    if supports_thinking:
        temp_kwargs = {"thinking": {
            "type": "enabled",
            "budget_tokens": 8000
        }}
    if provider == "deepseek":
        return create_deepseek_client(
@ -280,6 +287,7 @@ def create_llm_client(
            model_name=model_name,
            timeout=LLM_REQUEST_TIMEOUT,
            max_retries=LLM_MAX_RETRIES,
            max_tokens=model_config.get("max_tokens", 64000),
            **temp_kwargs,
        )
    elif provider == "openai-compatible":
--- a/ra_aid/models_params.py
+++ b/ra_aid/models_params.py
@ -939,6 +939,8 @@ models_params = {
        "claude-3-7-sonnet-20250219": {
            "token_limit": 200000,
            "supports_temperature": True,
            "supports_thinking": True,
            "max_tokens": 64000,
            "default_temperature": 1.0,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
--- a/ra_aid/tools/memory.py
+++ b/ra_aid/tools/memory.py
@ -1,6 +1,11 @@
 import os
 from typing import Dict, List, Optional, Set, Union
 try:
    import magic
 except ImportError:
    magic = None
 from langchain_core.tools import tool
 from rich.console import Console
 from rich.markdown import Markdown
@ -380,6 +385,7 @@ def emit_related_files(files: List[str]) -> str:
    results = []
    added_files = []
    invalid_paths = []
    binary_files = []
    # Process files
    for file in files:
@ -400,6 +406,12 @@ def emit_related_files(files: List[str]) -> str:
            invalid_paths.append(file)
            results.append(f"Error: Path '{file}' exists but is not a regular file")
            continue
        # Check if it's a binary file
        if is_binary_file(file):
            binary_files.append(file)
            results.append(f"Skipped binary file: '{file}'")
            continue
        # Normalize the path
        normalized_path = os.path.abspath(file)
@ -424,7 +436,7 @@ def emit_related_files(files: List[str]) -> str:
            added_files.append((file_id, file))  # Keep original path for display
            results.append(f"File ID #{file_id}: {file}")
-    # Rich output - single consolidated panel
+    # Rich output - single consolidated panel for added files
    if added_files:
        files_added_md = "\n".join(f"- `{file}`" for id, file in added_files)
        md_content = f"**Files Noted:**\n{files_added_md}"
@ -435,8 +447,24 @@ def emit_related_files(files: List[str]) -> str:
                border_style="green",
            )
        )
    # Display skipped binary files
    if binary_files:
        binary_files_md = "\n".join(f"- `{file}`" for file in binary_files)
        md_content = f"**Binary Files Skipped:**\n{binary_files_md}"
        console.print(
            Panel(
                Markdown(md_content),
                title="⚠️ Binary Files Not Added",
                border_style="yellow",
            )
        )
-    return "Files noted."
+    # Return summary message
    if binary_files:
        return f"Files noted. {len(binary_files)} binary files were skipped."
    else:
        return "Files noted."
 def log_work_event(event: str) -> str:
@ -461,6 +489,25 @@ def log_work_event(event: str) -> str:
    return f"Event logged: {event}"
 def is_binary_file(filepath):
    """Return True when ``filepath`` appears to hold binary (non-text) content.

    When the optional ``magic`` module is available its MIME type is used.
    ``text/*`` types, empty files and the textual ``application/*`` types
    listed below count as text; any other MIME type counts as binary.

    When ``magic`` is unavailable, or fails on this file, the first 1024
    bytes are inspected instead: a NUL byte or invalid UTF-8 means binary.

    Args:
        filepath: Path of the file to inspect.

    Returns:
        True if the file looks binary, False if it looks like text. A file
        that cannot be read is reported as False, matching the best-effort
        behaviour of the libmagic branch.
    """
    # Local import keeps this function self-contained; only the content
    # sniff below needs it.
    import codecs

    sniff_size = 1024
    # Non-"text/" MIME types that still denote text. Without these, JSON/XML
    # sources and empty files would be skipped as binary.
    # NOTE(review): list is a reasonable starting set, extend as needed.
    text_mime_types = (
        "inode/x-empty",
        "application/x-empty",
        "application/json",
        "application/x-ndjson",
        "application/xml",
        "application/javascript",
        "application/x-javascript",
        "application/x-sh",
        "application/x-shellscript",
        "application/x-yaml",
        "application/yaml",
        "application/toml",
        "application/x-httpd-php",
        "application/x-perl",
        "application/x-ruby",
        "application/x-python",
    )

    if magic:
        try:
            mime = magic.from_file(filepath, mime=True)
        except Exception:
            # libmagic could not classify the file: fall through to the
            # content sniff instead of blindly assuming text.
            mime = None
        if mime:
            looks_like_text = (
                mime.startswith("text/")
                or mime in text_mime_types
                or mime.endswith(("+xml", "+json"))
            )
            return not looks_like_text

    # Basic detection when magic is missing or failed.
    try:
        with open(filepath, "rb") as f:
            chunk = f.read(sniff_size)
    except OSError:
        # Unreadable/missing file: nothing indicates binary content.
        return False

    # NUL bytes are valid UTF-8 but practically never occur in text files.
    if b"\0" in chunk:
        return True

    # An incremental decoder tolerates a multi-byte character cut off at the
    # chunk boundary; only a chunk that reached EOF is decoded as final.
    decoder = codecs.getincrementaldecoder("utf-8")()
    try:
        decoder.decode(chunk, final=len(chunk) < sniff_size)
    except UnicodeDecodeError:
        return True
    return False
 def get_work_log() -> str:
    """Return formatted markdown of work log entries.
--- a/tests/ra_aid/tools/test_memory.py
+++ b/tests/ra_aid/tools/test_memory.py
@ -714,3 +714,51 @@ def test_swap_task_order_after_delete(reset_memory):
    # Verify swap worked
    assert _global_memory["tasks"][0] == "Task 3"
    assert _global_memory["tasks"][2] == "Task 1"
 def test_emit_related_files_binary_filtering(reset_memory, tmp_path, monkeypatch):
    """Binary files handed to emit_related_files are skipped, not recorded."""
    import ra_aid.tools.memory

    def write_fixture(name, content):
        # Create one fixture file under tmp_path and return its path
        fixture = tmp_path / name
        fixture.write_text(content)
        return fixture

    # Plain text fixtures
    text_file1 = write_fixture("text1.txt", "Text file 1 content")
    text_file2 = write_fixture("text2.txt", "Text file 2 content")

    # "Binary" fixtures: the content is ordinary text, they only count as
    # binary because the detection function is replaced below
    binary_file1 = write_fixture("binary1.bin", "Binary file 1 content")
    binary_file2 = write_fixture("binary2.bin", "Binary file 2 content")

    # Replace the detector so any path containing ".bin" is reported binary
    monkeypatch.setattr(
        ra_aid.tools.memory,
        "is_binary_file",
        lambda filepath: ".bin" in str(filepath),
    )

    # Submit text and binary paths interleaved
    submitted = [
        str(path)
        for path in (text_file1, binary_file1, text_file2, binary_file2)
    ]
    result = emit_related_files.invoke({"files": submitted})

    # The summary message reports how many binary files were skipped
    assert "Files noted." in result
    assert "2 binary files were skipped" in result

    # Only the two text files ended up in related_files
    recorded = _global_memory["related_files"]
    assert len(recorded) == 2
    recorded_paths = list(recorded.values())
    for kept in (text_file1, text_file2):
        assert str(kept) in recorded_paths
    for skipped in (binary_file1, binary_file2):
        assert str(skipped) not in recorded_paths

    # The ID counter advanced once per text file only
    assert _global_memory["related_file_id_counter"] == 2