skip binary files; update deps

2025-02-25 18:32:29 -05:00 · 2025-02-25 18:32:29 -05:00 · 94d655ce91
parent 8d1e4a96bd
commit 94d655ce91
5 changed files with 108 additions and 2 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,6 +43,7 @@ dependencies = [
    "websockets>=12.0",
    "jinja2>=3.1.2",
    "python-Levenshtein>=0.26.1",
+    "python-magic>=0.4.27",
 ]

 [project.optional-dependencies]
--- a/ra_aid/llm.py
+++ b/ra_aid/llm.py
@@ -229,6 +229,7 @@ def create_llm_client(
        model_config["supports_temperature"] = provider in known_temp_providers

    supports_temperature = model_config["supports_temperature"]
+    supports_thinking = model_config.get("supports_thinking", False)

    # Handle temperature settings
    if is_expert:
@@ -242,6 +243,12 @@ def create_llm_client(
        temp_kwargs = {"temperature": temperature}
    else:
        temp_kwargs = {}
+    
+    if supports_thinking:
+        temp_kwargs = {"thinking": {
+            "type": "enabled",
+            "budget_tokens": 8000
+        }}

    if provider == "deepseek":
        return create_deepseek_client(
@@ -280,6 +287,7 @@ def create_llm_client(
            model_name=model_name,
            timeout=LLM_REQUEST_TIMEOUT,
            max_retries=LLM_MAX_RETRIES,
+            max_tokens=model_config.get("max_tokens", 64000),
            **temp_kwargs,
        )
    elif provider == "openai-compatible":
--- a/ra_aid/models_params.py
+++ b/ra_aid/models_params.py
@@ -939,6 +939,8 @@ models_params = {
        "claude-3-7-sonnet-20250219": {
            "token_limit": 200000,
            "supports_temperature": True,
+            "supports_thinking": True,
+            "max_tokens": 64000,
            "default_temperature": 1.0,
            "latency_coefficient": DEFAULT_BASE_LATENCY,
        },
--- a/ra_aid/tools/memory.py
+++ b/ra_aid/tools/memory.py
@@ -1,6 +1,11 @@
 import os
 from typing import Dict, List, Optional, Set, Union

+try:
+    import magic
+except ImportError:
+    magic = None
+
 from langchain_core.tools import tool
 from rich.console import Console
 from rich.markdown import Markdown
@@ -380,6 +385,7 @@ def emit_related_files(files: List[str]) -> str:
    results = []
    added_files = []
    invalid_paths = []
+    binary_files = []

    # Process files
    for file in files:
@@ -400,6 +406,12 @@ def emit_related_files(files: List[str]) -> str:
            invalid_paths.append(file)
            results.append(f"Error: Path '{file}' exists but is not a regular file")
            continue
+            
+        # Check if it's a binary file
+        if is_binary_file(file):
+            binary_files.append(file)
+            results.append(f"Skipped binary file: '{file}'")
+            continue

        # Normalize the path
        normalized_path = os.path.abspath(file)
@@ -424,7 +436,7 @@ def emit_related_files(files: List[str]) -> str:
            added_files.append((file_id, file))  # Keep original path for display
            results.append(f"File ID #{file_id}: {file}")

-    # Rich output - single consolidated panel
+    # Rich output - single consolidated panel for added files
    if added_files:
        files_added_md = "\n".join(f"- `{file}`" for id, file in added_files)
        md_content = f"**Files Noted:**\n{files_added_md}"
@@ -435,8 +447,24 @@ def emit_related_files(files: List[str]) -> str:
                border_style="green",
            )
        )
+    
+    # Display skipped binary files
+    if binary_files:
+        binary_files_md = "\n".join(f"- `{file}`" for file in binary_files)
+        md_content = f"**Binary Files Skipped:**\n{binary_files_md}"
+        console.print(
+            Panel(
+                Markdown(md_content),
+                title="⚠️ Binary Files Not Added",
+                border_style="yellow",
+            )
+        )

-    return "Files noted."
+    # Return summary message
+    if binary_files:
+        return f"Files noted. {len(binary_files)} binary files were skipped."
+    else:
+        return "Files noted."


 def log_work_event(event: str) -> str:
@@ -461,6 +489,25 @@ def log_work_event(event: str) -> str:
    return f"Event logged: {event}"


+def is_binary_file(filepath):
+    """Return True if filepath looks binary; empty or unreadable files count as text."""
+    try:
+        if os.path.getsize(filepath) == 0:
+            return False  # libmagic gives empty files a non-text MIME type (inode/x-empty)
+        if magic:
+            return not magic.from_file(filepath, mime=True).startswith('text/')
+    except Exception:
+        pass  # stat or libmagic failed: fall through to the decode check below
+    # No libmagic verdict: treat the file as text if its first 1024 chars decode as UTF-8.
+    try:
+        with open(filepath, 'r', encoding='utf-8') as f:
+            f.read(1024)
+    except (OSError, UnicodeDecodeError) as exc:
+        # Only a decode failure means binary; an unreadable file is reported as text.
+        return isinstance(exc, UnicodeDecodeError)
+    return False
+
+
 def get_work_log() -> str:
    """Return formatted markdown of work log entries.

--- a/tests/ra_aid/tools/test_memory.py
+++ b/tests/ra_aid/tools/test_memory.py
@@ -714,3 +714,51 @@ def test_swap_task_order_after_delete(reset_memory):
    # Verify swap worked
    assert _global_memory["tasks"][0] == "Task 3"
    assert _global_memory["tasks"][2] == "Task 1"
+
+
+def test_emit_related_files_binary_filtering(reset_memory, tmp_path, monkeypatch):
+    """emit_related_files skips files flagged by is_binary_file and reports how many."""
+    # Create two text files that are expected to be recorded
+    text_file1 = tmp_path / "text1.txt"
+    text_file1.write_text("Text file 1 content")
+    text_file2 = tmp_path / "text2.txt"
+    text_file2.write_text("Text file 2 content")
+    
+    # Create stand-in "binary" files (plain text on disk; the mock below flags them by name)
+    binary_file1 = tmp_path / "binary1.bin"
+    binary_file1.write_text("Binary file 1 content")
+    binary_file2 = tmp_path / "binary2.bin"
+    binary_file2.write_text("Binary file 2 content")
+    
+    # Mock is_binary_file so that any path containing ".bin" is treated as binary
+    def mock_is_binary_file(filepath):
+        return ".bin" in str(filepath)
+    
+    # Replace the module-level function so the test does not depend on libmagic
+    import ra_aid.tools.memory
+    monkeypatch.setattr(ra_aid.tools.memory, "is_binary_file", mock_is_binary_file)
+    
+    # Call emit_related_files with an interleaved mix of text and binary files
+    result = emit_related_files.invoke({
+        "files": [
+            str(text_file1), 
+            str(binary_file1), 
+            str(text_file2), 
+            str(binary_file2)
+        ]
+    })
+    
+    # Verify the result message reports both skipped binary files
+    assert "Files noted." in result
+    assert "2 binary files were skipped" in result
+    
+    # Verify only the text files were added to related_files
+    assert len(_global_memory["related_files"]) == 2
+    file_values = list(_global_memory["related_files"].values())
+    assert str(text_file1) in file_values
+    assert str(text_file2) in file_values
+    assert str(binary_file1) not in file_values
+    assert str(binary_file2) not in file_values
+    
+    # Verify the ID counter advanced only for the two text files
+    assert _global_memory["related_file_id_counter"] == 2