From 94d655ce91061649e732c6055b9a7820fb77fdc2 Mon Sep 17 00:00:00 2001 From: AI Christianson Date: Tue, 25 Feb 2025 18:32:29 -0500 Subject: [PATCH] skip binary files; update deps --- pyproject.toml | 1 + ra_aid/llm.py | 8 +++++ ra_aid/models_params.py | 2 ++ ra_aid/tools/memory.py | 51 +++++++++++++++++++++++++++++-- tests/ra_aid/tools/test_memory.py | 48 +++++++++++++++++++++++++++++ 5 files changed, 108 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e0d7855..ef751c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,7 @@ dependencies = [ "websockets>=12.0", "jinja2>=3.1.2", "python-Levenshtein>=0.26.1", + "python-magic>=0.4.27", ] [project.optional-dependencies] diff --git a/ra_aid/llm.py b/ra_aid/llm.py index c7267fb..af0238d 100644 --- a/ra_aid/llm.py +++ b/ra_aid/llm.py @@ -229,6 +229,7 @@ def create_llm_client( model_config["supports_temperature"] = provider in known_temp_providers supports_temperature = model_config["supports_temperature"] + supports_thinking = model_config.get("supports_thinking", False) # Handle temperature settings if is_expert: @@ -242,6 +243,12 @@ def create_llm_client( temp_kwargs = {"temperature": temperature} else: temp_kwargs = {} + + if supports_thinking: + temp_kwargs = {"thinking": { + "type": "enabled", + "budget_tokens": 8000 + }} if provider == "deepseek": return create_deepseek_client( @@ -280,6 +287,7 @@ def create_llm_client( model_name=model_name, timeout=LLM_REQUEST_TIMEOUT, max_retries=LLM_MAX_RETRIES, + max_tokens=model_config.get("max_tokens", 64000), **temp_kwargs, ) elif provider == "openai-compatible": diff --git a/ra_aid/models_params.py b/ra_aid/models_params.py index 65f0133..d7019de 100644 --- a/ra_aid/models_params.py +++ b/ra_aid/models_params.py @@ -939,6 +939,8 @@ models_params = { "claude-3-7-sonnet-20250219": { "token_limit": 200000, "supports_temperature": True, + "supports_thinking": True, + "max_tokens": 64000, "default_temperature": 1.0, 
"latency_coefficient": DEFAULT_BASE_LATENCY, }, diff --git a/ra_aid/tools/memory.py b/ra_aid/tools/memory.py index 935b89e..43a6058 100644 --- a/ra_aid/tools/memory.py +++ b/ra_aid/tools/memory.py @@ -1,6 +1,11 @@ import os from typing import Dict, List, Optional, Set, Union +try: + import magic +except ImportError: + magic = None + from langchain_core.tools import tool from rich.console import Console from rich.markdown import Markdown @@ -380,6 +385,7 @@ def emit_related_files(files: List[str]) -> str: results = [] added_files = [] invalid_paths = [] + binary_files = [] # Process files for file in files: @@ -400,6 +406,12 @@ def emit_related_files(files: List[str]) -> str: invalid_paths.append(file) results.append(f"Error: Path '{file}' exists but is not a regular file") continue + + # Check if it's a binary file + if is_binary_file(file): + binary_files.append(file) + results.append(f"Skipped binary file: '{file}'") + continue # Normalize the path normalized_path = os.path.abspath(file) @@ -424,7 +436,7 @@ def emit_related_files(files: List[str]) -> str: added_files.append((file_id, file)) # Keep original path for display results.append(f"File ID #{file_id}: {file}") - # Rich output - single consolidated panel + # Rich output - single consolidated panel for added files if added_files: files_added_md = "\n".join(f"- `{file}`" for id, file in added_files) md_content = f"**Files Noted:**\n{files_added_md}" @@ -435,8 +447,24 @@ def emit_related_files(files: List[str]) -> str: border_style="green", ) ) + + # Display skipped binary files + if binary_files: + binary_files_md = "\n".join(f"- `{file}`" for file in binary_files) + md_content = f"**Binary Files Skipped:**\n{binary_files_md}" + console.print( + Panel( + Markdown(md_content), + title="⚠️ Binary Files Not Added", + border_style="yellow", + ) + ) - return "Files noted." + # Return summary message + if binary_files: + return f"Files noted. {len(binary_files)} binary files were skipped." 
+    else:
+        return "Files noted."
 
 
 def log_work_event(event: str) -> str:
@@ -461,6 +489,59 @@ def log_work_event(event: str) -> str:
     return f"Event logged: {event}"
 
 
+# MIME types libmagic reports for content that is still plain text or empty.
+_TEXT_LIKE_MIME_TYPES = frozenset(
+    {
+        "application/json",
+        "application/x-ndjson",
+        "application/javascript",
+        "application/xml",
+        "application/x-empty",
+        "inode/x-empty",
+    }
+)
+
+
+def _looks_binary_by_content(filepath):
+    """Sniff the file start: NUL characters or invalid UTF-8 mean binary."""
+    try:
+        with open(filepath, "r", encoding="utf-8") as f:
+            # NUL decodes as valid UTF-8 but does not occur in real text files.
+            return "\0" in f.read(1024)
+    except UnicodeDecodeError:
+        return True
+    except OSError:
+        # Unreadable file: cannot tell, so do not report it as binary.
+        return False
+
+
+def is_binary_file(filepath):
+    """Return True if the file at ``filepath`` appears to be binary.
+
+    Uses libmagic (python-magic) when it was importable. Files reported as
+    ``text/*``, as ``*+json`` / ``*+xml``, as a known text-like type such as
+    ``application/json``, or as empty are treated as text. When libmagic is
+    unavailable or raises, the start of the file is decoded as UTF-8 instead.
+
+    Args:
+        filepath: Path of the file to inspect.
+
+    Returns:
+        True if the file looks binary; False if it looks like text or cannot
+        be classified.
+    """
+    if magic:
+        try:
+            mime = magic.from_file(filepath, mime=True)
+        except Exception:
+            # libmagic failures vary by platform; sniff the content instead.
+            return _looks_binary_by_content(filepath)
+        if mime.startswith("text/") or mime.endswith(("+json", "+xml")):
+            return False
+        return mime not in _TEXT_LIKE_MIME_TYPES
+    return _looks_binary_by_content(filepath)
+
+
 def get_work_log() -> str:
     """Return formatted markdown of work log entries.
 
diff --git a/tests/ra_aid/tools/test_memory.py b/tests/ra_aid/tools/test_memory.py
index 5a361c0..af782c7 100644
--- a/tests/ra_aid/tools/test_memory.py
+++ b/tests/ra_aid/tools/test_memory.py
@@ -714,3 +714,98 @@ def test_swap_task_order_after_delete(reset_memory):
     # Verify swap worked
     assert _global_memory["tasks"][0] == "Task 3"
     assert _global_memory["tasks"][2] == "Task 1"
+
+
+def test_emit_related_files_binary_filtering(reset_memory, tmp_path, monkeypatch):
+    """Test that binary files are filtered out when adding related files"""
+    # Create test text files
+    text_file1 = tmp_path / "text1.txt"
+    text_file1.write_text("Text file 1 content")
+    text_file2 = tmp_path / "text2.txt"
+    text_file2.write_text("Text file 2 content")
+
+    # Create test "binary" files
+    binary_file1 = tmp_path / "binary1.bin"
+    binary_file1.write_text("Binary file 1 content")
+    binary_file2 = tmp_path / "binary2.bin"
+    binary_file2.write_text("Binary file 2 content")
+
+    # Mock the is_binary_file function to identify our "binary" files
+    def mock_is_binary_file(filepath):
+        return ".bin" in str(filepath)
+
+    # Apply the mock
+    import ra_aid.tools.memory
+    monkeypatch.setattr(ra_aid.tools.memory, "is_binary_file", mock_is_binary_file)
+
+    # Call emit_related_files with mix of text and binary files
+    result = emit_related_files.invoke({
+        "files": [
+            str(text_file1),
+            str(binary_file1),
+            str(text_file2),
+            str(binary_file2)
+        ]
+    })
+
+    # Verify the result message mentions skipped binary files
+    assert "Files noted." in result
+    assert "2 binary files were skipped" in result
+
+    # Verify only text files were added to related_files
+    assert len(_global_memory["related_files"]) == 2
+    file_values = list(_global_memory["related_files"].values())
+    assert str(text_file1) in file_values
+    assert str(text_file2) in file_values
+    assert str(binary_file1) not in file_values
+    assert str(binary_file2) not in file_values
+
+    # Verify counter is correct (only incremented for text files)
+    assert _global_memory["related_file_id_counter"] == 2
+
+
+def test_is_binary_file_content_fallback(tmp_path, monkeypatch):
+    """Test content-based binary detection when libmagic is unavailable"""
+    import ra_aid.tools.memory
+    monkeypatch.setattr(ra_aid.tools.memory, "magic", None)
+
+    empty_file = tmp_path / "empty.txt"
+    empty_file.write_bytes(b"")
+    text_file = tmp_path / "notes.txt"
+    text_file.write_text("plain text", encoding="utf-8")
+    nul_file = tmp_path / "nul.dat"
+    nul_file.write_bytes(b"abc\x00def")
+    invalid_utf8_file = tmp_path / "invalid.dat"
+    invalid_utf8_file.write_bytes(b"\x89PNG\r\n\x1a\n\xff\xfe")
+
+    assert not ra_aid.tools.memory.is_binary_file(str(empty_file))
+    assert not ra_aid.tools.memory.is_binary_file(str(text_file))
+    assert ra_aid.tools.memory.is_binary_file(str(nul_file))
+    assert ra_aid.tools.memory.is_binary_file(str(invalid_utf8_file))
+
+
+def test_is_binary_file_text_like_mime_types(tmp_path, monkeypatch):
+    """Test that text-like and empty MIME types are not treated as binary"""
+    import ra_aid.tools.memory
+
+    class FakeMagic:
+        reported = "text/plain"
+
+        @classmethod
+        def from_file(cls, filepath, mime=False):
+            return cls.reported
+
+    monkeypatch.setattr(ra_aid.tools.memory, "magic", FakeMagic)
+    target = tmp_path / "data.json"
+    target.write_text("{}")
+
+    cases = [
+        ("text/x-python", False),
+        ("application/json", False),
+        ("inode/x-empty", False),
+        ("image/svg+xml", False),
+        ("application/octet-stream", True),
+    ]
+    for mime_type, expected in cases:
+        FakeMagic.reported = mime_type
+        assert ra_aid.tools.memory.is_binary_file(str(target)) is expected