61 lines
2.0 KiB
Python
61 lines
2.0 KiB
Python
"""Utility functions for file operations."""
|
|
|
|
import os
|
|
|
|
try:
|
|
import magic
|
|
except ImportError:
|
|
magic = None
|
|
|
|
|
|
def is_binary_file(filepath):
    """Return True if *filepath* looks binary, False if it looks like text.

    Uses the optional ``magic`` module when it was importable and falls back
    to ``_is_binary_fallback`` otherwise, or whenever the magic-based
    detection raises.

    Args:
        filepath: Path of the file to inspect.

    Returns:
        False for an empty file, for a file whose MIME type is ``text/*``,
        ``application/x-python*`` or ``application/javascript*``, or whose
        magic description mentions one of the text indicators below.
        True when magic detection succeeds and none of those match.
        Otherwise whatever ``_is_binary_fallback`` returns.

    Raises:
        OSError: If the size of *filepath* cannot be read (for example the
            file does not exist); the size check is not guarded.
    """
    # Empty files are never reported as binary.
    if os.path.getsize(filepath) == 0:
        return False

    # ``magic`` is None when the optional import failed.
    if not magic:
        return _is_binary_fallback(filepath)

    try:
        mime = magic.from_file(filepath, mime=True)

        # Plain text, plus script types that are reported under
        # ``application/`` but are text in practice.
        text_mime_prefixes = (
            "text/",
            "application/x-python",
            "application/javascript",
        )
        if mime.startswith(text_mime_prefixes):
            return False

        # The human-readable description is only needed when the MIME type
        # was inconclusive, so the second lookup is deferred until here.
        description = magic.from_file(filepath).lower()

        # Common words that appear in descriptions of text formats.
        text_indicators = ["text", "script", "xml", "json", "yaml", "markdown", "HTML"]
        if any(indicator.lower() in description for indicator in text_indicators):
            return False

        # No text indicator matched: assume binary.
        return True
    except Exception:
        # Deliberate best effort: any failure in the magic-based detection
        # (including an incompatible ``magic`` module) uses the fallback.
        return _is_binary_fallback(filepath)
|
|
|
|
|
|
def _is_binary_fallback(filepath):
    """Guess whether *filepath* is binary without using the magic library.

    The file is opened as UTF-8 text and up to 1024 characters are read.

    Args:
        filepath: Path of the file to inspect.

    Returns:
        False for an empty file. True if the text that was read contains a
        NUL character or the content cannot be decoded as UTF-8. False
        otherwise.

    Raises:
        OSError: Errors from reading the size or opening the file are not
            caught here; only ``UnicodeDecodeError`` is handled.
    """
    try:
        # A zero-length file has nothing that could mark it as binary.
        if not os.path.getsize(filepath):
            return False

        with open(filepath, "r", encoding="utf-8") as handle:
            sample = handle.read(1024)
    except UnicodeDecodeError:
        # Content that is not valid UTF-8 is treated as binary.
        return True

    # The sample decoded cleanly; a NUL character still indicates binary data.
    return "\0" in sample