FIX prevent duplicate files (#64)

This commit is contained in:
Jose M Leon 2025-01-29 14:48:02 -05:00 committed by GitHub
parent 90b8875a73
commit b44f1c73eb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 163 additions and 11 deletions

View File

@ -108,7 +108,10 @@ def get_file_listing(directory: str, limit: Optional[int] = None) -> Tuple[List[
for line in result.stdout.splitlines()
if line.strip()
]
# Deduplicate and sort for consistency
files = list(dict.fromkeys(files)) # Remove duplicates while preserving order
# Sort for consistency
files.sort()

View File

@ -4,17 +4,16 @@ import os
import pytest
from pathlib import Path
import subprocess
from unittest.mock import patch, MagicMock
from ra_aid.file_listing import (
get_file_listing,
is_git_repo,
GitCommandError,
DirectoryNotFoundError,
DirectoryAccessError,
GitCommandError,
FileListerError
FileListerError,
)
@pytest.fixture
def empty_git_repo(tmp_path):
"""Create an empty git repository."""
@ -33,23 +32,23 @@ def sample_git_repo(empty_git_repo):
"tests/test_main.py",
"docs/index.html"
]
for file_path in files:
full_path = empty_git_repo / file_path
full_path.parent.mkdir(parents=True, exist_ok=True)
full_path.write_text(f"Content of {file_path}")
# Add and commit files
subprocess.run(["git", "add", "."], cwd=empty_git_repo)
subprocess.run(
["git", "commit", "-m", "Initial commit"],
cwd=empty_git_repo,
env={"GIT_AUTHOR_NAME": "Test",
env={"GIT_AUTHOR_NAME": "Test",
"GIT_AUTHOR_EMAIL": "test@example.com",
"GIT_COMMITTER_NAME": "Test",
"GIT_COMMITTER_EMAIL": "test@example.com"}
)
return empty_git_repo
@ -104,7 +103,7 @@ def test_file_as_directory(tmp_path):
"""Test handling of file path instead of directory."""
test_file = tmp_path / "test.txt"
test_file.write_text("test")
with pytest.raises(DirectoryNotFoundError):
get_file_listing(str(test_file))
@ -115,9 +114,159 @@ def test_permission_error(tmp_path):
try:
# Make directory unreadable
os.chmod(tmp_path, 0o000)
with pytest.raises(DirectoryAccessError):
get_file_listing(str(tmp_path))
finally:
# Restore permissions to allow cleanup
os.chmod(tmp_path, 0o755)
# Shared inputs and expected values for the mocked get_file_listing tests
DUMMY_PATH = "dummy/path"
EMPTY_FILE_LIST = []
EMPTY_FILE_TOTAL = 0
SINGLE_FILE_NAME = "file1.txt"
MULTI_FILE_NAMES = ["file1.txt", "file2.py", "file3.md"]


def _case(name, git_output, expected_files, expected_total, limit=None):
    """Build one scenario dict for the parametrized get_file_listing test.

    Args:
        name: Identifier of the scenario (used as the pytest id).
        git_output: Text the mocked git command exposes on stdout.
        expected_files: File list the test expects back.
        expected_total: Total count the test expects back.
        limit: Value passed as ``limit`` to get_file_listing.

    Returns:
        A dict with the keys "name", "git_output", "expected_files",
        "expected_total" and "limit", in that order.
    """
    return {
        "name": name,
        "git_output": git_output,
        "expected_files": expected_files,
        "expected_total": expected_total,
        "limit": limit,
    }


# Scenarios for get_file_listing: each pairs a mocked git output with the
# file list and total the test expects for a given limit.
FILE_LISTING_TEST_CASES = [
    _case(
        name="empty_repository",
        git_output="",
        expected_files=EMPTY_FILE_LIST,
        expected_total=EMPTY_FILE_TOTAL,
    ),
    _case(
        name="single_file",
        git_output=f"{SINGLE_FILE_NAME}\n",
        expected_files=[SINGLE_FILE_NAME],
        expected_total=1,
    ),
    _case(
        name="multiple_files",
        git_output="\n".join(MULTI_FILE_NAMES) + "\n",
        expected_files=MULTI_FILE_NAMES,
        expected_total=len(MULTI_FILE_NAMES),
    ),
    _case(
        name="duplicate_files",
        git_output="\n".join([SINGLE_FILE_NAME, SINGLE_FILE_NAME, *MULTI_FILE_NAMES[1:]]) + "\n",
        expected_files=[SINGLE_FILE_NAME, *MULTI_FILE_NAMES[1:]],
        expected_total=3,  # After deduplication
    ),
    _case(
        name="with_limit",
        git_output="\n".join(MULTI_FILE_NAMES) + "\n",
        expected_files=MULTI_FILE_NAMES[:2],
        expected_total=len(MULTI_FILE_NAMES),
        limit=2,
    ),
    _case(
        name="with_empty_lines",
        git_output=f"\n{SINGLE_FILE_NAME}\n\n{MULTI_FILE_NAMES[1]}\n\n",
        expected_files=[SINGLE_FILE_NAME, MULTI_FILE_NAMES[1]],
        expected_total=2,
    ),
    _case(
        name="with_whitespace",
        git_output=f" {SINGLE_FILE_NAME} \n {MULTI_FILE_NAMES[1]} \n",
        expected_files=[SINGLE_FILE_NAME, MULTI_FILE_NAMES[1]],
        expected_total=2,
    ),
    _case(
        name="limit_larger_than_total",
        git_output=f"{SINGLE_FILE_NAME}\n{MULTI_FILE_NAMES[1]}\n",
        expected_files=[SINGLE_FILE_NAME, MULTI_FILE_NAMES[1]],
        expected_total=2,
        limit=5,
    ),
    _case(
        name="limit_zero",
        git_output="\n".join(MULTI_FILE_NAMES) + "\n",
        expected_files=EMPTY_FILE_LIST,
        expected_total=len(MULTI_FILE_NAMES),
        limit=0,
    ),
    _case(
        name="nested_paths",
        git_output="dir1/file1.txt\ndir1/dir2/file2.py\nfile3.md\n",
        expected_files=sorted(["dir1/file1.txt", "dir1/dir2/file2.py", "file3.md"]),
        expected_total=3,
    ),
    _case(
        name="special_characters",
        git_output="file-1.txt\nfile_2.py\nfile 3.md\n",
        expected_files=sorted(["file-1.txt", "file_2.py", "file 3.md"]),
        expected_total=3,
    ),
    _case(
        name="duplicate_nested_paths",
        git_output="dir1/file1.txt\ndir1/file1.txt\ndir2/file1.txt\n",
        expected_files=sorted(["dir1/file1.txt", "dir2/file1.txt"]),
        expected_total=2,
    ),
]
def create_mock_process(git_output: str, returncode: int = 0) -> MagicMock:
    """Create a mock process result carrying the given git output.

    Args:
        git_output: Text exposed as the mock's ``stdout`` attribute.
        returncode: Exit status exposed as the mock's ``returncode``
            attribute. Defaults to 0, the value that used to be hard-coded.

    Returns:
        A MagicMock with ``stdout`` and ``returncode`` set as described.
        Any other attribute access yields an auto-created MagicMock.
    """
    # MagicMock accepts attribute values as constructor keyword arguments,
    # so the mock is fully configured in a single expression.
    return MagicMock(stdout=git_output, returncode=returncode)
@pytest.fixture
def mock_subprocess():
    """Replace ``subprocess.run`` with a mock for the duration of a test.

    Yields:
        The mock object standing in for ``subprocess.run``.
    """
    patcher = patch("subprocess.run")
    run_mock = patcher.start()
    try:
        yield run_mock
    finally:
        # Undo the patch even when the test using the fixture fails.
        patcher.stop()
@pytest.fixture
def mock_is_git_repo():
    """Patch ``ra_aid.file_listing.is_git_repo`` so it returns True.

    Yields:
        The mock replacing ``is_git_repo``; a test may change its
        ``return_value`` to simulate a non-git directory.
    """
    # Keyword arguments to patch() configure the created mock, so the
    # True return value is set up front.
    with patch("ra_aid.file_listing.is_git_repo", return_value=True) as git_repo_check:
        yield git_repo_check
@pytest.mark.parametrize("test_case", FILE_LISTING_TEST_CASES, ids=lambda case: case["name"])
def test_get_file_listing(test_case, mock_subprocess, mock_is_git_repo):
    """Check get_file_listing against every scenario in FILE_LISTING_TEST_CASES."""
    # Have the patched subprocess.run return this scenario's git output.
    fake_result = create_mock_process(test_case["git_output"])
    mock_subprocess.return_value = fake_result

    expected = (test_case["expected_files"], test_case["expected_total"])
    files, total = get_file_listing(DUMMY_PATH, limit=test_case["limit"])

    assert (files, total) == expected
def test_get_file_listing_non_git_repo(mock_is_git_repo):
    """Check that a non-git directory yields an empty listing and zero total."""
    # Make the patched is_git_repo report that the path is not a repository.
    mock_is_git_repo.return_value = False

    listing, count = get_file_listing(DUMMY_PATH)

    assert (listing, count) == (EMPTY_FILE_LIST, EMPTY_FILE_TOTAL)
def test_get_file_listing_git_error(mock_subprocess, mock_is_git_repo):
    """Check that GitCommandError surfaces when the git command fails."""
    # NOTE(review): the mock raises GitCommandError itself, so this verifies
    # that the error reaches the caller; whether a raw subprocess failure is
    # translated into GitCommandError is not exercised here - confirm.
    git_failure = GitCommandError("Git command failed")
    mock_subprocess.side_effect = git_failure

    with pytest.raises(GitCommandError):
        get_file_listing(DUMMY_PATH)
def test_get_file_listing_permission_error(mock_subprocess, mock_is_git_repo):
    """Check that a PermissionError from subprocess.run becomes DirectoryAccessError."""
    # The patched subprocess.run raises PermissionError when called.
    denied = PermissionError("Permission denied")
    mock_subprocess.side_effect = denied

    with pytest.raises(DirectoryAccessError):
        get_file_listing(DUMMY_PATH)
def test_get_file_listing_unexpected_error(mock_subprocess, mock_is_git_repo):
    """Check that an arbitrary exception from subprocess.run becomes FileListerError."""
    # A plain Exception stands in for any failure without a dedicated handler.
    unexpected = Exception("Unexpected error")
    mock_subprocess.side_effect = unexpected

    with pytest.raises(FileListerError):
        get_file_listing(DUMMY_PATH)