Get project info programmatically to save tokens.

This commit is contained in:
AI Christianson 2025-01-09 14:43:42 -05:00
parent 557ffd959b
commit b631a4bf57
10 changed files with 719 additions and 11 deletions

1
.gitignore vendored
View File

@ -10,3 +10,4 @@ __pycache__/
/.venv /.venv
/venv /venv
/.idea /.idea
/htmlcov

View File

@ -12,7 +12,7 @@ from typing import Optional
from langgraph.prebuilt import create_react_agent from langgraph.prebuilt import create_react_agent
from ra_aid.agents.ciayn_agent import CiaynAgent from ra_aid.agents.ciayn_agent import CiaynAgent
from ra_aid.agents.ciayn_agent import CiaynAgent from ra_aid.project_info import get_project_info, format_project_info, display_project_status
from ra_aid.console.formatting import print_stage_header, print_error from ra_aid.console.formatting import print_stage_header, print_error
from langchain_core.language_models import BaseChatModel from langchain_core.language_models import BaseChatModel
from langchain_core.tools import tool from langchain_core.tools import tool
@ -181,6 +181,15 @@ def run_research_agent(
code_snippets = _global_memory.get("code_snippets", "") code_snippets = _global_memory.get("code_snippets", "")
related_files = _global_memory.get("related_files", "") related_files = _global_memory.get("related_files", "")
# Get project info
try:
project_info = get_project_info(".", file_limit=2000)
formatted_project_info = format_project_info(project_info)
display_project_status(project_info) # Add status display
except Exception as e:
logger.warning(f"Failed to get project info: {e}")
formatted_project_info = ""
# Build prompt # Build prompt
prompt = (RESEARCH_ONLY_PROMPT if research_only else RESEARCH_PROMPT).format( prompt = (RESEARCH_ONLY_PROMPT if research_only else RESEARCH_PROMPT).format(
base_task=base_task_or_query, base_task=base_task_or_query,
@ -191,7 +200,8 @@ def run_research_agent(
key_facts=key_facts, key_facts=key_facts,
work_log=get_memory_value('work_log'), work_log=get_memory_value('work_log'),
code_snippets=code_snippets, code_snippets=code_snippets,
related_files=related_files related_files=related_files,
project_info=formatted_project_info
) )
# Set up configuration # Set up configuration

131
ra_aid/file_listing.py Normal file
View File

@ -0,0 +1,131 @@
"""Module for efficient file listing using git."""
import subprocess
from pathlib import Path
from typing import List, Optional, Tuple
class FileListerError(Exception):
    """Root of the exception hierarchy used by the file listing helpers."""


class GitCommandError(FileListerError):
    """Signals that a git invocation did not succeed."""


class DirectoryNotFoundError(FileListerError):
    """Signals that the requested directory is missing or is not a directory."""


class DirectoryAccessError(FileListerError):
    """Signals that permissions prevent access to the directory."""
def is_git_repo(directory: str) -> bool:
    """
    Check if the given directory is inside a git repository.

    Runs ``git rev-parse --git-dir`` with ``directory`` as the working
    directory and reports whether the command exited successfully.

    Args:
        directory: Path to the directory to check

    Returns:
        bool: True if git exited with status 0, False for any non-zero
            exit status (a failing git command is treated as "not a
            repository" rather than as an error)

    Raises:
        DirectoryNotFoundError: If directory does not exist or is not a directory
        DirectoryAccessError: If a PermissionError occurs while checking
        FileListerError: For any other unexpected error (e.g. git could
            not be started)
    """
    try:
        path = Path(directory).resolve()
        if not path.exists():
            raise DirectoryNotFoundError(f"Directory does not exist: {directory}")
        if not path.is_dir():
            raise DirectoryNotFoundError(f"Path is not a directory: {directory}")

        # No check=True on purpose: a non-zero exit status is the normal
        # "not a repository" answer, so CalledProcessError is never raised.
        result = subprocess.run(
            ["git", "rev-parse", "--git-dir"],
            cwd=str(path),
            capture_output=True,
            text=True
        )
        return result.returncode == 0
    except FileListerError:
        # Our own errors are already specific; let them through untouched.
        raise
    except PermissionError as e:
        raise DirectoryAccessError(f"Cannot access directory {directory}: {e}") from e
    except Exception as e:
        raise FileListerError(f"Error checking git repository: {e}") from e
def get_file_listing(directory: str, limit: Optional[int] = None) -> Tuple[List[str], int]:
    """
    Get a list of tracked files in a git repository.

    Uses `git ls-files` for efficient file listing that respects .gitignore rules.
    Returns a tuple containing the list of files (truncated if limit is specified)
    and the total count of files. A directory that is not a git repository
    yields an empty listing.

    Args:
        directory: Path to the git repository
        limit: Optional maximum number of files to return

    Returns:
        Tuple[List[str], int]: Tuple containing:
            - Sorted list of file paths (truncated to limit if specified)
            - Total number of files (before truncation)

    Raises:
        DirectoryNotFoundError: If directory does not exist
        DirectoryAccessError: If directory cannot be accessed
        GitCommandError: If git command fails
        FileListerError: For other unexpected errors
    """
    try:
        # Check if directory is a git repo first
        if not is_git_repo(directory):
            return [], 0

        # -z makes git emit NUL-terminated, unquoted paths. Without it,
        # names containing special or non-ASCII characters come back
        # C-quoted/escaped and no longer match the files on disk.
        result = subprocess.run(
            ["git", "ls-files", "-z"],
            cwd=directory,
            capture_output=True,
            text=True,
            check=True
        )

        # Split on NUL and drop the empty trailing entry; names are kept
        # verbatim (no stripping) so whitespace in paths survives.
        files = sorted(name for name in result.stdout.split("\0") if name)

        # Get total count before truncation
        total_count = len(files)

        # Truncate if limit specified
        if limit is not None:
            files = files[:limit]

        return files, total_count
    except FileListerError:
        # Already one of our specific errors (e.g. from is_git_repo).
        raise
    except subprocess.CalledProcessError as e:
        raise GitCommandError(f"Git command failed: {e}") from e
    except PermissionError as e:
        raise DirectoryAccessError(f"Cannot access directory {directory}: {e}") from e
    except Exception as e:
        raise FileListerError(f"Error listing files: {e}") from e

125
ra_aid/project_info.py Normal file
View File

@ -0,0 +1,125 @@
"""Module providing unified interface for project information."""
from dataclasses import dataclass
from typing import List, Optional
from rich.console import Console
from rich.markdown import Markdown
from rich.panel import Panel
__all__ = ['ProjectInfo', 'ProjectInfoError', 'get_project_info', 'format_project_info', 'display_project_status']
from ra_aid.project_state import is_new_project, ProjectStateError
from ra_aid.file_listing import get_file_listing, FileListerError
@dataclass
class ProjectInfo:
    """Snapshot of a project's state together with its tracked files.

    Attributes:
        is_new: True when the project is considered new/empty
        files: Tracked file paths, possibly cut short by a limit
        total_files: Number of tracked files before any limit was applied
    """

    is_new: bool
    files: List[str]
    total_files: int
class ProjectInfoError(Exception):
    """Root exception for failures while gathering project information."""
def get_project_info(directory: str, file_limit: Optional[int] = None) -> ProjectInfo:
    """
    Collect the new/existing status and the tracked file listing of a project.

    Args:
        directory: Path to the project directory
        file_limit: Optional cap on the number of files included in the listing

    Returns:
        ProjectInfo: The combined project status and file listing

    Raises:
        ProjectStateError: Propagated unchanged from the project state check
        FileListerError: Propagated unchanged from the file listing
        ProjectInfoError: Wraps any other exception raised while collecting
    """
    try:
        # The state check runs first, so its errors take precedence.
        project_is_new = is_new_project(directory)
        tracked, tracked_total = get_file_listing(directory, limit=file_limit)
        return ProjectInfo(
            is_new=project_is_new,
            files=tracked,
            total_files=tracked_total,
        )
    except (ProjectStateError, FileListerError):
        # Known, already-specific errors pass through as-is.
        raise
    except Exception as e:
        # Anything else is reported under this module's own error type.
        raise ProjectInfoError(f"Error getting project info: {e}")
def format_project_info(info: ProjectInfo) -> str:
    """Render project information as plain text.

    Args:
        info: ProjectInfo object to render

    Returns:
        Text with the project status, the file count and one "- path" line
        per listed file, followed by a note when the listing is truncated
    """
    status = "Existing Project"
    if info.is_new:
        status = "New/Empty Project"

    # Nothing tracked at all: fixed short form.
    if info.total_files == 0:
        return f"Project Status: {status}\nTotal Files: 0\nFiles: None"

    shown = len(info.files)
    truncated = shown < info.total_files

    # "N of M" only when fewer files are listed than exist.
    if truncated:
        count_text = f"{shown} of {info.total_files}"
    else:
        count_text = str(info.total_files)

    listing = "\n".join(f"- {name}" for name in info.files)
    text = f"Project Status: {status}\nTotal Files: {count_text}\nFiles:\n{listing}"
    if truncated:
        text += f"\n[Note: Showing {shown} of {info.total_files} total files]"
    return text
def display_project_status(info: ProjectInfo) -> None:
    """Print a panel summarising the project status to the console.

    Args:
        info: ProjectInfo object describing the project
    """
    shown = len(info.files)
    truncated = shown < info.total_files

    if info.is_new:
        status = "New/Empty Project"
    else:
        status = "Existing Project"

    # "N of M" only when fewer files are listed than exist.
    file_count = f"{shown} of {info.total_files}" if truncated else str(info.total_files)

    # Markdown source for the panel body.
    status_text = f"""
# Project Status
- **Status:** {status}
- **Total Files:** {file_count}
"""
    if truncated:
        status_text += f"\n[*Note: File listing truncated ({shown} of {info.total_files} shown)*]"

    # Surrounding blank lines are stripped before rendering.
    panel = Panel(Markdown(status_text.strip()), title="📊 Project Status")
    Console().print(panel)

66
ra_aid/project_state.py Normal file
View File

@ -0,0 +1,66 @@
"""Module for determining project state and initialization status."""
from pathlib import Path
from typing import Set
class ProjectStateError(Exception):
    """Root of the exception hierarchy for project state checks."""


class DirectoryNotFoundError(ProjectStateError):
    """Signals that the requested directory is missing."""


class DirectoryAccessError(ProjectStateError):
    """Signals that permissions prevent access to the directory."""
def is_new_project(directory: str) -> bool:
    """
    Determine if a directory represents a new/empty project.

    A project is considered new if it either:
    - Is an empty directory
    - Contains only .git directory and/or .gitignore file

    Only top-level entries are inspected; the contents of .git are ignored.

    Args:
        directory: String path to the directory to check

    Returns:
        bool: True if the directory is empty or contains only git files,
            False otherwise

    Raises:
        DirectoryNotFoundError: If the path does not exist or is not a
            directory (a ProjectStateError subclass)
        DirectoryAccessError: If the directory cannot be accessed
        ProjectStateError: For other unexpected errors
    """
    try:
        path = Path(directory).resolve()
        if not path.exists():
            raise DirectoryNotFoundError(f"Directory does not exist: {directory}")
        if not path.is_dir():
            # Same error type as for a missing directory, matching how the
            # file listing helpers report a non-directory path.
            raise DirectoryNotFoundError(f"Path is not a directory: {directory}")

        try:
            # Top-level names only; .git itself never counts as content.
            contents = {item.name for item in path.iterdir() if item.name != '.git'}
        except PermissionError as e:
            raise DirectoryAccessError(f"Cannot access directory {directory}: {e}") from e

        # New if nothing is left, or the only thing left is .gitignore
        # (the empty set is a subset too).
        return contents.issubset({'.gitignore'})
    except ProjectStateError:
        # Already specific; pass through untouched.
        raise
    except Exception as e:
        raise ProjectStateError(f"Error checking project state: {e}") from e

View File

@ -123,6 +123,19 @@ Work done so far:
{work_log} {work_log}
</work log> </work log>
Project Info:
{project_info}
Project State Handling:
For new/empty projects:
Skip exploratory steps and focus directly on the task
For existing projects:
Start with the provided file listing in Project Info
If file listing was truncated (over 2000 files):
Be aware there may be additional relevant files
Use tools like ripgrep_search and fuzzy_find_project_files to locate specific files
Be very thorough in your research and emit lots of snippets, key facts. If you take more than a few steps, be eager to emit research subtasks.{research_only_note} Be very thorough in your research and emit lots of snippets, key facts. If you take more than a few steps, be eager to emit research subtasks.{research_only_note}
Objective Objective
@ -190,10 +203,21 @@ If this is a trivial task that can be completed in one shot, do the change using
For one shot tasks, still take some time to consider whether compilation, testing, or additional validation should be done to check your work. For one shot tasks, still take some time to consider whether compilation, testing, or additional validation should be done to check your work.
If you implement the task yourself, do not request implementation. If you implement the task yourself, do not request implementation.
Thoroughness and Completeness Thoroughness and Completeness:
If this is determined to be a new/empty project (shown in Project Info), focus directly on the task.
If it is an existing project:
Start with the provided file listing in Project Info
If file listing was truncated (over 2000 files):
Be aware there may be additional relevant files
Use tools like ripgrep_search and fuzzy_find_project_files to locate specific files
If this is determined to be a new/empty project (no code or files), state that and stop. Then explore the project fully:
If it is an existing project, explore it fully: Start at the root directory, ls to see what's there.
For each directory found, navigate in and run ls again.
If this is a monorepo or multi-module project, thoroughly discover all directories and files related to the tasksometimes user requests will span multiple modules or parts of the monorepo.
When you find related files, search for files related to those that could be affected, and so on, until you're sure you've gone deep enough. Err on the side of going too deep.
Continue this process until you have discovered all directories and files at all levels.
Carefully report what you found, including all directories and files.
Start at the root directory, ls to see whats there. Start at the root directory, ls to see whats there.
For each directory found, navigate in and run ls again. For each directory found, navigate in and run ls again.
If this is a monorepo or multi-module project, thoroughly discover all directories and files related to the tasksometimes user requests will span multiple modules or parts of the monorepo. If this is a monorepo or multi-module project, thoroughly discover all directories and files related to the tasksometimes user requests will span multiple modules or parts of the monorepo.
@ -336,6 +360,21 @@ Work done so far:
{work_log} {work_log}
</work log> </work log>
Project Info:
{project_info}
Project State Handling:
For new/empty projects:
Skip exploratory steps and focus directly on the task
For existing projects:
Start with the provided file listing in Project Info
If file listing was truncated (over 2000 files):
Be aware there may be additional relevant files
Use tools like ripgrep_search and fuzzy_find_project_files to locate specific files
Then explore the project fully:
Be very thorough in your research and emit lots of snippets, key facts. If you take more than a few steps, be eager to emit research subtasks. Be very thorough in your research and emit lots of snippets, key facts. If you take more than a few steps, be eager to emit research subtasks.
Objective Objective
@ -396,10 +435,15 @@ No Planning or Problem-Solving
You must remain strictly within the bounds of describing what currently exists. You must remain strictly within the bounds of describing what currently exists.
Thoroughness and Completeness Thoroughness and Completeness:
If this is determined to be a new/empty project (shown in Project Info), focus directly on the task.
If it is an existing project:
Start with the provided file listing in Project Info
If file listing was truncated (over 2000 files):
Be aware there may be additional relevant files
Use tools like ripgrep_search and fuzzy_find_project_files to locate specific files
If this is determined to be a new/empty project (no code or files), state that and stop. Then explore the project fully:
If it is an existing project, explore it fully:
Start at the root directory, ls to see what's there. Start at the root directory, ls to see what's there.
For each directory found, navigate in and run ls again. For each directory found, navigate in and run ls again.
If this is a monorepo or multi-module project, thoroughly discover all directories and files related to the tasksometimes user requests will span multiple modules or parts of the monorepo. If this is a monorepo or multi-module project, thoroughly discover all directories and files related to the tasksometimes user requests will span multiple modules or parts of the monorepo.

View File

@ -197,7 +197,7 @@ def emit_key_snippets(snippets: List[SnippetInfo]) -> str:
This is for **existing**, or **just-written** files, not for things to be created in the future. This is for **existing**, or **just-written** files, not for things to be created in the future.
Args: Args:
snippets: List of snippet information dictionaries containing: snippets: REQUIRED List of snippet information dictionaries containing:
- filepath: Path to the source file - filepath: Path to the source file
- line_number: Line number where the snippet starts - line_number: Line number where the snippet starts
- snippet: The source code snippet text - snippet: The source code snippet text

123
tests/test_file_listing.py Normal file
View File

@ -0,0 +1,123 @@
"""Tests for file listing functionality."""
import os
import pytest
from pathlib import Path
import subprocess
from ra_aid.file_listing import (
get_file_listing,
is_git_repo,
DirectoryNotFoundError,
DirectoryAccessError,
GitCommandError,
FileListerError
)
@pytest.fixture
def empty_git_repo(tmp_path):
    """Create an empty git repository in the temporary directory."""
    # check=True: fail in setup if git cannot initialise the repository,
    # instead of letting later assertions fail for an unclear reason.
    subprocess.run(["git", "init"], cwd=tmp_path, capture_output=True, check=True)
    return tmp_path
@pytest.fixture
def sample_git_repo(empty_git_repo):
    """Create a git repository with sample files added and committed."""
    # Create some files
    files = [
        "README.md",
        "src/main.py",
        "src/utils.py",
        "tests/test_main.py",
        "docs/index.html"
    ]

    for file_path in files:
        full_path = empty_git_repo / file_path
        full_path.parent.mkdir(parents=True, exist_ok=True)
        full_path.write_text(f"Content of {file_path}")

    # Extend the current environment rather than replacing it: passing
    # only the identity variables would drop PATH and everything else
    # the git child process may need.
    git_env = {
        **os.environ,
        "GIT_AUTHOR_NAME": "Test",
        "GIT_AUTHOR_EMAIL": "test@example.com",
        "GIT_COMMITTER_NAME": "Test",
        "GIT_COMMITTER_EMAIL": "test@example.com",
    }

    # `git ls-files` reads the index, so staging must succeed; fail loudly
    # here instead of in a later assertion.
    subprocess.run(
        ["git", "add", "."],
        cwd=empty_git_repo,
        capture_output=True,
        check=True,
    )
    subprocess.run(
        ["git", "commit", "-m", "Initial commit"],
        cwd=empty_git_repo,
        env=git_env,
    )

    return empty_git_repo
def test_is_git_repo(sample_git_repo, tmp_path_factory):
    """A repository is detected as such; a plain directory is not."""
    # A separate temporary directory that was never `git init`-ed.
    plain_dir = tmp_path_factory.mktemp("non_repo")

    assert is_git_repo(str(sample_git_repo)) is True
    assert is_git_repo(str(plain_dir)) is False
def test_get_file_listing_no_limit(sample_git_repo):
    """Without a limit every tracked file is returned."""
    listed, count = get_file_listing(str(sample_git_repo))

    assert (len(listed), count) == (5, 5)
    for expected in ("README.md", "src/main.py"):
        assert expected in listed
    assert all(isinstance(entry, str) for entry in listed)
def test_get_file_listing_with_limit(sample_git_repo):
    """A limit shortens the returned list but not the reported total."""
    listed, count = get_file_listing(str(sample_git_repo), limit=2)

    assert len(listed) == 2
    # The total still reflects all five tracked files.
    assert count == 5
def test_empty_git_repo(empty_git_repo):
    """A repository without tracked files yields an empty listing."""
    listed, count = get_file_listing(str(empty_git_repo))

    assert (len(listed), count) == (0, 0)
def test_non_git_directory(tmp_path):
    """A directory that is not a repository yields an empty listing."""
    listed, count = get_file_listing(str(tmp_path))

    assert (len(listed), count) == (0, 0)
def test_nonexistent_directory():
    """Listing a path that does not exist raises DirectoryNotFoundError."""
    missing = "/nonexistent/path/123456"
    with pytest.raises(DirectoryNotFoundError):
        get_file_listing(missing)
def test_file_as_directory(tmp_path):
    """Listing a regular file instead of a directory raises DirectoryNotFoundError."""
    regular_file = tmp_path / "test.txt"
    regular_file.write_text("test")

    with pytest.raises(DirectoryNotFoundError):
        get_file_listing(str(regular_file))
@pytest.mark.skipif(os.name == "nt", reason="Permission tests unreliable on Windows")
def test_permission_error(tmp_path):
    """An inaccessible directory raises DirectoryAccessError."""
    # Root is not restricted by permission bits, so the expected error
    # cannot be provoked this way; skip instead of failing spuriously.
    if hasattr(os, "geteuid") and os.geteuid() == 0:
        pytest.skip("Permission bits are not enforced for root")

    try:
        # Make directory unreadable
        os.chmod(tmp_path, 0o000)
        with pytest.raises(DirectoryAccessError):
            get_file_listing(str(tmp_path))
    finally:
        # Restore permissions to allow cleanup
        os.chmod(tmp_path, 0o755)

108
tests/test_project_info.py Normal file
View File

@ -0,0 +1,108 @@
"""Tests for project info functionality."""
import os
import subprocess
import pytest
from pathlib import Path
from ra_aid.project_info import (
get_project_info,
ProjectInfo,
ProjectInfoError
)
from ra_aid.project_state import DirectoryNotFoundError, DirectoryAccessError
from ra_aid.file_listing import GitCommandError
@pytest.fixture
def empty_git_repo(tmp_path):
    """Create an empty git repository in the temporary directory."""
    # subprocess is already imported at module level, so no local import.
    # check=True: fail in setup if git cannot initialise the repository,
    # instead of letting later assertions fail for an unclear reason.
    subprocess.run(["git", "init"], cwd=tmp_path, capture_output=True, check=True)
    return tmp_path
@pytest.fixture
def sample_git_repo(empty_git_repo):
    """Create a git repository with sample files added and committed."""
    # Create some files
    files = [
        "README.md",
        "src/main.py",
        "src/utils.py",
        "tests/test_main.py",
        "docs/index.html"
    ]

    for file_path in files:
        full_path = empty_git_repo / file_path
        full_path.parent.mkdir(parents=True, exist_ok=True)
        full_path.write_text(f"Content of {file_path}")

    # Extend the current environment rather than replacing it: passing
    # only the identity variables would drop PATH and everything else
    # the git child process may need.
    git_env = {
        **os.environ,
        "GIT_AUTHOR_NAME": "Test",
        "GIT_AUTHOR_EMAIL": "test@example.com",
        "GIT_COMMITTER_NAME": "Test",
        "GIT_COMMITTER_EMAIL": "test@example.com",
    }

    # `git ls-files` reads the index, so staging must succeed; fail loudly
    # here instead of in a later assertion.
    subprocess.run(
        ["git", "add", "."],
        cwd=empty_git_repo,
        capture_output=True,
        check=True,
    )
    subprocess.run(
        ["git", "commit", "-m", "Initial commit"],
        cwd=empty_git_repo,
        env=git_env,
    )

    return empty_git_repo
def test_empty_git_repo(empty_git_repo):
    """A freshly initialised repository is reported as new with no files."""
    result = get_project_info(str(empty_git_repo))

    assert isinstance(result, ProjectInfo)
    assert result.is_new is True
    assert (len(result.files), result.total_files) == (0, 0)
def test_sample_git_repo(sample_git_repo):
    """A populated repository is reported as existing with all its files."""
    result = get_project_info(str(sample_git_repo))

    assert isinstance(result, ProjectInfo)
    assert result.is_new is False
    assert (len(result.files), result.total_files) == (5, 5)
    assert "README.md" in result.files
def test_file_limit(sample_git_repo):
    """file_limit shortens the listing but not the reported total."""
    result = get_project_info(str(sample_git_repo), file_limit=2)

    assert len(result.files) == 2
    # The total still reflects all five tracked files.
    assert result.total_files == 5
def test_nonexistent_directory():
    """A path that does not exist raises DirectoryNotFoundError."""
    missing = "/nonexistent/path/123456"
    with pytest.raises(DirectoryNotFoundError):
        get_project_info(missing)
def test_file_as_directory(tmp_path):
    """A regular file passed as the project directory raises DirectoryNotFoundError."""
    regular_file = tmp_path / "test.txt"
    regular_file.write_text("test")

    with pytest.raises(DirectoryNotFoundError):
        get_project_info(str(regular_file))
@pytest.mark.skipif(os.name == "nt", reason="Permission tests unreliable on Windows")
def test_permission_error(tmp_path):
    """An inaccessible directory raises DirectoryAccessError."""
    # Root is not restricted by permission bits, so the expected error
    # cannot be provoked this way; skip instead of failing spuriously.
    if hasattr(os, "geteuid") and os.geteuid() == 0:
        pytest.skip("Permission bits are not enforced for root")

    try:
        # Make directory unreadable
        os.chmod(tmp_path, 0o000)
        with pytest.raises(DirectoryAccessError):
            get_project_info(str(tmp_path))
    finally:
        # Restore permissions to allow cleanup
        os.chmod(tmp_path, 0o755)

100
tests/test_project_state.py Normal file
View File

@ -0,0 +1,100 @@
"""Tests for project state detection functionality."""
import os
import pytest
from pathlib import Path
from ra_aid.project_state import (
is_new_project,
DirectoryNotFoundError,
DirectoryAccessError,
ProjectStateError
)
@pytest.fixture
def empty_dir(tmp_path):
    """Provide pytest's temporary directory untouched, i.e. with no entries."""
    return tmp_path
@pytest.fixture
def git_only_dir(tmp_path):
    """Provide a directory holding only an empty .git folder and a .gitignore."""
    (tmp_path / ".git").mkdir()
    (tmp_path / ".gitignore").write_text("*.pyc\n")
    return tmp_path
@pytest.fixture
def project_dir(tmp_path):
    """Provide a directory with a src folder and a README."""
    source_dir = tmp_path / "src"
    readme = tmp_path / "README.md"

    source_dir.mkdir()
    readme.write_text("# Test Project")
    return tmp_path
def test_empty_directory(empty_dir):
    """A directory with no entries counts as a new project."""
    outcome = is_new_project(str(empty_dir))
    assert outcome is True
def test_git_only_directory(git_only_dir):
    """A directory holding just .git and .gitignore counts as a new project."""
    outcome = is_new_project(str(git_only_dir))
    assert outcome is True
@pytest.fixture
def git_dir_with_contents(tmp_path):
    """Provide a directory with a populated .git folder plus a .gitignore."""
    repo_meta = tmp_path / ".git"
    repo_meta.mkdir()

    # A few entries inside .git
    meta_files = (
        ("HEAD", "ref: refs/heads/main"),
        ("config", "[core]\n\trepositoryformatversion = 0"),
    )
    for file_name, text in meta_files:
        (repo_meta / file_name).write_text(text)
    (repo_meta / "refs").mkdir()

    # The only top-level entry besides .git
    (tmp_path / ".gitignore").write_text("*.pyc\n")

    return tmp_path
def test_git_directory_with_contents(git_dir_with_contents):
    """Files inside .git do not stop a directory from counting as new."""
    outcome = is_new_project(str(git_dir_with_contents))
    assert outcome is True
def test_existing_project_directory(project_dir):
    """A directory with project files does not count as a new project."""
    outcome = is_new_project(str(project_dir))
    assert outcome is False
def test_nonexistent_directory():
    """A path that does not exist raises DirectoryNotFoundError."""
    missing = "/nonexistent/path/123456"
    with pytest.raises(DirectoryNotFoundError):
        is_new_project(missing)
def test_file_as_directory(tmp_path):
    """A regular file passed as the directory raises ProjectStateError."""
    regular_file = tmp_path / "test.txt"
    regular_file.write_text("test")

    with pytest.raises(ProjectStateError):
        is_new_project(str(regular_file))
@pytest.mark.skipif(os.name == "nt", reason="Permission tests unreliable on Windows")
def test_permission_error(tmp_path):
    """An inaccessible directory raises DirectoryAccessError."""
    # Root is not restricted by permission bits, so the expected error
    # cannot be provoked this way; skip instead of failing spuriously.
    if hasattr(os, "geteuid") and os.geteuid() == 0:
        pytest.skip("Permission bits are not enforced for root")

    try:
        # Make directory unreadable
        os.chmod(tmp_path, 0o000)
        with pytest.raises(DirectoryAccessError):
            is_new_project(str(tmp_path))
    finally:
        # Restore permissions to allow cleanup
        os.chmod(tmp_path, 0o755)