key snippet gc

This commit is contained in:
AI Christianson 2025-03-02 19:41:48 -05:00
parent 772ce3e049
commit 332cbec826
5 changed files with 165 additions and 36 deletions

View File

@ -610,9 +610,6 @@ def main():
memory=planning_memory, memory=planning_memory,
config=config, config=config,
) )
# Run cleanup tasks before exiting database context
run_cleanup()
except (KeyboardInterrupt, AgentInterrupt): except (KeyboardInterrupt, AgentInterrupt):
print() print()
@ -620,18 +617,5 @@ def main():
print() print()
sys.exit(0) sys.exit(0)
def run_cleanup():
    """Run the post-execution cleanup step, logging any failure instead of raising."""
    try:
        # Imported inside the function so the gc agent module is only loaded
        # when cleanup actually runs.
        from ra_aid.agents.key_facts_gc_agent import run_key_facts_gc_agent

        # Invoked unconditionally; no fact-count check is made here.
        run_key_facts_gc_agent()
    except Exception as exc:
        logger.error(f"Failed to run cleanup tasks: {str(exc)}")
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -1,26 +1,18 @@
""" """
Key facts gc agent package. Agent package for various specialized agents.
This package contains the agent responsible for cleaning up key facts This package contains agents responsible for specific tasks such as
in the database when they exceed a certain threshold. cleaning up key facts and key snippets in the database when they
exceed certain thresholds.
Includes agents for:
- Key facts garbage collection
- Key snippets garbage collection
""" """
from typing import Optional from typing import Optional
def run_key_facts_gc_agent(max_facts: int = 20) -> None: from ra_aid.agents.key_facts_gc_agent import run_key_facts_gc_agent
""" from ra_aid.agents.key_snippets_gc_agent import run_key_snippets_gc_agent
Run the key facts gc agent to reduce key facts to the specified maximum.
__all__ = ["run_key_facts_gc_agent", "run_key_snippets_gc_agent"]
This agent evaluates the importance of key facts and removes the least important ones
when the total count exceeds the maximum threshold.
Args:
max_facts: Maximum number of key facts to keep (defaults to 20)
"""
# This is a placeholder function that will be implemented later
# The actual implementation will:
# 1. Fetch all key facts from the database
# 2. Evaluate their importance based on certain criteria
# 3. Sort them by importance
# 4. Delete the least important ones until only max_facts remain
pass

View File

@ -0,0 +1,85 @@
"""
Key snippets gc agent implementation.
This agent is responsible for maintaining the code snippet knowledge base by pruning less important
snippets when the total number exceeds a specified threshold. The agent evaluates all
key snippets and deletes the least valuable ones to keep the database clean and relevant.
"""
from typing import List
from langchain_core.tools import tool
from rich.console import Console
from rich.markdown import Markdown
from rich.panel import Panel
from ra_aid.agent_utils import create_agent, run_agent_with_retry
from ra_aid.database.repositories.key_snippet_repository import KeySnippetRepository
from ra_aid.llm import initialize_llm
from ra_aid.prompts.key_snippets_gc_prompts import KEY_SNIPPETS_GC_PROMPT
from ra_aid.tools.memory import delete_key_snippets, log_work_event, _global_memory
# Module-level Rich console used to print the garbage-collection status panels.
console = Console()
# Repository instance used to read the stored key snippets.
key_snippet_repository = KeySnippetRepository()
def run_key_snippets_gc_agent() -> None:
    """Run the key snippets gc agent to maintain a reasonable number of key snippets.

    Prints a status panel with the current snippet count. When at least one
    snippet is stored, every snippet is rendered into KEY_SNIPPETS_GC_PROMPT,
    an agent equipped with the delete_key_snippets tool is run on that prompt,
    and a final panel reports the snippet count before and after the run.
    When no snippets are stored, an informational panel is printed and no
    agent is created.
    """
    snippet_count = len(key_snippet_repository.get_all())

    # Display status panel with snippet count included
    console.print(Panel(f"Gathering my thoughts...\nCurrent number of key snippets: {snippet_count}", title="🗑 Garbage Collection"))

    # Nothing to prune: skip building and running the agent.
    if snippet_count == 0:
        console.print(Panel("No key snippets to clean.", title="🗑 GC Info"))
        return

    # Render every snippet (id, location, description, code) for the prompt.
    snippets_dict = key_snippet_repository.get_snippets_dict()
    formatted_snippets = "\n".join(
        f"Snippet #{k}: filepath={v['filepath']}, line_number={v['line_number']}, description={v['description']}\n```python\n{v['snippet']}\n```"
        for k, v in snippets_dict.items()
    )

    # Provider/model/temperature come from the config held in global memory,
    # falling back to the defaults below when a key is absent.
    llm_config = _global_memory.get("config", {})
    model = initialize_llm(
        llm_config.get("provider", "anthropic"),
        llm_config.get("model", "claude-3-7-sonnet-20250219"),
        temperature=llm_config.get("temperature"),
    )

    # The agent's only tool is delete_key_snippets.
    agent = create_agent(model, [delete_key_snippets])
    prompt = KEY_SNIPPETS_GC_PROMPT.format(key_snippets=formatted_snippets)

    # Set a reasonable recursion limit for the agent run.
    agent_config = {"recursion_limit": 50}
    run_agent_with_retry(agent, prompt, agent_config)

    # Re-query so the report reflects what the agent actually deleted.
    updated_count = len(key_snippet_repository.get_all())

    # Separate the before/after counts; printing them back to back
    # (e.g. "2510") made the summary unreadable.
    console.print(
        Panel(
            f"Cleaned key snippets: {snippet_count} → {updated_count}",
            title="🗑 GC Complete",
        )
    )

View File

@ -0,0 +1,57 @@
"""
Key snippets gc-specific prompts for the AI agent system.
This module contains the prompt for the key snippets gc agent that is
responsible for evaluating and trimming down the stored key snippets to keep
only the most valuable ones, ensuring that the collection remains manageable.
"""
# Prompt template for the key snippets gc agent. It is rendered with
# str.format(key_snippets=...), so {key_snippets} is the only placeholder and
# any literal brace added to this text must be doubled to survive formatting.
KEY_SNIPPETS_GC_PROMPT = """
You are a Key Snippets Cleaner agent responsible for maintaining the code snippet knowledge base by pruning less important snippets.
<key snippets>
{key_snippets}
</key snippets>
Task:
Your task is to analyze all the key snippets in the system and determine which ones should be kept and which ones should be removed.
Guidelines for evaluation:
1. Review all key snippets and their IDs
2. Identify which snippets are lowest value/most ephemeral based on:
- Relevance to the overall project
- Specificity and actionability of the code
- Long-term value vs. temporary relevance
- Uniqueness of the information (avoid redundancy)
- How fundamental the snippet is to understanding the codebase
3. Trim down the collection to keep no more than 10 highest value, longest-lasting snippets
4. For each snippet you decide to delete, provide a brief explanation of your reasoning
Retention priority (from highest to lowest):
- Core architectural code that demonstrates project structure
- Critical implementation details that affect multiple parts of the system
- Important design patterns and conventions
- API endpoints and interfaces
- Configuration requirements
- Complex algorithms
- Error handling patterns
- Testing approaches
- Simple helper functions or boilerplate code that is easily rediscovered
- If there are contradictory snippets, that probably means that the older snippet is outdated and should be deleted.
For snippets of similar importance, prefer to keep more recent snippets if they supersede older information.
Output:
1. List the IDs of snippets to be deleted using the delete_key_snippets tool with the IDs provided as a list [ids...], NOT as a comma-separated string
2. Provide a brief explanation for each deletion decision
3. Explain your overall approach to selecting which snippets to keep
IMPORTANT:
- Use the delete_key_snippets tool with multiple IDs at once in a single call, rather than making multiple individual deletion calls
- The delete_key_snippets tool accepts a list of IDs in the format [id1, id2, id3, ...], not as a comma-separated string
- Batch deletion is much more efficient than calling the deletion function multiple times
- Collect all IDs to delete first, then make a single call to delete_key_snippets with the complete list
Remember: Your goal is to maintain a concise, high-value code snippet collection that preserves essential project understanding while removing ephemeral or less critical snippets.
"""

View File

@ -242,6 +242,17 @@ def emit_key_snippet(snippet_info: SnippetInfo) -> str:
) )
log_work_event(f"Stored code snippet #{snippet_id}.") log_work_event(f"Stored code snippet #{snippet_id}.")
# Check if we need to clean up snippets (more than 20)
all_snippets = key_snippet_repository.get_all()
if len(all_snippets) > 20:
# Trigger the key snippets cleaner agent
try:
from ra_aid.agents.key_snippets_gc_agent import run_key_snippets_gc_agent
run_key_snippets_gc_agent()
except Exception as e:
logger.error(f"Failed to run key snippets cleaner: {str(e)}")
return f"Snippet #{snippet_id} stored." return f"Snippet #{snippet_id} stored."