RA.Aid/ra_aid/agents/research_notes_gc_agent.py

392 lines
17 KiB
Python

"""
Research notes gc agent implementation.
This agent is responsible for maintaining the collection of research notes by pruning less
important notes when the total number exceeds a specified threshold. The agent evaluates all
research notes and deletes the least valuable ones to keep the database clean and relevant.
"""
import logging
from typing import List
from langchain_core.tools import tool
from rich.console import Console
from rich.markdown import Markdown
from rich.panel import Panel
# Module-level logger, named after this module, used for repository/processing errors.
logger = logging.getLogger(__name__)
from ra_aid.agent_context import mark_should_exit
from ra_aid.agent_utils import create_agent, run_agent_with_retry
from ra_aid.database.repositories.research_note_repository import get_research_note_repository
from ra_aid.database.repositories.human_input_repository import get_human_input_repository
from ra_aid.database.repositories.config_repository import get_config_repository
from ra_aid.database.repositories.trajectory_repository import get_trajectory_repository
from ra_aid.llm import initialize_llm
from ra_aid.model_formatters.research_notes_formatter import format_research_note
from ra_aid.tools.memory import log_work_event
# Shared rich console used by this module to print warnings and status panels.
console = Console()
def _format_note_lines(notes):
    """Render (note_id, content) pairs as Markdown bullets, truncating content to 100 chars."""
    return "\n".join(
        f"- #{note_id}: {content[:100]}..." if len(content) > 100 else f"- #{note_id}: {content}"
        for note_id, content in notes
    )


def _record_note_deletion_step(step_data):
    """Store one ``gc_operation`` trajectory record for the deletion tool; never raises."""
    try:
        trajectory_repo = get_trajectory_repository()
        human_input_id = get_human_input_repository().get_most_recent_id()
        trajectory_repo.create(
            step_data=step_data,
            record_type="gc_operation",
            human_input_id=human_input_id,
            tool_name="research_notes_gc_agent",
        )
    except Exception as e:
        # Trajectory recording is best-effort: leave a trace for debugging, but
        # never let it interfere with the deletion itself.
        logger.debug("Could not record research notes GC trajectory: %s", e)


# NOTE: the docstring below is kept terse on purpose -- the ``@tool`` decorator
# exposes it to the model as the tool description.
@tool
def delete_research_notes(note_ids: List[int]) -> str:
    """Delete multiple research notes by their IDs.

    Args:
        note_ids: List of IDs of the research notes to delete

    Returns:
        str: Success or failure message
    """
    deleted_notes = []
    not_found_notes = []
    failed_notes = []
    protected_notes = []

    # Notes attached to the most recent human input are protected from deletion.
    # If the lookup fails we only warn and continue with nothing protected.
    current_human_input_id = None
    try:
        current_human_input_id = get_human_input_repository().get_most_recent_id()
    except Exception as e:
        console.print(f"Warning: Could not retrieve current human input: {str(e)}")

    # De-duplicate while preserving order: a repeated ID would otherwise be
    # reported as "deleted" and then again as "not found".
    for note_id in dict.fromkeys(note_ids or []):
        try:
            repo = get_research_note_repository()
            # Fetch first so the content can be shown in the result message.
            note = repo.get(note_id)
            if not note:
                not_found_notes.append(note_id)
                continue

            if current_human_input_id is not None and note.human_input_id == current_human_input_id:
                protected_notes.append((note_id, note.content))
                continue

            if repo.delete(note_id):
                deleted_notes.append((note_id, note.content))
                log_work_event(f"Deleted research note {note_id}.")
            else:
                failed_notes.append(note_id)
        except RuntimeError as e:
            logger.error("Failed to access research note repository: %s", e)
            failed_notes.append(note_id)
        except Exception as e:
            # Any other failure affects only this note; keep processing the rest.
            logger.error("Error processing research note %s: %s", note_id, e)
            failed_notes.append(note_id)

    # Build the result message returned to the calling agent.
    result_parts = []

    if deleted_notes:
        deleted_msg = "Successfully deleted research notes:\n" + _format_note_lines(deleted_notes)
        result_parts.append(deleted_msg)
        _record_note_deletion_step(
            {
                "deleted_notes": deleted_notes,
                "display_title": "Research Notes Deleted",
            }
        )
        console.print(
            Panel(Markdown(deleted_msg), title="Research Notes Deleted", border_style="green")
        )

    if protected_notes:
        protected_msg = (
            "Protected research notes (associated with current request):\n"
            + _format_note_lines(protected_notes)
        )
        result_parts.append(protected_msg)
        _record_note_deletion_step(
            {
                "protected_notes": protected_notes,
                "display_title": "Research Notes Protected",
            }
        )
        console.print(
            Panel(Markdown(protected_msg), title="Research Notes Protected", border_style="blue")
        )

    if not_found_notes:
        not_found_ids = ", ".join(f"#{note_id}" for note_id in not_found_notes)
        result_parts.append(f"Research notes not found: {not_found_ids}")

    if failed_notes:
        failed_ids = ", ".join(f"#{note_id}" for note_id in failed_notes)
        result_parts.append(f"Failed to delete research notes: {failed_ids}")

    if not result_parts:
        # Nothing was requested; say so instead of returning an empty string.
        result_parts.append("No research note IDs were provided.")

    # Mark the agent to exit after performing the cleanup operation.
    mark_should_exit()
    return "\n".join(result_parts)
def _record_gc_run_step(step_data, **extra_fields):
    """Store one ``gc_operation`` trajectory record for a GC run; never raises.

    Args:
        step_data: Payload dictionary stored with the trajectory record.
        **extra_fields: Additional keyword arguments forwarded to the trajectory
            repository's ``create`` call (e.g. ``is_error``, ``error_message``).
    """
    try:
        trajectory_repo = get_trajectory_repository()
        human_input_id = get_human_input_repository().get_most_recent_id()
        trajectory_repo.create(
            step_data=step_data,
            record_type="gc_operation",
            human_input_id=human_input_id,
            tool_name="research_notes_gc_agent",
            **extra_fields,
        )
    except Exception as e:
        # Trajectory recording is best-effort: leave a trace for debugging, but
        # never let it abort the garbage collection run.
        logger.debug("Could not record research notes GC trajectory: %s", e)


def run_research_notes_gc_agent(threshold: int = 30) -> None:
    """Run the research notes gc agent to maintain a reasonable number of research notes.

    The agent analyzes the research notes and determines which are the least valuable,
    deleting them to maintain a manageable collection size of high-value notes.
    Notes associated with the current human input are excluded from deletion.

    The run ends early, after printing an info/error panel, when the repository
    cannot be read, when the note count does not exceed ``threshold``, or when
    every note belongs to the current human input.

    Args:
        threshold: Maximum number of research notes to keep before triggering cleanup
    """
    # Load all notes; without repository access there is nothing to do.
    try:
        notes = get_research_note_repository().get_all()
        note_count = len(notes)
    except RuntimeError as e:
        logger.error("Failed to access research note repository: %s", e)
        _record_gc_run_step(
            {
                "error": str(e),
                "display_title": "GC Error",
            },
            is_error=True,
            error_message=str(e),
            error_type="Repository Error",
        )
        console.print(Panel(f"Error: {str(e)}", title="🗑 GC Error", border_style="red"))
        return

    # Status panel with the current note count.
    _record_gc_run_step(
        {
            "note_count": note_count,
            "display_title": "Garbage Collection",
        }
    )
    console.print(
        Panel(
            f"Gathering my thoughts...\nCurrent number of research notes: {note_count}",
            title="🗑 Garbage Collection",
        )
    )

    # Only clean up when the collection exceeds the threshold.
    if note_count <= threshold:
        _record_gc_run_step(
            {
                "note_count": note_count,
                "threshold": threshold,
                "message": "Below threshold - no cleanup needed",
                "display_title": "GC Info",
            }
        )
        console.print(
            Panel(
                f"Research notes count ({note_count}) is below threshold ({threshold}). No cleanup needed.",
                title="🗑 GC Info",
            )
        )
        return

    # Notes attached to the most recent human input are never offered for deletion.
    # If the lookup fails we only warn and treat every note as eligible.
    current_human_input_id = None
    try:
        current_human_input_id = get_human_input_repository().get_most_recent_id()
    except Exception as e:
        console.print(f"Warning: Could not retrieve current human input: {str(e)}")

    eligible_notes = []
    protected_notes = []
    for note in notes:
        if current_human_input_id is not None and note.human_input_id == current_human_input_id:
            protected_notes.append(note)
        else:
            eligible_notes.append(note)
    protected_count = len(protected_notes)

    if not eligible_notes:
        _record_gc_run_step(
            {
                "protected_count": protected_count,
                "message": "All research notes are protected",
                "display_title": "GC Info",
            }
        )
        console.print(
            Panel(
                f"All {protected_count} research notes are associated with the current request and protected from deletion.",
                title="🗑 GC Info",
            )
        )
        return

    # Format the eligible notes for the prompt, keyed by note ID.
    notes_dict = {note.id: note.content for note in eligible_notes}
    formatted_notes = "\n".join(f"Note #{k}: {v}" for k, v in notes_dict.items())

    # Build the model from stored configuration and give it the deletion tool only.
    llm_config = get_config_repository().get_all()
    model = initialize_llm(
        llm_config.get("provider", "anthropic"),
        llm_config.get("model", "claude-3-7-sonnet-20250219"),
        temperature=llm_config.get("temperature"),
    )
    agent = create_agent(model, [delete_research_notes])

    # NOTE(review): the prompt asks for at most ``threshold`` notes among the
    # eligible ones only; protected notes are kept on top of that -- confirm
    # this is the intended target size.
    prompt = f"""
You are a Research Notes Cleaner agent responsible for maintaining the research notes collection by pruning less important notes.
<research notes>
{formatted_notes}
</research notes>
Task:
Your task is to analyze all the research notes in the system and determine which ones should be kept and which ones should be removed.
Guidelines for evaluation:
1. Review all research notes and their IDs
2. Identify which notes are lowest value/most ephemeral based on:
- Relevance to the overall project
- Specificity and actionability of the information
- Long-term value vs. temporary relevance
- Uniqueness of the information (avoid redundancy)
- How fundamental the note is to understanding the context
3. Trim down the collection to keep no more than {threshold} highest value, longest-lasting notes
4. For each note you decide to delete, provide a brief explanation of your reasoning
Retention priority (from highest to lowest):
- Core research findings directly relevant to the project requirements
- Important technical details that affect implementation decisions
- API documentation and usage examples
- Configuration information and best practices
- Alternative approaches considered with pros and cons
- General background information
- Information that is easily found elsewhere or outdated
For notes of similar importance, prefer to keep more recent notes if they supersede older information.
Output:
1. List the IDs of notes to be deleted using the delete_research_notes tool with the IDs provided as a list [ids...], NOT as a comma-separated string
2. Provide a brief explanation for each deletion decision
3. Explain your overall approach to selecting which notes to keep
IMPORTANT:
- Use the delete_research_notes tool with multiple IDs at once in a single call, rather than making multiple individual deletion calls
- The delete_research_notes tool accepts a list of IDs in the format [id1, id2, id3, ...], not as a comma-separated string
- Batch deletion is much more efficient than calling the deletion function multiple times
- Collect all IDs to delete first, then make a single call to delete_research_notes with the complete list
Remember: Your goal is to maintain a concise, high-value collection of research notes that preserves essential information while removing ephemeral or less critical details.
"""

    agent_config = {
        "recursion_limit": 50  # Set a reasonable recursion limit
    }
    run_agent_with_retry(agent, prompt, agent_config)

    # Re-count after the agent has run; fall back to a placeholder if the
    # repository became unavailable.
    try:
        updated_count = len(get_research_note_repository().get_all())
    except RuntimeError as e:
        logger.error("Failed to access research note repository for update count: %s", e)
        updated_count = "unknown"

    _record_gc_run_step(
        {
            "original_count": note_count,
            "updated_count": updated_count,
            "protected_count": protected_count,
            "display_title": "GC Complete",
        }
    )

    # Show "before → after"; the counts were previously rendered back-to-back
    # with no separator (e.g. 35 and 30 displayed as "3530").
    summary = f"Cleaned research notes: {note_count} → {updated_count}"
    if protected_count > 0:
        summary += f"\nProtected notes (associated with current request): {protected_count}"
    console.print(Panel(summary, title="🗑 GC Complete"))