research notes repo

This commit is contained in:
AI Christianson 2025-03-03 22:31:51 -05:00
parent 6562b6c332
commit 035544c77a
17 changed files with 1191 additions and 32 deletions

View File

@ -49,6 +49,9 @@ from ra_aid.database.repositories.key_snippet_repository import (
from ra_aid.database.repositories.human_input_repository import (
HumanInputRepositoryManager, get_human_input_repository
)
from ra_aid.database.repositories.research_note_repository import (
ResearchNoteRepositoryManager, get_research_note_repository
)
from ra_aid.model_formatters import format_key_facts_dict
from ra_aid.model_formatters.key_snippets_formatter import format_key_snippets_dict
from ra_aid.console.output import cpm
@ -401,11 +404,13 @@ def main():
# Initialize repositories with database connection
with KeyFactRepositoryManager(db) as key_fact_repo, \
KeySnippetRepositoryManager(db) as key_snippet_repo, \
HumanInputRepositoryManager(db) as human_input_repo:
HumanInputRepositoryManager(db) as human_input_repo, \
ResearchNoteRepositoryManager(db) as research_note_repo:
# This initializes all repositories and makes them available via their respective get methods
logger.debug("Initialized KeyFactRepository")
logger.debug("Initialized KeySnippetRepository")
logger.debug("Initialized HumanInputRepository")
logger.debug("Initialized ResearchNoteRepository")
# Check dependencies before proceeding
check_dependencies()

View File

@ -87,8 +87,10 @@ from ra_aid.tools.handle_user_defined_test_cmd_execution import execute_test_com
from ra_aid.database.repositories.key_fact_repository import get_key_fact_repository
from ra_aid.database.repositories.key_snippet_repository import get_key_snippet_repository
from ra_aid.database.repositories.human_input_repository import get_human_input_repository
from ra_aid.database.repositories.research_note_repository import get_research_note_repository
from ra_aid.model_formatters import format_key_facts_dict
from ra_aid.model_formatters.key_snippets_formatter import format_key_snippets_dict
from ra_aid.model_formatters.research_notes_formatter import format_research_notes_dict
from ra_aid.tools.memory import (
_global_memory,
get_memory_value,
@ -672,6 +674,15 @@ def run_planning_agent(
logger.error(f"Failed to access key snippet repository: {str(e)}")
key_snippets = ""
# Get formatted research notes using repository
try:
repository = get_research_note_repository()
notes_dict = repository.get_notes_dict()
formatted_research_notes = format_research_notes_dict(notes_dict)
except RuntimeError as e:
logger.error(f"Failed to access research note repository: {str(e)}")
formatted_research_notes = ""
planning_prompt = PLANNING_PROMPT.format(
current_date=current_date,
working_directory=working_directory,
@ -680,7 +691,7 @@ def run_planning_agent(
web_research_section=web_research_section,
base_task=base_task,
project_info=formatted_project_info,
research_notes=get_memory_value("research_notes"),
research_notes=formatted_research_notes,
related_files="\n".join(get_related_files()),
key_facts=key_facts,
key_snippets=key_snippets,
@ -783,6 +794,15 @@ def run_task_implementation_agent(
logger.error(f"Failed to access key fact repository: {str(e)}")
key_facts = ""
# Get formatted research notes using repository
try:
repository = get_research_note_repository()
notes_dict = repository.get_notes_dict()
formatted_research_notes = format_research_notes_dict(notes_dict)
except RuntimeError as e:
logger.error(f"Failed to access research note repository: {str(e)}")
formatted_research_notes = ""
prompt = IMPLEMENTATION_PROMPT.format(
current_date=current_date,
working_directory=working_directory,
@ -793,7 +813,7 @@ def run_task_implementation_agent(
related_files=related_files,
key_facts=key_facts,
key_snippets=format_key_snippets_dict(get_key_snippet_repository().get_snippets_dict()),
research_notes=get_memory_value("research_notes"),
research_notes=formatted_research_notes,
work_log=get_memory_value("work_log"),
expert_section=EXPERT_PROMPT_SECTION_IMPLEMENTATION if expert_enabled else "",
human_section=(

View File

@ -0,0 +1,252 @@
"""
Research notes gc agent implementation.
This agent is responsible for maintaining the collection of research notes by pruning less
important notes when the total number exceeds a specified threshold. The agent evaluates all
research notes and deletes the least valuable ones to keep the database clean and relevant.
"""
import logging
from typing import List
from langchain_core.tools import tool
from rich.console import Console
from rich.markdown import Markdown
from rich.panel import Panel
logger = logging.getLogger(__name__)
from ra_aid.agent_utils import create_agent, run_agent_with_retry
from ra_aid.database.repositories.research_note_repository import get_research_note_repository
from ra_aid.database.repositories.human_input_repository import get_human_input_repository
from ra_aid.llm import initialize_llm
from ra_aid.model_formatters.research_notes_formatter import format_research_note
from ra_aid.tools.memory import log_work_event, _global_memory
console = Console()
@tool
def delete_research_notes(note_ids: List[int]) -> str:
    """Delete multiple research notes by their IDs.

    Args:
        note_ids: List of IDs of the research notes to delete

    Returns:
        str: Success or failure message
    """
    # NOTE: the docstring above is what the @tool decorator exposes as the tool
    # description, so it is intentionally kept short and unchanged.

    def _preview_lines(entries) -> str:
        # One markdown bullet per (id, content) pair; contents longer than
        # 100 characters are truncated and suffixed with "...".
        return "\n".join(
            f"- #{note_id}: {content[:100]}..." if len(content) > 100 else f"- #{note_id}: {content}"
            for note_id, content in entries
        )

    def _id_list(ids) -> str:
        # Comma-separated "#<id>" list for the not-found / failed summaries.
        return ", ".join(f"#{note_id}" for note_id in ids)

    deleted_notes = []      # (id, content) pairs that were removed
    not_found_notes = []    # ids with no matching note
    failed_notes = []       # ids whose deletion raised or reported failure
    protected_notes = []    # (id, content) pairs tied to the current human input

    # Notes created for the most recent human input are protected from deletion.
    # Failing to look that input up is non-fatal: nothing is protected then.
    current_human_input_id = None
    try:
        recent_inputs = get_human_input_repository().get_recent(1)
        if recent_inputs and len(recent_inputs) > 0:
            current_human_input_id = recent_inputs[0].id
    except Exception as e:
        console.print(f"Warning: Could not retrieve current human input: {str(e)}")

    for note_id in note_ids:
        try:
            repository = get_research_note_repository()

            # Fetch first so the content can be shown in the summary.
            note = repository.get(note_id)
            if not note:
                not_found_notes.append(note_id)
                continue

            if current_human_input_id is not None and note.human_input_id == current_human_input_id:
                protected_notes.append((note_id, note.content))
                continue

            if repository.delete(note_id):
                deleted_notes.append((note_id, note.content))
                log_work_event(f"Deleted research note {note_id}.")
            else:
                failed_notes.append(note_id)
        except RuntimeError as e:
            logger.error(f"Failed to access research note repository: {str(e)}")
            failed_notes.append(note_id)
        except Exception as e:
            # Any other error affects only this id; keep processing the rest.
            logger.error(f"Error processing research note {note_id}: {str(e)}")
            failed_notes.append(note_id)

    # Assemble the textual report returned to the caller.
    result_parts = []
    if deleted_notes:
        deleted_msg = "Successfully deleted research notes:\n" + _preview_lines(deleted_notes)
        result_parts.append(deleted_msg)
        console.print(
            Panel(Markdown(deleted_msg), title="Research Notes Deleted", border_style="green")
        )
    if protected_notes:
        protected_msg = (
            "Protected research notes (associated with current request):\n"
            + _preview_lines(protected_notes)
        )
        result_parts.append(protected_msg)
        console.print(
            Panel(Markdown(protected_msg), title="Research Notes Protected", border_style="blue")
        )
    if not_found_notes:
        result_parts.append(f"Research notes not found: {_id_list(not_found_notes)}")
    if failed_notes:
        result_parts.append(f"Failed to delete research notes: {_id_list(failed_notes)}")

    # Every processed id lands in one of the four lists, so an empty report
    # means no ids were supplied; say so instead of returning an empty string.
    if not result_parts:
        return "No research notes were specified for deletion."
    return "\n".join(result_parts)
def run_research_notes_gc_agent(threshold: int = 30) -> None:
    """Run the research notes gc agent to keep the note collection small.

    When more than ``threshold`` notes are stored, an LLM agent equipped with
    the ``delete_research_notes`` tool is asked to prune the least valuable
    ones. Notes associated with the most recent human input are left out of
    the prompt so the agent is not asked to delete them.

    Args:
        threshold: Maximum number of research notes to keep before triggering cleanup
    """
    # Load every note; without repository access there is nothing to do.
    try:
        notes = get_research_note_repository().get_all()
        note_count = len(notes)
    except RuntimeError as e:
        logger.error(f"Failed to access research note repository: {str(e)}")
        console.print(Panel(f"Error: {str(e)}", title="🗑 GC Error", border_style="red"))
        return

    console.print(Panel(f"Gathering my thoughts...\nCurrent number of research notes: {note_count}", title="🗑 Garbage Collection"))

    # Cleanup only runs when the collection is strictly larger than the threshold.
    if note_count <= threshold:
        console.print(Panel(f"Research notes count ({note_count}) does not exceed threshold ({threshold}). No cleanup needed.", title="🗑 GC Info"))
        return

    # Identify the current human input so its notes can be excluded.
    # A lookup failure is non-fatal: no note is treated as protected then.
    current_human_input_id = None
    try:
        recent_inputs = get_human_input_repository().get_recent(1)
        if recent_inputs and len(recent_inputs) > 0:
            current_human_input_id = recent_inputs[0].id
    except Exception as e:
        console.print(f"Warning: Could not retrieve current human input: {str(e)}")

    # Split notes into those the agent may delete and those it must not see.
    eligible_notes = []
    protected_notes = []
    for note in notes:
        if current_human_input_id is not None and note.human_input_id == current_human_input_id:
            protected_notes.append(note)
        else:
            eligible_notes.append(note)

    if not eligible_notes:
        console.print(Panel(f"All {len(protected_notes)} research notes are associated with the current request and protected from deletion.", title="🗑 GC Info"))
        return

    # One "Note #<id>: <content>" line per deletable note.
    formatted_notes = "\n".join(f"Note #{note.id}: {note.content}" for note in eligible_notes)

    # Build the model from the shared configuration stored in global memory.
    llm_config = _global_memory.get("config", {})
    model = initialize_llm(
        llm_config.get("provider", "anthropic"),
        llm_config.get("model", "claude-3-7-sonnet-20250219"),
        temperature=llm_config.get("temperature")
    )

    # The agent's only tool is the batch-deletion tool.
    agent = create_agent(model, [delete_research_notes])

    prompt = f"""
You are a Research Notes Cleaner agent responsible for maintaining the research notes collection by pruning less important notes.
<research notes>
{formatted_notes}
</research notes>
Task:
Your task is to analyze all the research notes in the system and determine which ones should be kept and which ones should be removed.
Guidelines for evaluation:
1. Review all research notes and their IDs
2. Identify which notes are lowest value/most ephemeral based on:
- Relevance to the overall project
- Specificity and actionability of the information
- Long-term value vs. temporary relevance
- Uniqueness of the information (avoid redundancy)
- How fundamental the note is to understanding the context
3. Trim down the collection to keep no more than {threshold} highest value, longest-lasting notes
4. For each note you decide to delete, provide a brief explanation of your reasoning
Retention priority (from highest to lowest):
- Core research findings directly relevant to the project requirements
- Important technical details that affect implementation decisions
- API documentation and usage examples
- Configuration information and best practices
- Alternative approaches considered with pros and cons
- General background information
- Information that is easily found elsewhere or outdated
For notes of similar importance, prefer to keep more recent notes if they supersede older information.
Output:
1. List the IDs of notes to be deleted using the delete_research_notes tool with the IDs provided as a list [ids...], NOT as a comma-separated string
2. Provide a brief explanation for each deletion decision
3. Explain your overall approach to selecting which notes to keep
IMPORTANT:
- Use the delete_research_notes tool with multiple IDs at once in a single call, rather than making multiple individual deletion calls
- The delete_research_notes tool accepts a list of IDs in the format [id1, id2, id3, ...], not as a comma-separated string
- Batch deletion is much more efficient than calling the deletion function multiple times
- Collect all IDs to delete first, then make a single call to delete_research_notes with the complete list
Remember: Your goal is to maintain a concise, high-value collection of research notes that preserves essential information while removing ephemeral or less critical details.
"""

    agent_config = {
        "recursion_limit": 50  # Set a reasonable recursion limit
    }
    run_agent_with_retry(agent, prompt, agent_config)

    # Re-count after the agent ran; fall back to a placeholder if that fails.
    try:
        updated_count = len(get_research_note_repository().get_all())
    except RuntimeError as e:
        logger.error(f"Failed to access research note repository for update count: {str(e)}")
        updated_count = "unknown"

    # Report "before → after"; the separator keeps the two counts readable
    # (without it they were rendered as one fused number).
    summary = f"Cleaned research notes: {note_count} → {updated_count}"
    protected_count = len(protected_notes)
    if protected_count > 0:
        summary += f"\nProtected notes (associated with current request): {protected_count}"
    console.print(Panel(summary, title="🗑 GC Complete"))

View File

@ -42,8 +42,8 @@ def initialize_database():
# to avoid circular imports
# Note: This import needs to be here, not at the top level
try:
from ra_aid.database.models import KeyFact, KeySnippet, HumanInput
db.create_tables([KeyFact, KeySnippet, HumanInput], safe=True)
from ra_aid.database.models import KeyFact, KeySnippet, HumanInput, ResearchNote
db.create_tables([KeyFact, KeySnippet, HumanInput, ResearchNote], safe=True)
logger.debug("Ensured database tables exist")
except Exception as e:
logger.error(f"Error creating tables: {str(e)}")
@ -146,4 +146,20 @@ class KeySnippet(BaseModel):
# created_at and updated_at are inherited from BaseModel
class Meta:
table_name = "key_snippet"
table_name = "key_snippet"
class ResearchNote(BaseModel):
    """
    Database model for a single research note.

    A research note holds detailed findings gathered during research that
    should be kept for later reference. A note may optionally be linked to
    the human input it was produced for.
    """

    # Free-form text of the note.
    content = peewee.TextField()

    # Optional link to a HumanInput row; the reverse accessor on HumanInput
    # is `research_notes`.
    human_input = peewee.ForeignKeyField(
        HumanInput,
        null=True,
        backref="research_notes",
    )

    # created_at and updated_at are inherited from BaseModel

    class Meta:
        table_name = "research_note"

View File

@ -0,0 +1,42 @@
"""
Repository package for database access abstractions.
This package contains repository implementations for various models,
following the repository pattern for data access abstraction.
"""
from ra_aid.database.repositories.human_input_repository import (
HumanInputRepository,
HumanInputRepositoryManager,
get_human_input_repository
)
from ra_aid.database.repositories.key_fact_repository import (
KeyFactRepository,
KeyFactRepositoryManager,
get_key_fact_repository
)
from ra_aid.database.repositories.key_snippet_repository import (
KeySnippetRepository,
KeySnippetRepositoryManager,
get_key_snippet_repository
)
from ra_aid.database.repositories.research_note_repository import (
ResearchNoteRepository,
ResearchNoteRepositoryManager,
get_research_note_repository
)
# Public names re-exported by the repositories package, grouped per model.
__all__ = [
    # Human input
    "HumanInputRepository",
    "HumanInputRepositoryManager",
    "get_human_input_repository",
    # Key facts
    "KeyFactRepository",
    "KeyFactRepositoryManager",
    "get_key_fact_repository",
    # Key snippets
    "KeySnippetRepository",
    "KeySnippetRepositoryManager",
    "get_key_snippet_repository",
    # Research notes
    "ResearchNoteRepository",
    "ResearchNoteRepositoryManager",
    "get_research_note_repository",
]

View File

@ -0,0 +1,255 @@
"""
Research note repository implementation for database access.
This module provides a repository implementation for the ResearchNote model,
following the repository pattern for data access abstraction.
"""
from typing import Dict, List, Optional
import contextvars
from contextlib import contextmanager
import peewee
from ra_aid.database.models import ResearchNote
from ra_aid.logging_config import get_logger
logger = get_logger(__name__)
# Create contextvar to hold the ResearchNoteRepository instance
research_note_repo_var = contextvars.ContextVar("research_note_repo", default=None)
class ResearchNoteRepositoryManager:
    """
    Context manager for ResearchNoteRepository.

    On entry a ResearchNoteRepository is created and published through the
    module-level ``research_note_repo_var`` context variable; on exit the
    variable is restored to the value it had before entry, so nested managers
    do not wipe out the repository of an enclosing one.

    Example:
        with DatabaseManager() as db:
            with ResearchNoteRepositoryManager(db) as repo:
                # Use the repository
                note = repo.create("Research findings about the topic")
                all_notes = repo.get_all()
    """

    def __init__(self, db):
        """
        Initialize the ResearchNoteRepositoryManager.

        Args:
            db: Database connection to use (required)
        """
        self.db = db
        # Token returned by ContextVar.set(); used to restore the prior value.
        self._token = None

    def __enter__(self) -> 'ResearchNoteRepository':
        """
        Create the ResearchNoteRepository, publish it and return it.

        Returns:
            ResearchNoteRepository: The initialized repository
        """
        repo = ResearchNoteRepository(self.db)
        self._token = research_note_repo_var.set(repo)
        return repo

    def __exit__(
        self,
        exc_type: Optional[type],
        exc_val: Optional[Exception],
        exc_tb: Optional[object],
    ) -> bool:
        """
        Restore the context variable when leaving the context.

        Args:
            exc_type: The exception type if an exception was raised
            exc_val: The exception value if an exception was raised
            exc_tb: The traceback if an exception was raised

        Returns:
            bool: Always False, so exceptions are never suppressed
        """
        token, self._token = self._token, None
        if token is None:
            # __exit__ without a matching __enter__: just clear the variable.
            research_note_repo_var.set(None)
        else:
            try:
                research_note_repo_var.reset(token)
            except ValueError:
                # reset() refuses tokens created in a different Context;
                # fall back to clearing the variable.
                research_note_repo_var.set(None)
        # Don't suppress exceptions
        return False
def get_research_note_repository() -> 'ResearchNoteRepository':
    """
    Return the ResearchNoteRepository published for the current context.

    Returns:
        ResearchNoteRepository: The repository stored in ``research_note_repo_var``

    Raises:
        RuntimeError: If the context variable holds no repository (None)
    """
    current = research_note_repo_var.get()
    if current is not None:
        return current
    raise RuntimeError(
        "No ResearchNoteRepository available. "
        "Make sure to initialize one with ResearchNoteRepositoryManager first."
    )
class ResearchNoteRepository:
    """
    Data-access layer for the ResearchNote model.

    Wraps create/read/update/delete operations on ResearchNote so that
    callers do not work with the ORM directly. Database errors are logged
    and then re-raised.

    Example:
        with DatabaseManager() as db:
            with ResearchNoteRepositoryManager(db) as repo:
                note = repo.create("Research findings about the topic")
                all_notes = repo.get_all()
    """

    def __init__(self, db):
        """
        Store the database connection for this repository.

        Args:
            db: Database connection to use (required)

        Raises:
            ValueError: If ``db`` is None
        """
        if db is None:
            raise ValueError("Database connection is required for ResearchNoteRepository")
        self.db = db

    def create(self, content: str, human_input_id: Optional[int] = None) -> ResearchNote:
        """
        Insert a new research note.

        Args:
            content: The text content of the research note
            human_input_id: Optional ID of the associated human input

        Returns:
            ResearchNote: The newly created research note instance

        Raises:
            peewee.DatabaseError: If there's an error creating the note
        """
        try:
            created = ResearchNote.create(content=content, human_input_id=human_input_id)
            logger.debug(f"Created research note ID {created.id}: {content[:50]}...")
            return created
        except peewee.DatabaseError as e:
            logger.error(f"Failed to create research note: {str(e)}")
            raise

    def get(self, note_id: int) -> Optional[ResearchNote]:
        """
        Look up a research note by primary key.

        Args:
            note_id: The ID of the research note to retrieve

        Returns:
            Optional[ResearchNote]: The research note instance if found, None otherwise

        Raises:
            peewee.DatabaseError: If there's an error accessing the database
        """
        try:
            found = ResearchNote.get_or_none(ResearchNote.id == note_id)
            return found
        except peewee.DatabaseError as e:
            logger.error(f"Failed to fetch research note {note_id}: {str(e)}")
            raise

    def update(self, note_id: int, content: str) -> Optional[ResearchNote]:
        """
        Replace the content of an existing research note.

        Args:
            note_id: The ID of the research note to update
            content: The new content for the research note

        Returns:
            Optional[ResearchNote]: The updated research note if found, None otherwise

        Raises:
            peewee.DatabaseError: If there's an error updating the note
        """
        try:
            existing = self.get(note_id)
            if existing:
                existing.content = content
                existing.save()
                logger.debug(f"Updated research note ID {note_id}: {content[:50]}...")
                return existing
            # Nothing to update: report it and signal "not found" to the caller.
            logger.warning(f"Attempted to update non-existent research note {note_id}")
            return None
        except peewee.DatabaseError as e:
            logger.error(f"Failed to update research note {note_id}: {str(e)}")
            raise

    def delete(self, note_id: int) -> bool:
        """
        Remove a research note by primary key.

        Args:
            note_id: The ID of the research note to delete

        Returns:
            bool: True if the note was deleted, False if it wasn't found

        Raises:
            peewee.DatabaseError: If there's an error deleting the note
        """
        try:
            existing = self.get(note_id)
            if existing:
                existing.delete_instance()
                logger.debug(f"Deleted research note ID {note_id}")
                return True
            # Nothing to delete: report it and signal "not found" to the caller.
            logger.warning(f"Attempted to delete non-existent research note {note_id}")
            return False
        except peewee.DatabaseError as e:
            logger.error(f"Failed to delete research note {note_id}: {str(e)}")
            raise

    def get_all(self) -> List[ResearchNote]:
        """
        Fetch every research note, ordered by ID.

        Returns:
            List[ResearchNote]: List of all research note instances

        Raises:
            peewee.DatabaseError: If there's an error accessing the database
        """
        try:
            ordered_query = ResearchNote.select().order_by(ResearchNote.id)
            return list(ordered_query)
        except peewee.DatabaseError as e:
            logger.error(f"Failed to fetch all research notes: {str(e)}")
            raise

    def get_notes_dict(self) -> Dict[int, str]:
        """
        Fetch every research note as an ``{id: content}`` mapping.

        Returns:
            Dict[int, str]: Dictionary with note IDs as keys and content as values

        Raises:
            peewee.DatabaseError: If there's an error accessing the database
        """
        try:
            by_id = {}
            for entry in self.get_all():
                by_id[entry.id] = entry.content
            return by_id
        except peewee.DatabaseError as e:
            logger.error(f"Failed to fetch research notes as dictionary: {str(e)}")
            raise

View File

@ -0,0 +1,94 @@
"""Peewee migrations -- 006_20250303_211704_add_research_note_model.py.
Some examples (model - class or model name)::
> Model = migrator.orm['table_name'] # Return model in current state by name
> Model = migrator.ModelClass # Return model in current state by name
> migrator.sql(sql) # Run custom SQL
> migrator.run(func, *args, **kwargs) # Run python function with the given args
> migrator.create_model(Model) # Create a model (could be used as decorator)
> migrator.remove_model(model, cascade=True) # Remove a model
> migrator.add_fields(model, **fields) # Add fields to a model
> migrator.change_fields(model, **fields) # Change fields
> migrator.remove_fields(model, *field_names, cascade=True)
> migrator.rename_field(model, old_field_name, new_field_name)
> migrator.rename_table(model, new_table_name)
> migrator.add_index(model, *col_names, unique=False)
> migrator.add_not_null(model, *field_names)
> migrator.add_default(model, field_name, default)
> migrator.add_constraint(model, name, sql)
> migrator.drop_index(model, *col_names)
> migrator.drop_not_null(model, *field_names)
> migrator.drop_constraints(model, *constraints)
"""
from contextlib import suppress
import peewee as pw
from peewee_migrate import Migrator
with suppress(ImportError):
import playhouse.postgres_ext as pw_pext
def migrate(migrator: Migrator, database: pw.Database, *, fake=False):
    """Create the research_note table, plus its optional human_input foreign key.

    Does nothing when a ``SELECT id FROM research_note`` probe succeeds, i.e.
    when the table is already present.
    """

    def _probe_succeeds(sql: str) -> bool:
        # Run a probing statement; an OperationalError means the probed
        # table/column is missing.
        try:
            database.execute_sql(sql)
        except pw.OperationalError:
            return False
        return True

    # Table already there: nothing to migrate.
    if _probe_succeeds("SELECT id FROM research_note LIMIT 1"):
        return

    @migrator.create_model
    class ResearchNote(pw.Model):
        id = pw.AutoField()
        created_at = pw.DateTimeField()
        updated_at = pw.DateTimeField()
        content = pw.TextField()
        # The human_input foreign key is added in a separate step below.

        class Meta:
            table_name = "research_note"

    # The foreign key is only added when the human_input model is known to the
    # migrator and the human_input_id column is not present yet.
    try:
        human_input_model = migrator.orm['human_input']
        if not _probe_succeeds("SELECT human_input_id FROM research_note LIMIT 1"):
            migrator.add_fields(
                'research_note',
                human_input=pw.ForeignKeyField(
                    human_input_model,
                    null=True,
                    field='id',
                    on_delete='SET NULL'
                )
            )
    except KeyError:
        # No human_input model registered: skip the foreign key.
        pass
def rollback(migrator: Migrator, database: pw.Database, *, fake=False):
    """Undo the migration: drop the human_input field, then the research_note model."""
    # The foreign key field may be absent; an OperationalError here is ignored.
    with suppress(pw.OperationalError):
        migrator.remove_fields('research_note', 'human_input')

    migrator.remove_model('research_note')

View File

@ -1,10 +1,11 @@
"""
This module provides formatting functions for model data for display or output.
It includes functions to format key facts in a consistent, readable way for
presentation to users and other parts of the system.
It includes functions to format key facts, key snippets, and research notes in a consistent,
readable way for presentation to users and other parts of the system.
"""
from ra_aid.model_formatters.key_facts_formatter import format_key_fact, format_key_facts_dict
from ra_aid.model_formatters.research_notes_formatter import format_research_note, format_research_notes_dict
__all__ = ["format_key_fact", "format_key_facts_dict"]
__all__ = ["format_key_fact", "format_key_facts_dict", "format_research_note", "format_research_notes_dict"]

View File

@ -0,0 +1,58 @@
"""
Research notes model formatter module.
This module provides utility functions for formatting research notes from database models
into consistent markdown styling for display or output purposes.
"""
from typing import Dict, Optional
def format_research_note(note_id: int, content: str) -> str:
    """
    Format a single research note with markdown formatting.

    Args:
        note_id: The identifier of the research note
        content: The text content of the research note

    Returns:
        str: Formatted research note as markdown, or an empty string when
            ``content`` is empty

    Example:
        >>> format_research_note(1, "This is an important research finding")
        '## 🔍 Research Note #1\\n\\nThis is an important research finding'
    """
    if not content:
        return ""
    return f"## 🔍 Research Note #{note_id}\n\n{content}"


def format_research_notes_dict(notes_dict: Dict[int, str]) -> str:
    """
    Format a dictionary of research notes with consistent markdown formatting.

    Notes are emitted in ascending ID order, separated by one blank line.
    Notes whose content is empty are skipped so they do not leave stray
    blank lines in the output.

    Args:
        notes_dict: Dictionary mapping note IDs to content strings

    Returns:
        str: Formatted research notes as markdown, or an empty string when
            there is nothing to format

    Example:
        >>> format_research_notes_dict({1: "First finding", 2: "Second finding"})
        '## 🔍 Research Note #1\\n\\nFirst finding\\n\\n## 🔍 Research Note #2\\n\\nSecond finding'
    """
    if not notes_dict:
        return ""

    # Sort by ID for deterministic output.
    sections = [
        format_research_note(note_id, content)
        for note_id, content in sorted(notes_dict.items())
    ]

    # Drop empty sections, then separate the rest with a blank line and trim
    # trailing whitespace.
    return "\n\n".join(section for section in sections if section).rstrip()

View File

@ -56,8 +56,10 @@ from ra_aid.prompts.chat_prompts import CHAT_PROMPT
# CIAYN prompts
from ra_aid.prompts.ciayn_prompts import (
CIAYN_AGENT_BASE_PROMPT,
CIAYN_AGENT_SYSTEM_PROMPT,
CIAYN_AGENT_HUMAN_PROMPT,
EXTRACT_TOOL_CALL_PROMPT,
NO_TOOL_CALL_PROMPT,
)
# Add an __all__ list with all the exported names
@ -98,6 +100,8 @@ __all__ = [
"CHAT_PROMPT",
# CIAYN prompts
"CIAYN_AGENT_BASE_PROMPT",
"CIAYN_AGENT_SYSTEM_PROMPT",
"CIAYN_AGENT_HUMAN_PROMPT",
"EXTRACT_TOOL_CALL_PROMPT",
"NO_TOOL_CALL_PROMPT",
]

View File

@ -0,0 +1,55 @@
"""
Research notes gc-specific prompts for the AI agent system.
This module contains the prompt for the research notes gc agent that is
responsible for evaluating and trimming down the stored research notes to keep
only the most valuable ones, ensuring that the collection remains manageable.
"""
# Prompt template for the research notes gc agent.
# It contains one format placeholder, {research_notes}, for the notes to review.
# The retention limit ("no more than 30") is hard-coded in the text.
# NOTE(review): the gc agent's `threshold` argument is not reflected here — confirm
# whether this template is meant to replace the prompt built inline by the agent.
RESEARCH_NOTES_GC_PROMPT = """
You are a Research Notes Cleaner agent responsible for maintaining the research notes collection by pruning less important notes.
<research notes>
{research_notes}
</research notes>
Task:
Your task is to analyze all the research notes in the system and determine which ones should be kept and which ones should be removed.
Guidelines for evaluation:
1. Review all research notes and their IDs
2. Identify which notes are lowest value/most ephemeral based on:
- Relevance to the overall project
- Specificity and actionability of the information
- Long-term value vs. temporary relevance
- Uniqueness of the information (avoid redundancy)
- How fundamental the note is to understanding the context
3. Trim down the collection to keep no more than 30 highest value, longest-lasting notes
4. For each note you decide to delete, provide a brief explanation of your reasoning
Retention priority (from highest to lowest):
- Core research findings directly relevant to the project requirements
- Important technical details that affect implementation decisions
- API documentation and usage examples
- Configuration information and best practices
- Alternative approaches considered with pros and cons
- General background information
- Information that is easily found elsewhere or outdated
- If there are contradictory notes, that probably means that the older note is outdated and should be deleted.
For notes of similar importance, prefer to keep more recent notes if they supersede older information.
Output:
1. List the IDs of notes to be deleted using the delete_research_notes tool with the IDs provided as a list [ids...], NOT as a comma-separated string
2. Provide a brief explanation for each deletion decision
3. Explain your overall approach to selecting which notes to keep
IMPORTANT:
- Use the delete_research_notes tool with multiple IDs at once in a single call, rather than making multiple individual deletion calls
- The delete_research_notes tool accepts a list of IDs in the format [id1, id2, id3, ...], not as a comma-separated string
- Batch deletion is much more efficient than calling the deletion function multiple times
- Collect all IDs to delete first, then make a single call to delete_research_notes with the complete list
Remember: Your goal is to maintain a concise, high-value collection of research notes that preserves essential information while removing ephemeral or less critical details.
"""

View File

@ -16,9 +16,11 @@ from ra_aid.console.formatting import print_error
from ra_aid.database.repositories.human_input_repository import HumanInputRepository
from ra_aid.database.repositories.key_fact_repository import get_key_fact_repository
from ra_aid.database.repositories.key_snippet_repository import get_key_snippet_repository
from ra_aid.database.repositories.research_note_repository import get_research_note_repository
from ra_aid.exceptions import AgentInterrupt
from ra_aid.model_formatters import format_key_facts_dict
from ra_aid.model_formatters.key_snippets_formatter import format_key_snippets_dict
from ra_aid.model_formatters.research_notes_formatter import format_research_notes_dict
from ra_aid.tools.memory import _global_memory
from ..console import print_task_header
@ -74,7 +76,7 @@ def request_research(query: str) -> ResearchResult:
"completion_message": "Research stopped - maximum recursion depth reached",
"key_facts": key_facts,
"related_files": get_related_files(),
"research_notes": get_memory_value("research_notes"),
"research_notes": "", # Empty for max depth exceeded case
"key_snippets": key_snippets,
"success": False,
"reason": "max_depth_exceeded",
@ -129,12 +131,20 @@ def request_research(query: str) -> ResearchResult:
except RuntimeError as e:
logger.error(f"Failed to access key snippet repository: {str(e)}")
key_snippets = ""
try:
repository = get_research_note_repository()
notes_dict = repository.get_notes_dict()
formatted_research_notes = format_research_notes_dict(notes_dict)
except RuntimeError as e:
logger.error(f"Failed to access research note repository: {str(e)}")
formatted_research_notes = ""
response_data = {
"completion_message": completion_message,
"key_facts": key_facts,
"related_files": get_related_files(),
"research_notes": get_memory_value("research_notes"),
"research_notes": formatted_research_notes,
"key_snippets": key_snippets,
"success": success,
"reason": reason,
@ -201,11 +211,19 @@ def request_web_research(query: str) -> ResearchResult:
except RuntimeError as e:
logger.error(f"Failed to access key snippet repository: {str(e)}")
key_snippets = ""
try:
repository = get_research_note_repository()
notes_dict = repository.get_notes_dict()
formatted_research_notes = format_research_notes_dict(notes_dict)
except RuntimeError as e:
logger.error(f"Failed to access research note repository: {str(e)}")
formatted_research_notes = ""
response_data = {
"completion_message": completion_message,
"key_snippets": key_snippets,
"research_notes": get_memory_value("research_notes"),
"research_notes": formatted_research_notes,
"success": success,
"reason": reason,
}
@ -281,12 +299,20 @@ def request_research_and_implementation(query: str) -> Dict[str, Any]:
except RuntimeError as e:
logger.error(f"Failed to access key snippet repository: {str(e)}")
key_snippets = ""
try:
repository = get_research_note_repository()
notes_dict = repository.get_notes_dict()
formatted_research_notes = format_research_notes_dict(notes_dict)
except RuntimeError as e:
logger.error(f"Failed to access research note repository: {str(e)}")
formatted_research_notes = ""
response_data = {
"completion_message": completion_message,
"key_facts": key_facts,
"related_files": get_related_files(),
"research_notes": get_memory_value("research_notes"),
"research_notes": formatted_research_notes,
"key_snippets": key_snippets,
"success": success,
"reason": reason,

View File

@ -11,9 +11,11 @@ logger = logging.getLogger(__name__)
from ..database.repositories.key_fact_repository import get_key_fact_repository
from ..database.repositories.key_snippet_repository import get_key_snippet_repository
from ..database.repositories.research_note_repository import get_research_note_repository
from ..llm import initialize_expert_llm
from ..model_formatters import format_key_facts_dict
from ..model_formatters.key_snippets_formatter import format_key_snippets_dict
from ..model_formatters.research_notes_formatter import format_research_notes_dict
from .memory import _global_memory, get_memory_value
console = Console()
@ -167,7 +169,14 @@ def ask_expert(question: str) -> str:
except RuntimeError as e:
logger.error(f"Failed to access key fact repository: {str(e)}")
key_facts = ""
research_notes = get_memory_value("research_notes")
# Get research notes directly from repository and format using the formatter
try:
repository = get_research_note_repository()
notes_dict = repository.get_notes_dict()
formatted_research_notes = format_research_notes_dict(notes_dict)
except RuntimeError as e:
logger.error(f"Failed to access research note repository: {str(e)}")
formatted_research_notes = ""
# Build display query (just question)
display_query = "# Question\n" + question
@ -187,8 +196,8 @@ def ask_expert(question: str) -> str:
if related_contents:
query_parts.extend(["# Related Files", related_contents])
if related_contents:
query_parts.extend(["# Research Notes", research_notes])
if formatted_research_notes:
query_parts.extend(["# Research Notes", formatted_research_notes])
if key_snippets and len(key_snippets) > 0:
query_parts.extend(["# Key Snippets", key_snippets])

View File

@ -20,6 +20,7 @@ from ra_aid.agent_context import (
from ra_aid.database.repositories.key_fact_repository import get_key_fact_repository
from ra_aid.database.repositories.key_snippet_repository import get_key_snippet_repository
from ra_aid.database.repositories.human_input_repository import get_human_input_repository
from ra_aid.database.repositories.research_note_repository import get_research_note_repository
from ra_aid.model_formatters import key_snippets_formatter
from ra_aid.logging_config import get_logger
@ -45,7 +46,6 @@ from ra_aid.database.repositories.key_fact_repository import get_key_fact_reposi
# Global memory store
_global_memory: Dict[str, Any] = {
"research_notes": [],
"plans": [],
"tasks": {}, # Dict[int, str] - ID to task mapping
"task_id_counter": 1, # Counter for generating unique task IDs
@ -66,9 +66,50 @@ def emit_research_notes(notes: str) -> str:
Args:
notes: REQUIRED The research notes to store
"""
_global_memory["research_notes"].append(notes)
console.print(Panel(Markdown(notes), title="🔍 Research Notes"))
return "Research notes stored."
# Try to get the latest human input
human_input_id = None
try:
human_input_repo = get_human_input_repository()
recent_inputs = human_input_repo.get_recent(1)
if recent_inputs and len(recent_inputs) > 0:
human_input_id = recent_inputs[0].id
except RuntimeError as e:
logger.warning(f"No HumanInputRepository available: {str(e)}")
except Exception as e:
logger.warning(f"Failed to get recent human input: {str(e)}")
try:
# Create note in database using repository
created_note = get_research_note_repository().create(notes, human_input_id=human_input_id)
note_id = created_note.id
# Format the note using the formatter
from ra_aid.model_formatters.research_notes_formatter import format_research_note
formatted_note = format_research_note(note_id, notes)
# Display formatted note
console.print(Panel(Markdown(formatted_note), title="🔍 Research Notes"))
log_work_event(f"Stored research note #{note_id}.")
# Check if we need to clean up notes (more than 30)
try:
all_notes = get_research_note_repository().get_all()
if len(all_notes) > 30:
# Trigger the research notes cleaner agent
try:
from ra_aid.agents.research_notes_gc_agent import run_research_notes_gc_agent
run_research_notes_gc_agent()
except Exception as e:
logger.error(f"Failed to run research notes cleaner: {str(e)}")
except RuntimeError as e:
logger.error(f"Failed to access research note repository: {str(e)}")
return f"Research note #{note_id} stored."
except RuntimeError as e:
logger.error(f"Failed to access research note repository: {str(e)}")
console.print(f"Error storing research note: {str(e)}", style="red")
return "Failed to store research note."
@tool("emit_plan")
@ -617,6 +658,7 @@ def get_memory_value(key: str) -> str:
Different memory types return different formats:
- For work_log: Returns formatted markdown with timestamps and events
- For research_notes: Returns formatted markdown from repository
- For other types: Returns newline-separated list of values
Args:
@ -631,6 +673,32 @@ def get_memory_value(key: str) -> str:
return ""
entries = [f"## {entry['timestamp']}\n{entry['event']}" for entry in values]
return "\n\n".join(entries)
if key == "research_notes":
# DEPRECATED: This method of accessing research notes is deprecated.
# Use direct repository access instead:
# from ra_aid.database.repositories.research_note_repository import get_research_note_repository
# from ra_aid.model_formatters.research_notes_formatter import format_research_notes_dict
# repository = get_research_note_repository()
# notes_dict = repository.get_notes_dict()
# formatted_notes = format_research_notes_dict(notes_dict)
logger.warning("DEPRECATED: Accessing research notes via get_memory_value() is deprecated. "
"Use direct repository access with get_research_note_repository() instead.")
try:
# Import required modules for research notes
from ra_aid.database.repositories.research_note_repository import get_research_note_repository
from ra_aid.model_formatters.research_notes_formatter import format_research_notes_dict
# Get notes from repository and format them
repository = get_research_note_repository()
notes_dict = repository.get_notes_dict()
return format_research_notes_dict(notes_dict)
except RuntimeError as e:
logger.error(f"Failed to access research note repository: {str(e)}")
return ""
except Exception as e:
logger.error(f"Error accessing research notes: {str(e)}")
return ""
# For other types (lists), join with newlines
values = _global_memory.get(key, [])

View File

@ -0,0 +1,261 @@
"""
Tests for the ResearchNoteRepository class.
"""
import pytest
from unittest.mock import patch
import peewee
from ra_aid.database.connection import DatabaseManager, db_var
from ra_aid.database.models import ResearchNote, BaseModel
from ra_aid.database.repositories.research_note_repository import (
ResearchNoteRepository,
ResearchNoteRepositoryManager,
get_research_note_repository,
research_note_repo_var
)
@pytest.fixture
def cleanup_db():
    """Give each test a clean database context variable.

    Whatever connection is currently stored in ``db_var`` is closed on a
    best-effort basis and the variable is cleared, once before the test body
    runs and once more after it has finished.
    """

    def _discard_current_connection():
        # Close the registered connection (if any), then clear the variable.
        current = db_var.get()
        if current is not None:
            try:
                if not current.is_closed():
                    current.close()
            except Exception:
                # A failure while closing must not break the test run.
                pass
        db_var.set(None)

    _discard_current_connection()

    # Hand control to the test.
    yield

    _discard_current_connection()
@pytest.fixture
def cleanup_repo():
    """Clear the research-note repository context variable around a test."""
    research_note_repo_var.set(None)  # start from a known-empty state

    yield

    research_note_repo_var.set(None)  # do not leak a repository to later tests
@pytest.fixture
def setup_db(cleanup_db):
    """Yield an in-memory database that has the ResearchNote table created.

    ``BaseModel._meta.database`` is patched to the in-memory connection for
    the lifetime of the fixture, with the intent that model-level operations
    such as ``ResearchNote.create()`` use the test database. The table is
    dropped again once the test has finished.
    """
    with DatabaseManager(in_memory=True) as memory_db, patch.object(
        BaseModel._meta, 'database', memory_db
    ):
        # Build the schema inside a transaction.
        with memory_db.atomic():
            memory_db.create_tables([ResearchNote], safe=True)

        yield memory_db

        # Teardown: remove the table inside its own transaction.
        with memory_db.atomic():
            ResearchNote.drop_table(safe=True)
def test_create_research_note(setup_db):
    """A created note gets an ID, keeps its content, and can be read back."""
    repository = ResearchNoteRepository(db=setup_db)
    text = "Test research note"

    created = repository.create(text)

    # The returned model reflects what was stored.
    assert created.id is not None
    assert created.content == text

    # Round-trip through the repository to confirm it was persisted.
    reloaded = repository.get(created.id)
    assert reloaded.content == text
def test_get_research_note(setup_db):
    """Looking a note up by ID returns it; an unknown ID returns None."""
    repository = ResearchNoteRepository(db=setup_db)
    text = "Test research note"
    created = repository.create(text)

    fetched = repository.get(created.id)

    # The fetched record is the one that was just created.
    assert fetched is not None
    assert fetched.id == created.id
    assert fetched.content == text

    # An ID that was never issued yields no record.
    missing = repository.get(999)
    assert missing is None
def test_update_research_note(setup_db):
    """Updating changes the stored content; unknown IDs return None."""
    repository = ResearchNoteRepository(db=setup_db)
    created = repository.create("Original content")

    replacement = "Updated content"
    updated = repository.update(created.id, replacement)

    # The returned model carries the same ID and the new content.
    assert updated is not None
    assert updated.id == created.id
    assert updated.content == replacement

    # The change is visible on a fresh read through the repository.
    reloaded = repository.get(created.id)
    assert reloaded.content == replacement

    # Updating an ID that does not exist reports nothing updated.
    assert repository.update(999, "This shouldn't work") is None
def test_delete_research_note(setup_db):
    """Deleting removes the note and returns True; unknown IDs return False."""
    repository = ResearchNoteRepository(db=setup_db)
    created = repository.create("Test research note to delete")

    # Precondition: the note is retrievable before deletion.
    assert repository.get(created.id) is not None

    outcome = repository.delete(created.id)
    assert outcome is True

    # Postcondition: the note can no longer be fetched.
    assert repository.get(created.id) is None

    # Deleting an ID that does not exist reports failure.
    assert repository.delete(999) is False
def test_get_all_research_notes(setup_db):
    """get_all() returns every note that was created."""
    repository = ResearchNoteRepository(db=setup_db)
    expected_texts = ["Note 1", "Note 2", "Note 3"]
    for text in expected_texts:
        repository.create(text)

    stored = repository.get_all()

    # Same number of notes as were created.
    assert len(stored) == len(expected_texts)

    # Every created text shows up among the stored notes (order not checked).
    stored_texts = [note.content for note in stored]
    missing = [text for text in expected_texts if text not in stored_texts]
    assert not missing
def test_get_notes_dict(setup_db):
    """get_notes_dict() maps each note ID to that note's content."""
    repository = ResearchNoteRepository(db=setup_db)
    created = [
        repository.create(text) for text in ["Note 1", "Note 2", "Note 3"]
    ]

    mapping = repository.get_notes_dict()

    # One entry per created note.
    assert len(mapping) == len(created)

    # Each note is keyed by its ID and maps to its own content.
    for note in created:
        assert note.id in mapping
        assert mapping[note.id] == note.content
def test_repository_init_without_db():
    """Constructing the repository with db=None raises a ValueError."""
    # ``match`` searches the exception text, i.e. a substring check here.
    with pytest.raises(ValueError, match="Database connection is required"):
        ResearchNoteRepository(db=None)
def test_research_note_repository_manager(setup_db, cleanup_repo):
    """The manager exposes a working repository only while it is active."""
    with ResearchNoteRepositoryManager(setup_db) as managed_repo:
        # The manager hands back a repository bound to the given database.
        assert isinstance(managed_repo, ResearchNoteRepository)
        assert managed_repo.db is setup_db

        # The repository is usable inside the context.
        text = "Test note via context manager"
        created = managed_repo.create(text)
        assert created.id is not None
        assert created.content == text

        # The accessor returns the very same repository object.
        assert get_research_note_repository() is managed_repo

    # After the context exits the accessor has nothing to return.
    with pytest.raises(RuntimeError, match="No ResearchNoteRepository available"):
        get_research_note_repository()
def test_get_research_note_repository_when_not_set(cleanup_repo):
    """The accessor raises RuntimeError when no repository is in context."""
    # ``match`` searches the exception text, i.e. a substring check here.
    with pytest.raises(RuntimeError, match="No ResearchNoteRepository available"):
        get_research_note_repository()

View File

@ -15,7 +15,6 @@ from ra_aid.tools.memory import _global_memory
@pytest.fixture
def reset_memory():
"""Reset global memory before each test"""
_global_memory["research_notes"] = []
_global_memory["plans"] = []
_global_memory["tasks"] = {}
_global_memory["task_id_counter"] = 0
@ -24,7 +23,6 @@ def reset_memory():
_global_memory["work_log"] = []
yield
# Clean up after test
_global_memory["research_notes"] = []
_global_memory["plans"] = []
_global_memory["tasks"] = {}
_global_memory["task_id_counter"] = 0

View File

@ -32,7 +32,6 @@ from ra_aid.database.models import KeyFact
@pytest.fixture
def reset_memory():
"""Reset global memory before each test"""
_global_memory["research_notes"] = []
_global_memory["plans"] = []
_global_memory["tasks"] = {}
_global_memory["task_id_counter"] = 0
@ -41,7 +40,6 @@ def reset_memory():
_global_memory["work_log"] = []
yield
# Clean up after test
_global_memory["research_notes"] = []
_global_memory["plans"] = []
_global_memory["tasks"] = {}
_global_memory["task_id_counter"] = 0
@ -188,17 +186,14 @@ def test_emit_key_facts_single_fact(reset_memory, mock_repository):
def test_get_memory_value_other_types(reset_memory):
    """Test get_memory_value remains compatible with other memory types"""
    # Test with empty list
    assert get_memory_value("plans") == ""

    # Test with non-existent key
    assert get_memory_value("nonexistent") == ""

    # Research notes are no longer stored in _global_memory, so nothing is
    # appended there. With no repository in context the lookup must return
    # an empty string.
    assert get_memory_value("research_notes") == ""
def test_log_work_event(reset_memory):