From 9a69bb173e465c1b00b88af21182de60d3d1d2e0 Mon Sep 17 00:00:00 2001
From: AI Christianson
Date: Sun, 2 Mar 2025 20:27:52 -0500
Subject: [PATCH] do not gc key facts/snippets associated with current human input

---
 ra_aid/agents/key_facts_gc_agent.py           | 130 +++++++++++-----
 ra_aid/agents/key_snippets_gc_agent.py        | 146 +++++++++++++-----
 ...250302_201611_add_human_input_reference.py |  55 ++++---
 3 files changed, 233 insertions(+), 98 deletions(-)

diff --git a/ra_aid/agents/key_facts_gc_agent.py b/ra_aid/agents/key_facts_gc_agent.py
index f92f4f6..f8a5543 100644
--- a/ra_aid/agents/key_facts_gc_agent.py
+++ b/ra_aid/agents/key_facts_gc_agent.py
@@ -15,6 +15,7 @@ from rich.panel import Panel
 
 from ra_aid.agent_utils import create_agent, run_agent_with_retry
 from ra_aid.database.repositories.key_fact_repository import KeyFactRepository
+from ra_aid.database.repositories.human_input_repository import HumanInputRepository
 from ra_aid.llm import initialize_llm
 from ra_aid.prompts.key_facts_gc_prompts import KEY_FACTS_GC_PROMPT
 from ra_aid.tools.memory import log_work_event, _global_memory
@@ -22,6 +23,7 @@ from ra_aid.tools.memory import log_work_event, _global_memory
 
 console = Console()
 key_fact_repository = KeyFactRepository()
+human_input_repository = HumanInputRepository()
 
 
 @tool
@@ -37,12 +39,27 @@ def delete_key_facts(fact_ids: List[int]) -> str:
     deleted_facts = []
     not_found_facts = []
     failed_facts = []
+    protected_facts = []
+
+    # Try to get the current human input to protect its facts
+    current_human_input_id = None
+    try:
+        recent_inputs = human_input_repository.get_recent(1)
+        if recent_inputs and len(recent_inputs) > 0:
+            current_human_input_id = recent_inputs[0].id
+    except Exception as e:
+        console.print(f"Warning: Could not retrieve current human input: {str(e)}")
 
     for fact_id in fact_ids:
         # Get the fact first to display information
         fact = key_fact_repository.get(fact_id)
         if fact:
-            # Delete the fact
+            # Check if this fact is associated with the current human input
+            if current_human_input_id is not None and fact.human_input_id == current_human_input_id:
+                protected_facts.append((fact_id, fact.content))
+                continue
+
+            # Delete the fact if it's not protected
             was_deleted = key_fact_repository.delete(fact_id)
             if was_deleted:
                 deleted_facts.append((fact_id, fact.content))
@@ -61,6 +78,13 @@ def delete_key_facts(fact_ids: List[int]) -> str:
             Panel(Markdown(deleted_msg), title="Facts Deleted", border_style="green")
         )
 
+    if protected_facts:
+        protected_msg = "Protected facts (associated with current request):\n" + "\n".join([f"- #{fact_id}: {content}" for fact_id, content in protected_facts])
+        result_parts.append(protected_msg)
+        console.print(
+            Panel(Markdown(protected_msg), title="Facts Protected", border_style="blue")
+        )
+
     if not_found_facts:
         not_found_msg = f"Facts not found: {', '.join([f'#{fact_id}' for fact_id in not_found_facts])}"
         result_parts.append(not_found_msg)
@@ -77,6 +101,7 @@ def run_key_facts_gc_agent() -> None:
 
     The agent analyzes all key facts and determines which are the least valuable,
     deleting them to maintain a manageable collection size of high-value facts.
+    Facts associated with the current human input are excluded from deletion.
     """
     # Get the count of key facts
     facts = key_fact_repository.get_all()
@@ -87,44 +112,75 @@ def run_key_facts_gc_agent() -> None:
 
     # Only run the agent if we actually have facts to clean
     if fact_count > 0:
-        # Get all facts as a formatted string for the prompt
-        facts_dict = key_fact_repository.get_facts_dict()
-        formatted_facts = "\n".join([f"Fact #{k}: {v}" for k, v in facts_dict.items()])
+        # Try to get the current human input ID to exclude its facts
+        current_human_input_id = None
+        try:
+            recent_inputs = human_input_repository.get_recent(1)
+            if recent_inputs and len(recent_inputs) > 0:
+                current_human_input_id = recent_inputs[0].id
+        except Exception as e:
+            console.print(f"Warning: Could not retrieve current human input: {str(e)}")
 
-        # Retrieve configuration
-        llm_config = _global_memory.get("config", {})
+        # Get all facts that are not associated with the current human input
+        eligible_facts = []
+        protected_facts = []
+        for fact in facts:
+            if current_human_input_id is not None and fact.human_input_id == current_human_input_id:
+                protected_facts.append(fact)
+            else:
+                eligible_facts.append(fact)
+
+        # Only process if we have facts that can be deleted
+        if eligible_facts:
+            # Format facts as a dictionary for the prompt
+            facts_dict = {fact.id: fact.content for fact in eligible_facts}
+            formatted_facts = "\n".join([f"Fact #{k}: {v}" for k, v in facts_dict.items()])
+
+            # Retrieve configuration
+            llm_config = _global_memory.get("config", {})
 
-        # Initialize the LLM model
-        model = initialize_llm(
-            llm_config.get("provider", "anthropic"),
-            llm_config.get("model", "claude-3-7-sonnet-20250219"),
-            temperature=llm_config.get("temperature")
-        )
-
-        # Create the agent with the delete_key_facts tool
-        agent = create_agent(model, [delete_key_facts])
-
-        # Format the prompt with the current facts
-        prompt = KEY_FACTS_GC_PROMPT.format(key_facts=formatted_facts)
-
-        # Set up the agent configuration
-        agent_config = {
-            "recursion_limit": 50  # Set a reasonable recursion limit
-        }
-
-        # Run the agent
-        run_agent_with_retry(agent, prompt, agent_config)
-
-        # Get updated count
-        updated_facts = key_fact_repository.get_all()
-        updated_count = len(updated_facts)
-
-        # Show info panel with updated count
-        console.print(
-            Panel(
-                f"Cleaned key facts: {fact_count} → {updated_count}",
-                title="🗑 GC Complete"
+            # Initialize the LLM model
+            model = initialize_llm(
+                llm_config.get("provider", "anthropic"),
+                llm_config.get("model", "claude-3-7-sonnet-20250219"),
+                temperature=llm_config.get("temperature")
             )
-        )
+
+            # Create the agent with the delete_key_facts tool
+            agent = create_agent(model, [delete_key_facts])
+
+            # Format the prompt with the eligible facts
+            prompt = KEY_FACTS_GC_PROMPT.format(key_facts=formatted_facts)
+
+            # Set up the agent configuration
+            agent_config = {
+                "recursion_limit": 50  # Set a reasonable recursion limit
+            }
+
+            # Run the agent
+            run_agent_with_retry(agent, prompt, agent_config)
+
+            # Get updated count
+            updated_facts = key_fact_repository.get_all()
+            updated_count = len(updated_facts)
+
+            # Show info panel with updated count and protected facts count
+            protected_count = len(protected_facts)
+            if protected_count > 0:
+                console.print(
+                    Panel(
+                        f"Cleaned key facts: {fact_count} → {updated_count}\nProtected facts (associated with current request): {protected_count}",
+                        title="🗑 GC Complete"
+                    )
+                )
+            else:
+                console.print(
+                    Panel(
+                        f"Cleaned key facts: {fact_count} → {updated_count}",
+                        title="🗑 GC Complete"
+                    )
+                )
+        else:
+            console.print(Panel(f"All {len(protected_facts)} facts are associated with the current request and protected from deletion.", title="🗑 GC Info"))
     else:
         console.print(Panel("No key facts to clean.", title="🗑 GC Info"))
\ No newline at end of file
diff --git a/ra_aid/agents/key_snippets_gc_agent.py b/ra_aid/agents/key_snippets_gc_agent.py
index 13bc124..ee4e390 100644
--- a/ra_aid/agents/key_snippets_gc_agent.py
+++ b/ra_aid/agents/key_snippets_gc_agent.py
@@ -15,6 +15,7 @@ from rich.panel import Panel
 
 from ra_aid.agent_utils import create_agent, run_agent_with_retry
 from ra_aid.database.repositories.key_snippet_repository import KeySnippetRepository
+from ra_aid.database.repositories.human_input_repository import HumanInputRepository
 from ra_aid.llm import initialize_llm
 from ra_aid.prompts.key_snippets_gc_prompts import KEY_SNIPPETS_GC_PROMPT
 from ra_aid.tools.memory import log_work_event, _global_memory
@@ -22,6 +23,7 @@ from ra_aid.tools.memory import log_work_event, _global_memory
 
 console = Console()
 key_snippet_repository = KeySnippetRepository()
+human_input_repository = HumanInputRepository()
 
 
 @tool
@@ -38,13 +40,29 @@ def delete_key_snippets(snippet_ids: List[int]) -> str:
     results = []
     not_found_snippets = []
     failed_snippets = []
+    protected_snippets = []
+
+    # Try to get the current human input to protect its snippets
+    current_human_input_id = None
+    try:
+        recent_inputs = human_input_repository.get_recent(1)
+        if recent_inputs and len(recent_inputs) > 0:
+            current_human_input_id = recent_inputs[0].id
+    except Exception as e:
+        console.print(f"Warning: Could not retrieve current human input: {str(e)}")
 
     for snippet_id in snippet_ids:
         # Get the snippet first to capture filepath for the message
         snippet = key_snippet_repository.get(snippet_id)
         if snippet:
             filepath = snippet.filepath
-            # Delete from database
+
+            # Check if this snippet is associated with the current human input
+            if current_human_input_id is not None and snippet.human_input_id == current_human_input_id:
+                protected_snippets.append((snippet_id, filepath))
+                continue
+
+            # Delete from database if not protected
             success = key_snippet_repository.delete(snippet_id)
             if success:
                 success_msg = f"Successfully deleted snippet #{snippet_id} from {filepath}"
@@ -66,6 +84,13 @@ def delete_key_snippets(snippet_ids: List[int]) -> str:
         deleted_msg = "Successfully deleted snippets:\n" + "\n".join([f"- #{snippet_id}: {filepath}" for snippet_id, filepath in results])
         result_parts.append(deleted_msg)
 
+    if protected_snippets:
+        protected_msg = "Protected snippets (associated with current request):\n" + "\n".join([f"- #{snippet_id}: {filepath}" for snippet_id, filepath in protected_snippets])
+        result_parts.append(protected_msg)
+        console.print(
+            Panel(Markdown(protected_msg), title="Snippets Protected", border_style="blue")
+        )
+
     if not_found_snippets:
         not_found_msg = f"Snippets not found: {', '.join([f'#{snippet_id}' for snippet_id in not_found_snippets])}"
         result_parts.append(not_found_msg)
@@ -82,6 +107,7 @@ def run_key_snippets_gc_agent() -> None:
 
     The agent analyzes all key snippets and determines which are the least valuable,
     deleting them to maintain a manageable collection size of high-value snippets.
+    Snippets associated with the current human input are excluded from deletion.
     """
     # Get the count of key snippets
    snippets = key_snippet_repository.get_all()
@@ -92,47 +118,87 @@ def run_key_snippets_gc_agent() -> None:
 
     # Only run the agent if we actually have snippets to clean
     if snippet_count > 0:
-        # Get all snippets as a formatted string for the prompt
-        snippets_dict = key_snippet_repository.get_snippets_dict()
-        formatted_snippets = "\n".join([
-            f"Snippet #{k}: filepath={v['filepath']}, line_number={v['line_number']}, description={v['description']}\n```python\n{v['snippet']}\n```"
-            for k, v in snippets_dict.items()
-        ])
+        # Try to get the current human input ID to exclude its snippets
+        current_human_input_id = None
+        try:
+            recent_inputs = human_input_repository.get_recent(1)
+            if recent_inputs and len(recent_inputs) > 0:
+                current_human_input_id = recent_inputs[0].id
+        except Exception as e:
+            console.print(f"Warning: Could not retrieve current human input: {str(e)}")
 
-        # Retrieve configuration
-        llm_config = _global_memory.get("config", {})
+        # Get all snippets that are not associated with the current human input
+        eligible_snippets = []
+        protected_snippets = []
+        for snippet in snippets:
+            if current_human_input_id is not None and snippet.human_input_id == current_human_input_id:
+                protected_snippets.append(snippet)
+            else:
+                eligible_snippets.append(snippet)
+
+        # Only process if we have snippets that can be deleted
+        if eligible_snippets:
+            # Get eligible snippets as a formatted string for the prompt
+            snippets_dict = {
+                snippet.id: {
+                    'filepath': snippet.filepath,
+                    'line_number': snippet.line_number,
+                    'snippet': snippet.snippet,
+                    'description': snippet.description
+                }
+                for snippet in eligible_snippets
+            }
+
+            formatted_snippets = "\n".join([
+                f"Snippet #{k}: filepath={v['filepath']}, line_number={v['line_number']}, description={v['description']}\n```python\n{v['snippet']}\n```"
+                for k, v in snippets_dict.items()
+            ])
+
+            # Retrieve configuration
+            llm_config = _global_memory.get("config", {})
 
-        # Initialize the LLM model
-        model = initialize_llm(
-            llm_config.get("provider", "anthropic"),
-            llm_config.get("model", "claude-3-7-sonnet-20250219"),
-            temperature=llm_config.get("temperature")
-        )
-
-        # Create the agent with the delete_key_snippets tool
-        agent = create_agent(model, [delete_key_snippets])
-
-        # Format the prompt with the current snippets
-        prompt = KEY_SNIPPETS_GC_PROMPT.format(key_snippets=formatted_snippets)
-
-        # Set up the agent configuration
-        agent_config = {
-            "recursion_limit": 50  # Set a reasonable recursion limit
-        }
-
-        # Run the agent
-        run_agent_with_retry(agent, prompt, agent_config)
-
-        # Get updated count
-        updated_snippets = key_snippet_repository.get_all()
-        updated_count = len(updated_snippets)
-
-        # Show info panel with updated count
-        console.print(
-            Panel(
-                f"Cleaned key snippets: {snippet_count} → {updated_count}",
-                title="🗑 GC Complete"
+            # Initialize the LLM model
+            model = initialize_llm(
+                llm_config.get("provider", "anthropic"),
+                llm_config.get("model", "claude-3-7-sonnet-20250219"),
+                temperature=llm_config.get("temperature")
            )
-        )
+
+            # Create the agent with the delete_key_snippets tool
+            agent = create_agent(model, [delete_key_snippets])
+
+            # Format the prompt with the eligible snippets
+            prompt = KEY_SNIPPETS_GC_PROMPT.format(key_snippets=formatted_snippets)
+
+            # Set up the agent configuration
+            agent_config = {
+                "recursion_limit": 50  # Set a reasonable recursion limit
+            }
+
+            # Run the agent
+            run_agent_with_retry(agent, prompt, agent_config)
+
+            # Get updated count
+            updated_snippets = key_snippet_repository.get_all()
+            updated_count = len(updated_snippets)
+
+            # Show info panel with updated count and protected snippets count
+            protected_count = len(protected_snippets)
+            if protected_count > 0:
+                console.print(
+                    Panel(
+                        f"Cleaned key snippets: {snippet_count} → {updated_count}\nProtected snippets (associated with current request): {protected_count}",
+                        title="🗑 GC Complete"
+                    )
+                )
+            else:
+                console.print(
+                    Panel(
+                        f"Cleaned key snippets: {snippet_count} → {updated_count}",
+                        title="🗑 GC Complete"
+                    )
+                )
+        else:
+            console.print(Panel(f"All {len(protected_snippets)} snippets are associated with the current request and protected from deletion.", title="🗑 GC Info"))
     else:
         console.print(Panel("No key snippets to clean.", title="🗑 GC Info"))
\ No newline at end of file
diff --git a/ra_aid/migrations/005_20250302_201611_add_human_input_reference.py b/ra_aid/migrations/005_20250302_201611_add_human_input_reference.py
index 8827dcc..6caf097 100644
--- a/ra_aid/migrations/005_20250302_201611_add_human_input_reference.py
+++ b/ra_aid/migrations/005_20250302_201611_add_human_input_reference.py
@@ -37,31 +37,44 @@ with suppress(ImportError):
 def migrate(migrator: Migrator, database: pw.Database, *, fake=False):
     """Add human_input_id foreign key field to KeyFact and KeySnippet tables."""
 
-    # Add human_input_id field to KeyFact model
-    migrator.add_fields(
-        'key_fact',
-        human_input_id=pw.ForeignKeyField(
-            'human_input',
-            null=True,
-            field='id',
-            on_delete='SET NULL'
-        )
-    )
+    # Get the HumanInput model from migrator.orm
+    HumanInput = migrator.orm['human_input']
 
-    # Add human_input_id field to KeySnippet model
-    migrator.add_fields(
-        'key_snippet',
-        human_input_id=pw.ForeignKeyField(
-            'human_input',
-            null=True,
-            field='id',
-            on_delete='SET NULL'
+    # Skip adding fields if they already exist
+    # Check if the column exists before trying to add it
+    try:
+        # Check if key_fact table has human_input_id column
+        database.execute_sql("SELECT human_input_id FROM key_fact LIMIT 1")
+    except pw.OperationalError:
+        # Column doesn't exist, safe to add
+        migrator.add_fields(
+            'key_fact',
+            human_input=pw.ForeignKeyField(
+                HumanInput,
+                null=True,
+                field='id',
+                on_delete='SET NULL'
+            )
+        )
+
+    try:
+        # Check if key_snippet table has human_input_id column
+        database.execute_sql("SELECT human_input_id FROM key_snippet LIMIT 1")
+    except pw.OperationalError:
+        # Column doesn't exist, safe to add
+        migrator.add_fields(
+            'key_snippet',
+            human_input=pw.ForeignKeyField(
+                HumanInput,
+                null=True,
+                field='id',
+                on_delete='SET NULL'
+            )
         )
-    )
 
 
 def rollback(migrator: Migrator, database: pw.Database, *, fake=False):
     """Remove human_input_id field from KeyFact and KeySnippet tables."""
 
-    migrator.remove_fields('key_fact', 'human_input_id')
-    migrator.remove_fields('key_snippet', 'human_input_id')
\ No newline at end of file
+    migrator.remove_fields('key_fact', 'human_input')
+    migrator.remove_fields('key_snippet', 'human_input')
\ No newline at end of file
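
Illustrative sketch (not part of the patch): the same protection rule appears four times above, in both delete tools and both GC agent runners. The two helpers below are hypothetical and only restate that rule in one place; they assume nothing beyond what the diff itself introduces, namely HumanInputRepository.get_recent() and the new human_input_id attribute on facts and snippets.

from typing import Optional


def get_current_human_input_id(human_input_repository) -> Optional[int]:
    """Return the id of the most recent human input, or None if it cannot be read."""
    try:
        recent_inputs = human_input_repository.get_recent(1)
        return recent_inputs[0].id if recent_inputs else None
    except Exception:
        # Mirrors the patch: failure to look up the current input never blocks GC.
        return None


def is_protected(record, current_human_input_id: Optional[int]) -> bool:
    """A fact or snippet is protected when it references the current human input."""
    return (
        current_human_input_id is not None
        and record.human_input_id == current_human_input_id
    )

Records for which is_protected(...) returns True are skipped by the delete tools and excluded from the prompt handed to the GC agent; only the remaining eligible records can be deleted.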