RA.Aid/ra_aid/tools/expert.py

import logging
import os
from typing import List

from langchain_core.tools import tool
from rich.console import Console
from rich.markdown import Markdown
from rich.panel import Panel

from ..database.repositories.config_repository import get_config_repository
from ..database.repositories.human_input_repository import get_human_input_repository
from ..database.repositories.key_fact_repository import get_key_fact_repository
from ..database.repositories.key_snippet_repository import get_key_snippet_repository
from ..database.repositories.related_files_repository import get_related_files_repository
from ..database.repositories.research_note_repository import get_research_note_repository
from ..database.repositories.trajectory_repository import get_trajectory_repository
from ..llm import initialize_expert_llm
from ..model_formatters import format_key_facts_dict
from ..model_formatters.key_snippets_formatter import format_key_snippets_dict
from ..model_formatters.research_notes_formatter import format_research_notes_dict
from ..models_params import models_params
from ..text.processing import process_thinking_content

logger = logging.getLogger(__name__)
console = Console()

_model = None


def get_model():
    """Lazily initialize and cache the expert LLM client.

    Prefers the expert-specific provider/model settings and falls back to the
    base provider/model when they are not configured.
    """
    global _model
    try:
        if _model is None:
            config_repo = get_config_repository()
            provider = config_repo.get("expert_provider") or config_repo.get("provider")
            model = config_repo.get("expert_model") or config_repo.get("model")
            _model = initialize_expert_llm(provider, model)
    except Exception as e:
        _model = None
        console.print(
            Panel(
                f"Failed to initialize expert model: {e}",
                title="Error",
                border_style="red",
            )
        )
        raise
    return _model
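

# Note: a failed initialization clears the cached _model before re-raising, so the
# next get_model() call retries from the current config rather than returning a
# stale, half-initialized client.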


# Keep track of context globally
expert_context = {
    "text": [],  # Additional textual context
    "files": [],  # File paths to include
}
@tool("emit_expert_context")
def emit_expert_context(context: str) -> str:
"""Add context for the next expert question.
This should be highly detailed contents such as entire sections of source code, etc.
Do not include your question in the additional context.
Err on the side of adding more context rather than less, but keep it information dense and under 500 words total.
You must give the complete contents.
Expert context will be reset after the ask_expert tool is called.
Args:
context: The context to add
"""
expert_context["text"].append(context)
# Record expert context in trajectory
try:
trajectory_repo = get_trajectory_repository()
human_input_id = get_human_input_repository().get_most_recent_id()
trajectory_repo.create(
tool_name="emit_expert_context",
tool_parameters={"context_length": len(context)},
step_data={
"display_title": "Expert Context",
"context_length": len(context),
},
record_type="tool_execution",
human_input_id=human_input_id
)
except Exception as e:
logger.error(f"Failed to record trajectory: {e}")
# Create and display status panel
panel_content = f"Added expert context ({len(context)} characters)"
console.print(Panel(panel_content, title="Expert Context", border_style="blue"))
return "Context added."


def read_files_with_limit(file_paths: List[str], max_lines: int = 10000) -> str:
    """Read multiple files and concatenate contents, stopping at line limit.

    Args:
        file_paths: List of file paths to read
        max_lines: Maximum total lines to read (default: 10000)

    Note:
        - Each file's contents will be prefaced with its path as a header
        - Stops reading files when the max_lines limit is reached
        - Files that would exceed the line limit are truncated
    """
    total_lines = 0
    contents = []
    for path in file_paths:
        try:
            if not os.path.exists(path):
                console.print(f"Warning: File not found: {path}", style="yellow")
                continue
            with open(path, "r", encoding="utf-8") as f:
                file_content = []
                for i, line in enumerate(f):
                    if total_lines + i >= max_lines:
                        file_content.append(
                            f"\n... truncated after {max_lines} lines ..."
                        )
                        break
                    file_content.append(line)
                if file_content:
                    contents.append(f"\n## File: {path}\n")
                    contents.append("".join(file_content))
                    total_lines += len(file_content)
        except Exception as e:
            console.print(f"Error reading file {path}: {str(e)}", style="red")
            continue
    return "".join(contents)


def read_related_files(file_paths: List[str]) -> str:
    """Read the provided files and return their contents.

    Args:
        file_paths: List of file paths to read

    Returns:
        String containing concatenated file contents, or an empty string if no paths
    """
    if not file_paths:
        return ""
    return read_files_with_limit(file_paths, max_lines=10000)
@tool("ask_expert")
def ask_expert(question: str) -> str:
"""Ask a question to an expert AI model.
Keep your questions specific, but long and detailed.
You only query the expert when you have a specific question in mind.
The expert can be extremely useful at logic questions, debugging, and reviewing complex source code, but you must provide all context including source manually.
The expert can see any key facts and code snippets previously noted, along with any additional context you've provided.
But the expert cannot see or reason about anything you have not explicitly provided in this way.
Try to phrase your question in a way that it does not expand the scope of our top-level task.
The expert can be prone to overthinking depending on what and how you ask it.
"""
    global expert_context

    # Get all content first
    file_paths = list(get_related_files_repository().get_all().values())
    related_contents = read_related_files(file_paths)

    # Get key snippets directly from repository and format using the formatter
    try:
        key_snippets = format_key_snippets_dict(
            get_key_snippet_repository().get_snippets_dict()
        )
    except RuntimeError as e:
        logger.error(f"Failed to access key snippet repository: {str(e)}")
        key_snippets = ""

    # Get key facts directly from repository and format using the formatter
    try:
        facts_dict = get_key_fact_repository().get_facts_dict()
        key_facts = format_key_facts_dict(facts_dict)
    except RuntimeError as e:
        logger.error(f"Failed to access key fact repository: {str(e)}")
        key_facts = ""

    # Get research notes directly from repository and format using the formatter
    try:
        repository = get_research_note_repository()
        notes_dict = repository.get_notes_dict()
        formatted_research_notes = format_research_notes_dict(notes_dict)
    except RuntimeError as e:
        logger.error(f"Failed to access research note repository: {str(e)}")
        formatted_research_notes = ""
    # Build display query (just the question)
    display_query = "# Question\n" + question

    # Record expert query in trajectory
    try:
        trajectory_repo = get_trajectory_repository()
        human_input_id = get_human_input_repository().get_most_recent_id()
        trajectory_repo.create(
            tool_name="ask_expert",
            tool_parameters={"question": question},
            step_data={
                "display_title": "Expert Query",
                "question": question,
            },
            record_type="tool_execution",
            human_input_id=human_input_id,
        )
    except Exception as e:
        logger.error(f"Failed to record trajectory: {e}")

    # Show only the question in the panel
    console.print(
        Panel(Markdown(display_query), title="🤔 Expert Query", border_style="yellow")
    )

    # Capture the accumulated context before resetting it; clearing it first would
    # silently drop everything added via emit_expert_context from the query below.
    additional_context = "\n".join(expert_context["text"])
    expert_context["text"].clear()
    expert_context["files"].clear()
    # Build full query in specified order
    query_parts = []
    if related_contents:
        query_parts.extend(["# Related Files", related_contents])
    if formatted_research_notes:
        query_parts.extend(["# Research Notes", formatted_research_notes])
    if key_snippets:
        query_parts.extend(["# Key Snippets", key_snippets])
    if key_facts:
        query_parts.extend(["# Key Facts About This Project", key_facts])
    if additional_context:
        query_parts.extend(["\n# Additional Context", additional_context])
    query_parts.extend(["# Question", question])
    query_parts.extend(
        [
            "\n# Additional Requirements",
            "**DO NOT OVERTHINK**",
            "**DO NOT OVERCOMPLICATE**",
        ]
    )

    # Join all parts (every element above is already a string)
    full_query = "\n".join(query_parts)
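    # Resulting prompt layout (sketch): optional "# Related Files", "# Research
    # Notes", "# Key Snippets", "# Key Facts About This Project", and
    # "# Additional Context" sections, then "# Question" and the fixed
    # "# Additional Requirements" footer.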

    # Get response using full query
    response = get_model().invoke(full_query)

    # Get the content from the response
    content = response.content
    logger.debug(f"Expert response content type: {type(content).__name__}")

    # Check if the model supports think tags
    config_repo = get_config_repository()
    provider = config_repo.get("expert_provider") or config_repo.get("provider")
    model_name = config_repo.get("expert_model") or config_repo.get("model")
    model_config = models_params.get(provider, {}).get(model_name, {})
    supports_think_tag = model_config.get("supports_think_tag", False)
    supports_thinking = model_config.get("supports_thinking", False)
    logger.debug(f"Expert model: {provider}/{model_name}")
    logger.debug(f"Model supports think tag: {supports_think_tag}")
    logger.debug(f"Model supports thinking: {supports_thinking}")

    # Process thinking content using the common processing function, which
    # handles both string and list responses
    try:
        content, thinking = process_thinking_content(
            content=content,
            supports_think_tag=supports_think_tag,
            supports_thinking=supports_thinking,
            panel_title="💭 Thoughts",
            panel_style="yellow",
            logger=logger,
        )
    except Exception as e:
        logger.error(f"Exception during content processing: {str(e)}")
        raise

    # Record expert response in trajectory
    try:
        trajectory_repo = get_trajectory_repository()
        human_input_id = get_human_input_repository().get_most_recent_id()
        trajectory_repo.create(
            tool_name="ask_expert",
            tool_parameters={"question": question},
            step_data={
                "display_title": "Expert Response",
                "response_length": len(content),
            },
            record_type="tool_execution",
            human_input_id=human_input_id,
        )
    except Exception as e:
        logger.error(f"Failed to record trajectory: {e}")

    # Format and display the response
    console.print(
        Panel(Markdown(content), title="Expert Response", border_style="blue")
    )
    return content
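

# Illustrative end-to-end flow (a sketch: assumes the agent runtime has already
# initialized the config and repository layers; the question text is made up):
#
#     emit_expert_context.invoke({"context": "<full source of the failing module>"})
#     answer = ask_expert.invoke(
#         {"question": "Given the context above, why does render() recurse forever?"}
#     )
#     print(answer)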