improve work logging; use reasoning_effort=high for openai expert models; improve prompts

AI Christianson 2025-02-08 14:36:08 -05:00
parent 5fad3fc755
commit f40e11ee21
6 changed files with 74 additions and 19 deletions

View File

@@ -62,6 +62,7 @@ from ra_aid.tools.memory import (
     _global_memory,
     get_memory_value,
     get_related_files,
+    log_work_event,
 )
 
 console = Console()
@@ -405,7 +406,11 @@ def run_research_agent(
         if agent is not None:
             logger.debug("Research agent completed successfully")
-            return run_agent_with_retry(agent, prompt, run_config)
+            _result = run_agent_with_retry(agent, prompt, run_config)
+            if _result:
+                # Log research completion
+                log_work_event(f"Completed research phase for: {base_task_or_query}")
+            return _result
         else:
             logger.debug("No model provided, running web research tools directly")
             return run_web_research_agent(
@@ -517,7 +522,11 @@ def run_web_research_agent(
         console.print(Panel(Markdown(console_message), title="🔬 Researching..."))
 
         logger.debug("Web research agent completed successfully")
-        return run_agent_with_retry(agent, prompt, run_config)
+        _result = run_agent_with_retry(agent, prompt, run_config)
+        if _result:
+            # Log web research completion
+            log_work_event(f"Completed web research phase for: {query}")
+        return _result
     except (KeyboardInterrupt, AgentInterrupt):
         raise
@@ -618,7 +627,11 @@ def run_planning_agent(
     try:
         print_stage_header("Planning Stage")
         logger.debug("Planning agent completed successfully")
-        return run_agent_with_retry(agent, planning_prompt, run_config)
+        _result = run_agent_with_retry(agent, planning_prompt, run_config)
+        if _result:
+            # Log planning completion
+            log_work_event(f"Completed planning phase for: {base_task}")
+        return _result
     except (KeyboardInterrupt, AgentInterrupt):
         raise
     except Exception as e:
@@ -719,7 +732,11 @@ def run_task_implementation_agent(
     try:
         logger.debug("Implementation agent completed successfully")
-        return run_agent_with_retry(agent, prompt, run_config)
+        _result = run_agent_with_retry(agent, prompt, run_config)
+        if _result:
+            # Log task implementation completion
+            log_work_event(f"Completed implementation of task: {task}")
+        return _result
     except (KeyboardInterrupt, AgentInterrupt):
         raise
     except Exception as e:
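
Note: log_work_event is imported from ra_aid.tools.memory, whose definition is not part of this diff. A minimal sketch of what such a helper might look like, assuming it appends timestamped entries to the shared _global_memory store (names and structure here are assumptions, not taken from the commit):

    # Hypothetical sketch -- the real helper lives in ra_aid.tools.memory
    # and is not shown in this commit.
    from datetime import datetime

    _global_memory: dict = {}

    def log_work_event(event: str) -> None:
        """Append a timestamped entry to the shared work log."""
        _global_memory.setdefault("work_log", []).append(
            {"timestamp": datetime.now().isoformat(), "event": event}
        )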

View File

@@ -181,11 +181,14 @@ def create_llm_client(
             is_expert=is_expert,
         )
     elif provider == "openai":
-        return ChatOpenAI(
-            api_key=config["api_key"],
-            model=model_name,
-            **temp_kwargs,
-        )
+        openai_kwargs = {
+            "api_key": config["api_key"],
+            "model": model_name,
+            **temp_kwargs,
+        }
+        if is_expert:
+            openai_kwargs["reasoning_effort"] = "high"
+        return ChatOpenAI(**openai_kwargs)
     elif provider == "anthropic":
         return ChatAnthropic(
             api_key=config["api_key"],
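
Note: reasoning_effort targets OpenAI's reasoning models (the o1 family), and langchain-openai forwards it to the API. A minimal usage sketch of the pattern this hunk implements, assuming a recent langchain-openai and OPENAI_API_KEY in the environment:

    # Sketch only: expert clients request high reasoning effort,
    # everything else keeps the provider's default.
    from langchain_openai import ChatOpenAI

    expert_llm = ChatOpenAI(model="o1", reasoning_effort="high")
    default_llm = ChatOpenAI(model="o1")  # provider default effort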

View File

@@ -17,6 +17,8 @@ Expert Consultation:
 - Use emit_expert_context to provide all relevant context about what you've found
 - Wait for the expert response before proceeding with research
 - The expert can help analyze complex codebases, unclear patterns, or subtle edge cases
+
+The expert is really good at logic, debugging and planning, but it only has access to the context you give it, and it is unable to access the outside world.
 """
 
 EXPERT_PROMPT_SECTION_PLANNING = """
@@ -25,6 +27,10 @@ Expert Consultation:
 - First use emit_expert_context to provide all relevant context
 - Wait for the expert's response before defining tasks in non-trivial scenarios
 - The expert can help with architectural decisions, correctness checks, and detailed planning
+
+The expert is really good at logic, debugging and planning, but it only has access to the context you give it, and it is unable to access the outside world.
+
+**ALWAYS** use the expert to come up with the high level plan.
 """
 
 EXPERT_PROMPT_SECTION_IMPLEMENTATION = """
@@ -33,6 +39,8 @@ Expert Consultation:
 - Use emit_expert_context to provide context about your specific concern
 - Ask the expert to perform deep analysis or correctness checks
 - Wait for expert guidance before proceeding with implementation
+
+The expert is really good at logic, debugging and planning, but it only has access to the context you give it, and it is unable to access the outside world.
 """
 
 EXPERT_PROMPT_SECTION_CHAT = """
@@ -41,6 +49,8 @@ Expert Consultation:
 - Use emit_expert_context to provide the current conversation state, user requirements, and discovered details
 - Ask the expert for advice on handling ambiguous user requests or complex technical challenges, and to verify correctness
 - Wait for the expert's guidance before making decisions that significantly alter the approach or final outcome
+
+The expert is really good at logic, debugging and planning, but it only has access to the context you give it, and it is unable to access the outside world.
 """
 
 # Human-specific prompt sections
@@ -124,6 +134,8 @@ Because this is a new project
 Remember, this is the research phase. Your main focus right now is research and creating instructions for the implementation which will be handed off to the implementation team.
 Focus on finding best practices, idiomatic approaches, and using all available research tools as well as the expert, if available.
 Remember, our scope and capabilities are limited -- unless the user specifically asks, we do not want to set up servers like postgres. We want to use sqlite or similar for initial implementation, but make it extensible.
+
+If the expert tool is available, **ALWAYS** ask the expert to review and refine your research before requesting implementation.
 """
 
 # Research stage prompt - guides initial codebase analysis
@@ -597,7 +609,11 @@ IMPLEMENTATION_PROMPT = """Current Date: {current_date}
 Working Directory: {working_directory}
 
 Base-level task (for reference only):
-{base_task} --keep it simple
+<base task>
+{base_task}
+</base task>
+
+keep it simple. if the expert tool is available, use it frequently for high level logic and planning.
 
 Plan Overview (for reference only, remember you are only implementing your specific task):
 {plan}
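
Note: the prompt sections above all describe the same two-step workflow: stage context, then consult. A hypothetical illustration of that flow (emit_expert_context is named in the prompts; the ask_expert counterpart, the import path, and both signatures are assumptions):

    # Hypothetical illustration of the staged-context workflow the prompts
    # describe; the ask_expert tool and both signatures are assumptions.
    from ra_aid.tools import emit_expert_context, ask_expert

    # 1. Stage everything the expert needs -- it cannot reach the outside world.
    emit_expert_context.invoke({"context": "Key findings, code excerpts, constraints..."})

    # 2. Then ask for the high-level plan and wait for the answer.
    plan = ask_expert.invoke({"question": "Propose a high-level plan for this task."})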

View File

@@ -10,12 +10,19 @@ from rich.text import Text
 from ra_aid.logging_config import get_logger
 from ra_aid.proc.interactive import run_interactive_command
 from ra_aid.text.processing import truncate_output
-from ra_aid.tools.memory import _global_memory
+from ra_aid.tools.memory import _global_memory, log_work_event
 
 console = Console()
 logger = get_logger(__name__)
 
+
+def _truncate_for_log(text: str, max_length: int = 300) -> str:
+    """Truncate text for logging, adding [truncated] if necessary."""
+    if len(text) <= max_length:
+        return text
+    return text[:max_length] + "... [truncated]"
+
+
 @tool
 def run_programming_task(
     instructions: str, files: List[str] = []
@@ -101,14 +108,17 @@ def run_programming_task(
     try:
         # Run the command interactively
         print()
-        output, return_code = run_interactive_command(command)
+        result = run_interactive_command(command)
         print()
 
+        # Log the programming task
+        log_work_event(f"Executed programming task: {_truncate_for_log(instructions)}")
+
         # Return structured output
         return {
-            "output": truncate_output(output.decode() if output else ""),
-            "return_code": return_code,
-            "success": return_code == 0,
+            "output": truncate_output(result[0].decode()) if result[0] else "",
+            "return_code": result[1],
+            "success": result[1] == 0,
         }
     except Exception as e:
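
Note: a quick behavior check for the _truncate_for_log helper added above. The same helper is duplicated verbatim in the shell tool below; hoisting it into a shared module would avoid the copy.

    # Assumes the helper defined in the diff above.
    assert _truncate_for_log("short text") == "short text"
    assert _truncate_for_log("x" * 400) == "x" * 300 + "... [truncated]"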

View File

@@ -8,11 +8,18 @@ from rich.prompt import Prompt
 from ra_aid.console.cowboy_messages import get_cowboy_message
 from ra_aid.proc.interactive import run_interactive_command
 from ra_aid.text.processing import truncate_output
-from ra_aid.tools.memory import _global_memory
+from ra_aid.tools.memory import _global_memory, log_work_event
 
 console = Console()
 
+
+def _truncate_for_log(text: str, max_length: int = 300) -> str:
+    """Truncate text for logging, adding [truncated] if necessary."""
+    if len(text) <= max_length:
+        return text
+    return text[:max_length] + "... [truncated]"
+
+
 @tool
 def run_shell_command(command: str) -> Dict[str, Union[str, int, bool]]:
     """Execute a shell command and return its output.
@@ -68,11 +75,13 @@ def run_shell_command(command: str) -> Dict[str, Union[str, int, bool]]:
         print()
         output, return_code = run_interactive_command(["/bin/bash", "-c", command])
         print()
-        return {
+        result = {
             "output": truncate_output(output.decode()) if output else "",
             "return_code": return_code,
             "success": return_code == 0,
         }
+        log_work_event(f"Executed shell command: {_truncate_for_log(command)}")
+        return result
     except Exception as e:
         print()
         console.print(Panel(str(e), title="❌ Error", border_style="red"))
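
Note: for reference, a successful call returns the structured dict built above. A sketch using LangChain's standard tool invocation (the exact output depends on the command, of course):

    # run_shell_command is a LangChain @tool, so it is invoked with an args dict.
    result = run_shell_command.invoke({"command": "echo hello"})
    # e.g. {"output": "hello\n", "return_code": 0, "success": True}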

View File

@@ -54,7 +54,7 @@ def test_initialize_expert_defaults(clean_env, mock_openai, monkeypatch):
     monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "test-key")
     _llm = initialize_expert_llm()
 
-    mock_openai.assert_called_once_with(api_key="test-key", model="o1")
+    mock_openai.assert_called_once_with(api_key="test-key", model="o1", reasoning_effort="high")
 
 
 def test_initialize_expert_openai_custom(clean_env, mock_openai, monkeypatch):
@@ -63,7 +63,7 @@ def test_initialize_expert_openai_custom(clean_env, mock_openai, monkeypatch):
     _llm = initialize_expert_llm("openai", "gpt-4-preview")
 
     mock_openai.assert_called_once_with(
-        api_key="test-key", model="gpt-4-preview", temperature=0
+        api_key="test-key", model="gpt-4-preview", temperature=0, reasoning_effort="high"
     )
@@ -348,7 +348,7 @@ def test_environment_variable_precedence(clean_env, mock_openai, monkeypatch):
     # Test LLM client creation with expert mode
     _llm = create_llm_client("openai", "o1", is_expert=True)
-    mock_openai.assert_called_with(api_key="expert-key", model="o1")
+    mock_openai.assert_called_with(api_key="expert-key", model="o1", reasoning_effort="high")
 
     # Test environment validation
     monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "")