improve work logging; use reasoning_effort=high for openai expert models; improve prompts
commit f40e11ee21
parent 5fad3fc755
@@ -62,6 +62,7 @@ from ra_aid.tools.memory import (
     _global_memory,
     get_memory_value,
     get_related_files,
+    log_work_event,
 )
 
 console = Console()
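`log_work_event` is only imported here; its implementation lives in `ra_aid.tools.memory` and is not part of this diff. A minimal sketch of the shape such a helper might have (the body below is an assumption, not the real implementation):

```python
# Hypothetical sketch of log_work_event; the actual helper in
# ra_aid.tools.memory may store or format events differently.
from datetime import datetime

_global_memory: dict = {"work_log": []}

def log_work_event(event: str) -> None:
    """Append a timestamped entry to the in-memory work log."""
    _global_memory.setdefault("work_log", []).append(
        {"timestamp": datetime.now().isoformat(), "event": event}
    )
```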
@@ -405,7 +406,11 @@ def run_research_agent(
 
         if agent is not None:
             logger.debug("Research agent completed successfully")
-            return run_agent_with_retry(agent, prompt, run_config)
+            _result = run_agent_with_retry(agent, prompt, run_config)
+            if _result:
+                # Log research completion
+                log_work_event(f"Completed research phase for: {base_task_or_query}")
+            return _result
         else:
             logger.debug("No model provided, running web research tools directly")
             return run_web_research_agent(
@@ -517,7 +522,11 @@ def run_web_research_agent(
         console.print(Panel(Markdown(console_message), title="🔬 Researching..."))
 
         logger.debug("Web research agent completed successfully")
-        return run_agent_with_retry(agent, prompt, run_config)
+        _result = run_agent_with_retry(agent, prompt, run_config)
+        if _result:
+            # Log web research completion
+            log_work_event(f"Completed web research phase for: {query}")
+        return _result
 
     except (KeyboardInterrupt, AgentInterrupt):
         raise
@@ -618,7 +627,11 @@ def run_planning_agent(
     try:
         print_stage_header("Planning Stage")
         logger.debug("Planning agent completed successfully")
-        return run_agent_with_retry(agent, planning_prompt, run_config)
+        _result = run_agent_with_retry(agent, planning_prompt, run_config)
+        if _result:
+            # Log planning completion
+            log_work_event(f"Completed planning phase for: {base_task}")
+        return _result
     except (KeyboardInterrupt, AgentInterrupt):
         raise
     except Exception as e:
@@ -719,7 +732,11 @@ def run_task_implementation_agent(
 
     try:
         logger.debug("Implementation agent completed successfully")
-        return run_agent_with_retry(agent, prompt, run_config)
+        _result = run_agent_with_retry(agent, prompt, run_config)
+        if _result:
+            # Log task implementation completion
+            log_work_event(f"Completed implementation of task: {task}")
+        return _result
     except (KeyboardInterrupt, AgentInterrupt):
         raise
     except Exception as e:
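All four agent runners now follow the same run-then-log sequence. A sketch of how that repetition could be factored out (this helper is not part of the commit):

```python
# Hypothetical consolidation of the repeated pattern above; assumes the
# run_agent_with_retry and log_work_event names from the surrounding code.
def _run_agent_and_log(agent, prompt, run_config, completion_event: str):
    result = run_agent_with_retry(agent, prompt, run_config)
    if result:
        log_work_event(completion_event)
    return result
```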
@@ -181,11 +181,14 @@ def create_llm_client(
             is_expert=is_expert,
         )
     elif provider == "openai":
-        return ChatOpenAI(
-            api_key=config["api_key"],
-            model=model_name,
+        openai_kwargs = {
+            "api_key": config["api_key"],
+            "model": model_name,
             **temp_kwargs,
-        )
+        }
+        if is_expert:
+            openai_kwargs["reasoning_effort"] = "high"
+        return ChatOpenAI(**openai_kwargs)
     elif provider == "anthropic":
         return ChatAnthropic(
             api_key=config["api_key"],
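With this change every expert OpenAI client is constructed with `reasoning_effort="high"`. Note that OpenAI only accepts `reasoning_effort` on its reasoning (o-series) models, so this implicitly assumes expert models are always reasoning models. Illustrative resulting calls (values made up):

```python
# Illustrative only; api_key values come from the caller's config.
expert = create_llm_client("openai", "o1", is_expert=True)
# -> ChatOpenAI(api_key="expert-key", model="o1", reasoning_effort="high")

regular = create_llm_client("openai", "gpt-4o")
# -> ChatOpenAI(api_key="api-key", model="gpt-4o", **temp_kwargs)
```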
@@ -17,6 +17,8 @@ Expert Consultation:
 - Use emit_expert_context to provide all relevant context about what you've found
 - Wait for the expert response before proceeding with research
 - The expert can help analyze complex codebases, unclear patterns, or subtle edge cases
+
+The expert is really good at logic, debugging and planning, but it only has access to the context you give it, and it is unable to access the outside world.
 """
 
 EXPERT_PROMPT_SECTION_PLANNING = """
@@ -25,6 +27,10 @@ Expert Consultation:
 - First use emit_expert_context to provide all relevant context
 - Wait for the expert's response before defining tasks in non-trivial scenarios
 - The expert can help with architectural decisions, correctness checks, and detailed planning
+
+The expert is really good at logic, debugging and planning, but it only has access to the context you give it, and it is unable to access the outside world.
+
+**ALWAYS** use the expert to come up with the high level plan.
 """
 
 EXPERT_PROMPT_SECTION_IMPLEMENTATION = """
@@ -33,6 +39,8 @@ Expert Consultation:
 - Use emit_expert_context to provide context about your specific concern
 - Ask the expert to perform deep analysis or correctness checks
 - Wait for expert guidance before proceeding with implementation
+
+The expert is really good at logic, debugging and planning, but it only has access to the context you give it, and it is unable to access the outside world.
 """
 
 EXPERT_PROMPT_SECTION_CHAT = """
@@ -41,6 +49,8 @@ Expert Consultation:
 - Use emit_expert_context to provide the current conversation state, user requirements, and discovered details
 - Ask the expert for advice on handling ambiguous user requests or complex technical challenges, and to verify correctness
 - Wait for the expert’s guidance before making decisions that significantly alter the approach or final outcome
+
+The expert is really good at logic, debugging and planning, but it only has access to the context you give it, and it is unable to access the outside world.
 """
 
 # Human-specific prompt sections
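How these `EXPERT_PROMPT_SECTION_*` constants reach the model is outside this diff; a hypothetical sketch of the kind of assembly they imply (the function and flag below are assumptions):

```python
# Hypothetical prompt assembly; ra_aid's real composition code is not shown here.
def build_stage_prompt(base_prompt: str, expert_section: str, expert_enabled: bool) -> str:
    """Append the expert-consultation section only when the expert tool is configured."""
    return base_prompt + (expert_section if expert_enabled else "")

# e.g. build_stage_prompt(planning_base, EXPERT_PROMPT_SECTION_PLANNING, True)
```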
@@ -124,6 +134,8 @@ Because this is a new project
 Remember, this is the research phase. Your main focus right now is research and creating instructions for the implementation which will be handed off to the implementation team.
 Focus on finding best practices, idiomatic approaches, and using all available research tools as well as the expert, if available.
 Remember, our scope and capabilities are limited --unless the user specifically asks, we do not want to set up servers like postgres. We want to use sqlite or similar for initial implementation, but make it extensible.
+
+If the expert tool is available, **ALWAYS** ask the expert to review and refine your research before requesting implementation.
 """
 
 # Research stage prompt - guides initial codebase analysis
@@ -597,7 +609,11 @@ IMPLEMENTATION_PROMPT = """Current Date: {current_date}
 Working Directory: {working_directory}
 
 Base-level task (for reference only):
-{base_task} --keep it simple
+<base task>
+{base_task}
+</base task>
+
+keep it simple. if the expert tool is available, use it frequently for high level logic and planning.
 
 Plan Overview (for reference only, remember you are only implementing your specific task):
 {plan}
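The `<base task>` delimiters give the model an unambiguous boundary even when the task text itself contains colons or blank lines. Rendered with an illustrative task value:

```python
# Illustrative rendering of the new delimiter style; the task text is invented.
snippet = """Base-level task (for reference only):
<base task>
{base_task}
</base task>
"""
print(snippet.format(base_task="add a /health endpoint to the API"))
```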
@@ -10,12 +10,19 @@ from rich.text import Text
 from ra_aid.logging_config import get_logger
 from ra_aid.proc.interactive import run_interactive_command
 from ra_aid.text.processing import truncate_output
-from ra_aid.tools.memory import _global_memory
+from ra_aid.tools.memory import _global_memory, log_work_event
 
 console = Console()
 logger = get_logger(__name__)
 
 
+def _truncate_for_log(text: str, max_length: int = 300) -> str:
+    """Truncate text for logging, adding [truncated] if necessary."""
+    if len(text) <= max_length:
+        return text
+    return text[:max_length] + "... [truncated]"
+
+
 @tool
 def run_programming_task(
     instructions: str, files: List[str] = []
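`_truncate_for_log` is defined here and duplicated verbatim in the shell tool below. A sketch of hoisting it next to `log_work_event` instead (the placement is an assumption, not part of this commit):

```python
# Hypothetical shared home, e.g. in ra_aid.tools.memory, so both tool
# modules could import it alongside log_work_event.
def _truncate_for_log(text: str, max_length: int = 300) -> str:
    """Truncate text for logging, adding [truncated] if necessary."""
    if len(text) <= max_length:
        return text
    return text[:max_length] + "... [truncated]"
```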
@@ -101,14 +108,17 @@ def run_programming_task(
     try:
         # Run the command interactively
         print()
-        output, return_code = run_interactive_command(command)
+        result = run_interactive_command(command)
         print()
 
+        # Log the programming task
+        log_work_event(f"Executed programming task: {_truncate_for_log(instructions)}")
+
         # Return structured output
         return {
-            "output": truncate_output(output.decode() if output else ""),
-            "return_code": return_code,
-            "success": return_code == 0,
+            "output": truncate_output(result[0].decode()) if result[0] else "",
+            "return_code": result[1],
+            "success": result[1] == 0,
         }
 
     except Exception as e:
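Per the original unpacking, `run_interactive_command` returns an `(output_bytes, return_code)` tuple; the new code indexes it as `result[0]`/`result[1]`. An equivalent sketch that keeps the clearer unpacking (not part of the commit):

```python
# Equivalent to the indexed version above; assumes the module's existing
# imports (run_interactive_command, truncate_output).
def _structured_result(command: list) -> dict:
    output, return_code = run_interactive_command(command)
    return {
        "output": truncate_output(output.decode()) if output else "",
        "return_code": return_code,
        "success": return_code == 0,
    }
```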
@@ -8,11 +8,18 @@ from rich.prompt import Prompt
 from ra_aid.console.cowboy_messages import get_cowboy_message
 from ra_aid.proc.interactive import run_interactive_command
 from ra_aid.text.processing import truncate_output
-from ra_aid.tools.memory import _global_memory
+from ra_aid.tools.memory import _global_memory, log_work_event
 
 console = Console()
 
 
+def _truncate_for_log(text: str, max_length: int = 300) -> str:
+    """Truncate text for logging, adding [truncated] if necessary."""
+    if len(text) <= max_length:
+        return text
+    return text[:max_length] + "... [truncated]"
+
+
 @tool
 def run_shell_command(command: str) -> Dict[str, Union[str, int, bool]]:
     """Execute a shell command and return its output.
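For reference, the helper's boundary behavior (a quick check, not taken from the repo's tests):

```python
# Sanity checks for _truncate_for_log as defined above.
assert _truncate_for_log("short") == "short"
assert _truncate_for_log("x" * 300) == "x" * 300  # exactly at the limit
assert _truncate_for_log("x" * 301) == "x" * 300 + "... [truncated]"
```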
@@ -68,11 +75,13 @@ def run_shell_command(command: str) -> Dict[str, Union[str, int, bool]]:
         print()
         output, return_code = run_interactive_command(["/bin/bash", "-c", command])
         print()
-        return {
+        result = {
             "output": truncate_output(output.decode()) if output else "",
             "return_code": return_code,
             "success": return_code == 0,
         }
+        log_work_event(f"Executed shell command: {_truncate_for_log(command)}")
+        return result
     except Exception as e:
         print()
         console.print(Panel(str(e), title="❌ Error", border_style="red"))
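Because `run_shell_command` is a LangChain `@tool`, callers invoke it with a dict of arguments; a sketch of the structured result (values illustrative):

```python
# Illustrative invocation; LangChain tools are Runnables, so .invoke works.
result = run_shell_command.invoke({"command": "echo hello"})
# e.g. {"output": "hello\n", "return_code": 0, "success": True}
assert result["success"] and result["return_code"] == 0
```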
@@ -54,7 +54,7 @@ def test_initialize_expert_defaults(clean_env, mock_openai, monkeypatch):
     monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "test-key")
     _llm = initialize_expert_llm()
 
-    mock_openai.assert_called_once_with(api_key="test-key", model="o1")
+    mock_openai.assert_called_once_with(api_key="test-key", model="o1", reasoning_effort="high")
 
 
 def test_initialize_expert_openai_custom(clean_env, mock_openai, monkeypatch):
@@ -63,7 +63,7 @@ def test_initialize_expert_openai_custom(clean_env, mock_openai, monkeypatch):
     _llm = initialize_expert_llm("openai", "gpt-4-preview")
 
     mock_openai.assert_called_once_with(
-        api_key="test-key", model="gpt-4-preview", temperature=0
+        api_key="test-key", model="gpt-4-preview", temperature=0, reasoning_effort="high"
     )
 
 
@@ -348,7 +348,7 @@ def test_environment_variable_precedence(clean_env, mock_openai, monkeypatch):
 
     # Test LLM client creation with expert mode
     _llm = create_llm_client("openai", "o1", is_expert=True)
-    mock_openai.assert_called_with(api_key="expert-key", model="o1")
+    mock_openai.assert_called_with(api_key="expert-key", model="o1", reasoning_effort="high")
 
     # Test environment validation
     monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "")
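These assertions pin `reasoning_effort="high"` to the expert path. A complementary test for the non-expert path might look like this (hypothetical, not in this commit):

```python
# Hypothetical test: non-expert OpenAI clients should not get reasoning_effort.
def test_non_expert_omits_reasoning_effort(clean_env, mock_openai, monkeypatch):
    monkeypatch.setenv("OPENAI_API_KEY", "test-key")
    _llm = create_llm_client("openai", "gpt-4o")
    assert "reasoning_effort" not in mock_openai.call_args.kwargs
```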