diff --git a/ra_aid/agent_utils.py b/ra_aid/agent_utils.py
index 132ff57..cdeac25 100644
--- a/ra_aid/agent_utils.py
+++ b/ra_aid/agent_utils.py
@@ -62,6 +62,7 @@ from ra_aid.tools.memory import (
     _global_memory,
     get_memory_value,
     get_related_files,
+    log_work_event,
 )

 console = Console()
@@ -405,7 +406,11 @@ def run_research_agent(

         if agent is not None:
             logger.debug("Research agent completed successfully")
-            return run_agent_with_retry(agent, prompt, run_config)
+            _result = run_agent_with_retry(agent, prompt, run_config)
+            if _result:
+                # Log research completion
+                log_work_event(f"Completed research phase for: {base_task_or_query}")
+            return _result
         else:
             logger.debug("No model provided, running web research tools directly")
             return run_web_research_agent(
@@ -517,7 +522,11 @@ def run_web_research_agent(
         console.print(Panel(Markdown(console_message), title="🔬 Researching..."))

         logger.debug("Web research agent completed successfully")
-        return run_agent_with_retry(agent, prompt, run_config)
+        _result = run_agent_with_retry(agent, prompt, run_config)
+        if _result:
+            # Log web research completion
+            log_work_event(f"Completed web research phase for: {query}")
+        return _result

     except (KeyboardInterrupt, AgentInterrupt):
         raise
@@ -618,7 +627,11 @@ def run_planning_agent(
     try:
         print_stage_header("Planning Stage")
         logger.debug("Planning agent completed successfully")
-        return run_agent_with_retry(agent, planning_prompt, run_config)
+        _result = run_agent_with_retry(agent, planning_prompt, run_config)
+        if _result:
+            # Log planning completion
+            log_work_event(f"Completed planning phase for: {base_task}")
+        return _result
     except (KeyboardInterrupt, AgentInterrupt):
         raise
     except Exception as e:
@@ -719,7 +732,11 @@ def run_task_implementation_agent(

     try:
         logger.debug("Implementation agent completed successfully")
-        return run_agent_with_retry(agent, prompt, run_config)
+        _result = run_agent_with_retry(agent, prompt, run_config)
+        if _result:
+            # Log task implementation completion
+            log_work_event(f"Completed implementation of task: {task}")
+        return _result
     except (KeyboardInterrupt, AgentInterrupt):
         raise
     except Exception as e:
diff --git a/ra_aid/llm.py b/ra_aid/llm.py
index 3080813..f95509e 100644
--- a/ra_aid/llm.py
+++ b/ra_aid/llm.py
@@ -181,11 +181,14 @@ def create_llm_client(
             is_expert=is_expert,
         )
     elif provider == "openai":
-        return ChatOpenAI(
-            api_key=config["api_key"],
-            model=model_name,
+        openai_kwargs = {
+            "api_key": config["api_key"],
+            "model": model_name,
             **temp_kwargs,
-        )
+        }
+        if is_expert:
+            openai_kwargs["reasoning_effort"] = "high"
+        return ChatOpenAI(**openai_kwargs)
     elif provider == "anthropic":
         return ChatAnthropic(
             api_key=config["api_key"],
diff --git a/ra_aid/prompts.py b/ra_aid/prompts.py
index 1c43567..f17cc00 100644
--- a/ra_aid/prompts.py
+++ b/ra_aid/prompts.py
@@ -17,6 +17,8 @@ Expert Consultation:
 - Use emit_expert_context to provide all relevant context about what you've found
 - Wait for the expert response before proceeding with research
 - The expert can help analyze complex codebases, unclear patterns, or subtle edge cases
+
+The expert excels at logic, debugging, and planning, but it only has access to the context you give it and cannot access the outside world.
""" EXPERT_PROMPT_SECTION_PLANNING = """ @@ -25,6 +27,10 @@ Expert Consultation: - First use emit_expert_context to provide all relevant context - Wait for the expert's response before defining tasks in non-trivial scenarios - The expert can help with architectural decisions, correctness checks, and detailed planning + +The expert is really good at logic, debugging and planning, but it only has access to the context you give it, and it is unable to access the outside world. + +**ALWAYS** use the expert to come up with the high level plan. """ EXPERT_PROMPT_SECTION_IMPLEMENTATION = """ @@ -33,6 +39,8 @@ Expert Consultation: - Use emit_expert_context to provide context about your specific concern - Ask the expert to perform deep analysis or correctness checks - Wait for expert guidance before proceeding with implementation + +The expert is really good at logic, debugging and planning, but it only has access to the context you give it, and it is unable to access the outside world. """ EXPERT_PROMPT_SECTION_CHAT = """ @@ -41,6 +49,8 @@ Expert Consultation: - Use emit_expert_context to provide the current conversation state, user requirements, and discovered details - Ask the expert for advice on handling ambiguous user requests or complex technical challenges, and to verify correctness - Wait for the expert’s guidance before making decisions that significantly alter the approach or final outcome + +The expert is really good at logic, debugging and planning, but it only has access to the context you give it, and it is unable to access the outside world. """ # Human-specific prompt sections @@ -124,6 +134,8 @@ Because this is a new project Remember, this is the research phase. Your main focus right now is research and creating instructions for the implementation which will be handed off to the implementation team. Focus on finding best practices, idiomatic approaches, and using all available research tools as well as the expert, if available. Remember, our scope and capabilities are limited --unless the user specifically asks, we do not want to set up servers like postgres. We want to use sqlite or similar for initial implementation, but make it extensible. + +If the expert tool is available, **ALWAYS** ask the expert to review and refine your research before requesting implementation. """ # Research stage prompt - guides initial codebase analysis @@ -597,7 +609,11 @@ IMPLEMENTATION_PROMPT = """Current Date: {current_date} Working Directory: {working_directory} Base-level task (for reference only): -{base_task} --keep it simple + +{base_task} + + +keep it simple. if the expert tool is available, use it frequently for high level logic and planning. Plan Overview (for reference only, remember you are only implementing your specific task): {plan} diff --git a/ra_aid/tools/programmer.py b/ra_aid/tools/programmer.py index a9e6b8a..008a737 100644 --- a/ra_aid/tools/programmer.py +++ b/ra_aid/tools/programmer.py @@ -10,12 +10,19 @@ from rich.text import Text from ra_aid.logging_config import get_logger from ra_aid.proc.interactive import run_interactive_command from ra_aid.text.processing import truncate_output -from ra_aid.tools.memory import _global_memory +from ra_aid.tools.memory import _global_memory, log_work_event console = Console() logger = get_logger(__name__) +def _truncate_for_log(text: str, max_length: int = 300) -> str: + """Truncate text for logging, adding [truncated] if necessary.""" + if len(text) <= max_length: + return text + return text[:max_length] + "... 
+
+
 @tool
 def run_programming_task(
     instructions: str, files: List[str] = []
@@ -101,14 +108,17 @@ def run_programming_task(
     try:
         # Run the command interactively
         print()
-        output, return_code = run_interactive_command(command)
+        result = run_interactive_command(command)
         print()

+        # Log the programming task
+        log_work_event(f"Executed programming task: {_truncate_for_log(instructions)}")
+
         # Return structured output
         return {
-            "output": truncate_output(output.decode() if output else ""),
-            "return_code": return_code,
-            "success": return_code == 0,
+            "output": truncate_output(result[0].decode()) if result[0] else "",
+            "return_code": result[1],
+            "success": result[1] == 0,
         }
     except Exception as e:
diff --git a/ra_aid/tools/shell.py b/ra_aid/tools/shell.py
index 271128a..a9d232d 100644
--- a/ra_aid/tools/shell.py
+++ b/ra_aid/tools/shell.py
@@ -8,11 +8,18 @@ from rich.prompt import Prompt
 from ra_aid.console.cowboy_messages import get_cowboy_message
 from ra_aid.proc.interactive import run_interactive_command
 from ra_aid.text.processing import truncate_output
-from ra_aid.tools.memory import _global_memory
+from ra_aid.tools.memory import _global_memory, log_work_event

 console = Console()


+def _truncate_for_log(text: str, max_length: int = 300) -> str:
+    """Truncate text for logging, adding [truncated] if necessary."""
+    if len(text) <= max_length:
+        return text
+    return text[:max_length] + "... [truncated]"
+
+
 @tool
 def run_shell_command(command: str) -> Dict[str, Union[str, int, bool]]:
     """Execute a shell command and return its output.
@@ -68,11 +75,13 @@ def run_shell_command(command: str) -> Dict[str, Union[str, int, bool]]:
         print()
         output, return_code = run_interactive_command(["/bin/bash", "-c", command])
         print()
-        return {
+        result = {
             "output": truncate_output(output.decode()) if output else "",
             "return_code": return_code,
             "success": return_code == 0,
         }
+        log_work_event(f"Executed shell command: {_truncate_for_log(command)}")
+        return result
     except Exception as e:
         print()
         console.print(Panel(str(e), title="❌ Error", border_style="red"))
diff --git a/tests/ra_aid/test_llm.py b/tests/ra_aid/test_llm.py
index d81a2f3..1f16ad2 100644
--- a/tests/ra_aid/test_llm.py
+++ b/tests/ra_aid/test_llm.py
@@ -54,7 +54,7 @@ def test_initialize_expert_defaults(clean_env, mock_openai, monkeypatch):
     monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "test-key")
     _llm = initialize_expert_llm()

-    mock_openai.assert_called_once_with(api_key="test-key", model="o1")
+    mock_openai.assert_called_once_with(api_key="test-key", model="o1", reasoning_effort="high")


 def test_initialize_expert_openai_custom(clean_env, mock_openai, monkeypatch):
@@ -63,7 +63,7 @@ def test_initialize_expert_openai_custom(clean_env, mock_openai, monkeypatch):
     _llm = initialize_expert_llm("openai", "gpt-4-preview")

     mock_openai.assert_called_once_with(
-        api_key="test-key", model="gpt-4-preview", temperature=0
+        api_key="test-key", model="gpt-4-preview", temperature=0, reasoning_effort="high"
     )


@@ -348,7 +348,7 @@ def test_environment_variable_precedence(clean_env, mock_openai, monkeypatch):

     # Test LLM client creation with expert mode
     _llm = create_llm_client("openai", "o1", is_expert=True)
-    mock_openai.assert_called_with(api_key="expert-key", model="o1")
+    mock_openai.assert_called_with(api_key="expert-key", model="o1", reasoning_effort="high")

     # Test environment validation
     monkeypatch.setenv("EXPERT_OPENAI_API_KEY", "")
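
Reviewer note: below is a minimal sketch (not part of the patch) exercising the truncation helper this diff adds. It uses only names defined above (_truncate_for_log from ra_aid/tools/shell.py); the sample inputs are made up for illustration.

    # Sketch: assumes the patch above is applied.
    from ra_aid.tools.shell import _truncate_for_log

    # Short text passes through unchanged; anything longer than 300 characters
    # is capped and suffixed so log_work_event entries stay readable.
    assert _truncate_for_log("ls -la") == "ls -la"
    long_command = "x" * 400
    assert _truncate_for_log(long_command).endswith("... [truncated]")
    assert len(_truncate_for_log(long_command)) == 300 + len("... [truncated]")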
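And a sketch of the new expert path in create_llm_client, mirroring the updated tests. It assumes the tests' mock_openai fixture patches ra_aid.llm.ChatOpenAI and that expert clients read their key from EXPERT_OPENAI_API_KEY; both are assumptions taken from the test code above, not verified here.

    # Sketch: expert clients on the openai provider now request high reasoning effort.
    import os
    from unittest.mock import patch

    os.environ["EXPERT_OPENAI_API_KEY"] = "expert-key"  # assumed key source
    with patch("ra_aid.llm.ChatOpenAI") as mock_openai:  # assumed patch target
        from ra_aid.llm import create_llm_client
        create_llm_client("openai", "o1", is_expert=True)
        mock_openai.assert_called_with(
            api_key="expert-key", model="o1", reasoning_effort="high"
        )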