Adjustments to get smaller agent models working better.

2024-12-28 18:19:42 -05:00 · 2024-12-28 18:19:42 -05:00 · ace34633de
parent 35db6b633f
commit ace34633de
5 changed files with 40 additions and 46 deletions
--- a/ra_aid/agent_utils.py
+++ b/ra_aid/agent_utils.py
@ -316,38 +316,8 @@ def run_web_research_agent(
        if console_message:
            console.print(Panel(Markdown(console_message), title="🔬 Researching..."))

-        # Run agent with retry logic if available
-        if agent is not None:
-            logger.debug("Web research agent completed successfully")
-            return run_agent_with_retry(agent, prompt, run_config)
-        else:
-            # Just use the web research tools directly
-            logger.debug("No model provided, using web research tools directly")
-            tavily_tool = next((tool for tool in tools if tool.name == 'web_search_tavily'), None)
-
-            if not tavily_tool:
-                return "No web research results found"
-
-            result = tavily_tool.invoke({"query": query})
-            if not result:
-                return "No web research results found"
-
-            # Format Tavily results
-            markdown_result = "# Search Results\n\n"
-            for item in result.get('results', []):
-                title = item.get('title', 'Untitled')
-                url = item.get('url', '')
-                content = item.get('content', '')
-                score = item.get('score', 0)
-
-                markdown_result += f"## {title}\n"
-                markdown_result += f"**Score**: {score:.2f}\n\n"
-                markdown_result += f"{content}\n\n"
-                markdown_result += f"[Read more]({url})\n\n"
-                markdown_result += "---\n\n"
-
-            console.print(Panel(Markdown(markdown_result), title="🔍 Web Research Results"))
-            return markdown_result
+        logger.debug("Web research agent completed successfully")
+        return run_agent_with_retry(agent, prompt, run_config)

    except (KeyboardInterrupt, AgentInterrupt):
        raise
@ -571,7 +541,7 @@ def run_agent_with_retry(agent, prompt: str, config: dict) -> Optional[str]:
                        print_agent_output(chunk)
                        if _global_memory['plan_completed']:
                            _global_memory['plan_completed'] = False
-                            _global_memory['task_completed'] = False
+                            _global_memory['task_completed'] = False
                            _global_memory['completion_message'] = ''
                            break
                        if _global_memory['task_completed'] or _global_memory['plan_completed']:
--- a/ra_aid/agents/ciayn_agent.py
+++ b/ra_aid/agents/ciayn_agent.py
@ -1,8 +1,13 @@
 import inspect
 from dataclasses import dataclass
 from typing import Dict, Any, Generator, List, Optional, Union
+
 from langchain_core.messages import AIMessage, HumanMessage, BaseMessage
 from ra_aid.exceptions import ToolExecutionError
+from ra_aid.logging_config import get_logger
+
+logger = get_logger(__name__)
+
@dataclass
 class ChunkMessage:
    content: str
@ -88,10 +93,11 @@ The user cannot see the results of function calls, so you have to explicitly use
 You must always respond with a single line of python that calls one of the available tools.
 Use as many steps as you need to in order to fully complete the task.
 Start by asking the user what they want.
-</agent instructions>

 You must carefully review the conversation history, which functions were called so far, returned results, etc., and make sure the very next function call you make makes sense in order to achieve the original goal.
 You must achieve the goal in as few steps possible, but no fewer.
+You typically don't want to keep calling the same function over and over with the same parameters.
+</agent instructions>

 You must ONLY use ONE of the following functions (these are the ONLY functions that exist):

@ -99,11 +105,29 @@ You must ONLY use ONE of the following functions (these are the ONLY functions t
 {"\n\n".join(self.available_functions)}
 </available functions>

-<example output>
+You may use ANY of the above functions to complete your job. Use the best one for the current step you are on. Be efficient, avoid getting stuck in repetitive loops, and do not hesitate to call functions which delegate your work to make your life easier.
+But you MUST NOT assume tools exist that are not in the above list, e.g. write_file_tool.
+
+<example bad output>
+write_file_tool(...)
+</example bad output>
+
+<example good output>
 request_research_and_implementation(\"\"\"
 Example query.
 \"\"\")
-</example output>
+
+run_programming_task(\"\"\"
+# Example Programming Task
+
+Implement a widget factory satisfying the following requirements:
+
+- Requirement A
+- Requirement B
+
+...
+\"\"\")
+</example good output>

 Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**"""
        return base_prompt
@ -212,6 +236,7 @@ Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**"""
            response = self.model.invoke(full_history)
                
            try:
+                logger.debug(f"Code generated by agent: {response.content}")
                last_result = self._execute_tool(response.content)
                chat_history.append(response)
                first_iteration = False
--- a/ra_aid/prompts.py
+++ b/ra_aid/prompts.py
@ -227,6 +227,8 @@ If uncertain at any stage, consult the expert (if expert is available) for final

 If this is a top-level README.md or docs folder, start there. If relevant tests exist, run them upfront as part of the research phase to establish a baseline.

+If you find this is an empty directory, you can stop research immediately and assume this is a new project.
+
 You have often been criticized for:
  - Needlessly requesting more research tasks, especially for general background knowledge which you already know.
  - Not requesting more research tasks when it is truly called for, e.g. to dig deeper into a specific aspect of a monorepo project.
@ -480,7 +482,9 @@ Guidelines:

 You have often been criticized for:
  - Overcomplicating things.
+  - Researching things that are already in your research notes.
  - Doing the same work over and over across tasks.
+    - So, when you complete work, remember that and only work on unique tasks going forward.
  - Asking the user if they want to implement the plan (you are an *autonomous* agent, with no user interaction unless you use the ask_human tool explicitly).

 NEVER ANNOUNCE WHAT YOU ARE DOING, JUST DO IT!
--- a/ra_aid/tools/directory.py
+++ b/ra_aid/tools/directory.py
--- a/ra_aid/tools/programmer.py
+++ b/ra_aid/tools/programmer.py
@ -12,14 +12,9 @@ from ra_aid.text.processing import truncate_output

 console = Console()

-
-class RunProgrammingTaskInput(BaseModel):
-    instructions: str = Field(description="Instructions for the programming task")
-    files: Optional[List[str]] = Field(None, description="Optional list of files for Aider to examine")
-
@tool
-def run_programming_task(input: RunProgrammingTaskInput) -> Dict[str, Union[str, int, bool]]:
-    """Assign a programming task to a human programmer.
+def run_programming_task(instructions: str, files: List[str] = []) -> Dict[str, Union[str, int, bool]]:
+    """Assign a programming task to a human programmer. Use this instead of trying to write code to files yourself.

 Before using this tool, ensure all related files have been emitted with emit_related_files.

@ -60,17 +55,17 @@ Returns: { "output": stdout+stderr, "return_code": 0 if success, "success": True
    # ensure message aider argument is always present
    command.append("-m")

-    command.append(input.instructions)
+    command.append(instructions)

    # Add files to command
-    files_to_use = file_paths + (input.files or [])
+    files_to_use = file_paths + (files or [])
    if files_to_use:
        command.extend(files_to_use)

    # Create a pretty display of what we're doing
    task_display = [
        "## Instructions\n",
-        f"{input.instructions}\n"
+        f"{instructions}\n"
    ]

    if files_to_use: