From 99d0b1f44056bc4489ecdcbee0fcf2bb0954ed76 Mon Sep 17 00:00:00 2001
From: AI Christianson <ai.christianson@christianson.ai>
Date: Sat, 28 Dec 2024 19:12:00 -0500
Subject: [PATCH] Improve prompts to discourage premature task-completion claims

---
 ra_aid/agents/ciayn_agent.py | 8 +++++---
 ra_aid/prompts.py            | 9 +++++++--
 2 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/ra_aid/agents/ciayn_agent.py b/ra_aid/agents/ciayn_agent.py
index 1e32dc2..cf6a3af 100644
--- a/ra_aid/agents/ciayn_agent.py
+++ b/ra_aid/agents/ciayn_agent.py
@@ -2,7 +2,7 @@ import inspect
 from dataclasses import dataclass
 from typing import Dict, Any, Generator, List, Optional, Union
 
-from langchain_core.messages import AIMessage, HumanMessage, BaseMessage
+from langchain_core.messages import AIMessage, HumanMessage, BaseMessage, SystemMessage
 from ra_aid.exceptions import ToolExecutionError
 from ra_aid.logging_config import get_logger
 
@@ -107,6 +107,7 @@ You must ONLY use ONE of the following functions (these are the ONLY functions t
 
 You may use ANY of the above functions to complete your job. Use the best one for the current step you are on. Be efficient, avoid getting stuck in repetitive loops, and do not hesitate to call functions which delegate your work to make your life easier.
 But you MUST NOT assume tools exist that are not in the above list, e.g. write_file_tool.
+Consider your task done only once you have taken *ALL* the steps required to complete it.
 
 <example bad output>
 write_file_tool(...)
@@ -128,8 +129,9 @@ Implement a widget factory satisfying the following requirements:
 ...
 \"\"\")
 </example good output>
-
+DO NOT CLAIM YOU ARE FINISHED UNTIL YOU ACTUALLY ARE!
 Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**"""
+
         return base_prompt
 
     def _execute_tool(self, code: str) -> str:
@@ -233,7 +235,7 @@ Output **ONLY THE CODE** and **NO MARKDOWN BACKTICKS**"""
             chat_history.append(HumanMessage(content=base_prompt))
             
             full_history = self._trim_chat_history(initial_messages, chat_history)
-            response = self.model.invoke(full_history)
+            response = self.model.invoke([SystemMessage("Execute efficiently yet completely as a fully autonomous agent.")] + full_history)
                 
             try:
                 logger.debug(f"Code generated by agent: {response.content}")
diff --git a/ra_aid/prompts.py b/ra_aid/prompts.py
index ed31f44..cb6317f 100644
--- a/ra_aid/prompts.py
+++ b/ra_aid/prompts.py
@@ -308,6 +308,7 @@ You have often been criticized for:
     - Expanding beyond the original query scope
     - Not clearly organizing output around the query
     - Not indicating confidence levels or noting uncertainties
+    - Instantly claiming the task is complete before you have done any work at all
 
 NEVER ANNOUNCE WHAT YOU ARE DOING, JUST DO IT!
 """
@@ -409,8 +410,12 @@ Thoroughness and Completeness
     - Not requesting enough research subtasks on changes on large projects, e.g. to discover testing or UI conventions, etc.
 
 You have often been criticized for:
-  - Needlessly requesting more research tasks, especially for general background knowledge which you already know.
-  - Not requesting more research tasks when it is truly called for, e.g. to dig deeper into a specific aspect of a monorepo project.
+    - Not searching thoroughly enough before emitting findings
+    - Missing key sources or perspectives
+    - Not properly citing information
+    - Expanding beyond the original query scope
+    - Not clearly organizing output around the query
+    - Not indicating confidence levels or noting uncertainties
 
 NEVER ANNOUNCE WHAT YOU ARE DOING, JUST DO IT!
 """