[
  {
    "path": ".gitattributes",
    "content": "*.py text eol=lf"
  },
  {
    "path": ".gitignore",
    "content": "__pycache__/\n*.py[cod]\n*$py.class\n\n.env\nenv/\n.venv\n*venv/\n\n.vscode/\n.idea/\n\nmodels\nllama/\n\n# for node\nchroma/\nnode_modules/\n.DS_Store"
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2023 by Kroll Software-Entwicklung\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "README.md",
    "content": "# BabyAGI4All\n\nA small autonomous AI agent based on [BabyAGI](https://github.com/yoheinakajima/babyagi) by Yohei Nakajima.\n<br>\n\nRuns on CPU with the [GPT4All](https://github.com/nomic-ai/gpt4all) model by Nomic AI.\n<br>\n\n100% open source, 100% local, no API-keys needed.\n<br>\n\n# Installation:\n\n1. Clone this repository\n2. Install the requirements: *pip install -r requirements.txt*\n3. Download a model file (see below)\n4. Copy the file *.env.example* to *.env*\n5. Edit the model-path and other preferences in the file *.env*\n\n## Model Downloads\n\nThe following model files have been tested successfully:\n\n* *gpt4all-lora-quantized-ggml.bin*\n* *ggml-wizardLM-7B.q4_2.bin*\n* *ggml-vicuna-7b-1.1-q4_2.bin*\n\nSome of these model files can be downloaded from [here](https://github.com/nomic-ai/gpt4all-chat#manual-download-of-models).\n<br>\n<br>\n\nThen run *python babyagi.py*\n<br>\n\nHave fun!\n<br>\n"
  },
  {
    "path": "babyagi.py",
    "content": "import os\nimport time\nimport logging\nfrom collections import deque\nfrom typing import Dict, List\nimport importlib\nimport chromadb\nfrom dotenv import load_dotenv\nfrom chromadb.api.types import Documents, EmbeddingFunction, Embeddings\nfrom chromadb.utils.embedding_functions import InstructorEmbeddingFunction\nfrom llama_cpp import Llama\n\n# Load default environment variables (.env)\nload_dotenv()\n\n# Engine configuration\nLLM_MODEL = \"GPT4All\"\n\n# Table config\nRESULTS_STORE_NAME = os.getenv(\"RESULTS_STORE_NAME\", os.getenv(\"TABLE_NAME\", \"\"))\nassert RESULTS_STORE_NAME, \"\\033[91m\\033[1m\" + \"RESULTS_STORE_NAME environment variable is missing from .env\" + \"\\033[0m\\033[0m\"\n\n# Run configuration\nINSTANCE_NAME = os.getenv(\"INSTANCE_NAME\", os.getenv(\"BABY_NAME\", \"BabyAGI\"))\nCOOPERATIVE_MODE = \"none\"\nJOIN_EXISTING_OBJECTIVE = False\n\n# Goal configuation\nOBJECTIVE = os.getenv(\"OBJECTIVE\", \"\")\nINITIAL_TASK = os.getenv(\"INITIAL_TASK\", os.getenv(\"FIRST_TASK\", \"\"))\n\n# Model configuration\nTEMPERATURE = float(os.getenv(\"TEMPERATURE\", 0.2))\n\nVERBOSE = (os.getenv(\"VERBOSE\", \"false\").lower() == \"true\")\n\n# Extensions support begin\n\ndef can_import(module_name):\n    try:\n        importlib.import_module(module_name)\n        return True\n    except ImportError:\n        return False\n\nprint(\"\\033[95m\\033[1m\"+\"\\n*****CONFIGURATION*****\\n\"+\"\\033[0m\\033[0m\")\nprint(f\"Name  : {INSTANCE_NAME}\")\nprint(f\"Mode  : {'alone' if COOPERATIVE_MODE in ['n', 'none'] else 'local' if COOPERATIVE_MODE in ['l', 'local'] else 'distributed' if COOPERATIVE_MODE in ['d', 'distributed'] else 'undefined'}\")\nprint(f\"LLM   : {LLM_MODEL}\")\n\n# Check if we know what we are doing\nassert OBJECTIVE, \"\\033[91m\\033[1m\" + \"OBJECTIVE environment variable is missing from .env\" + \"\\033[0m\\033[0m\"\nassert INITIAL_TASK, \"\\033[91m\\033[1m\" + \"INITIAL_TASK environment variable is missing from .env\" + 
\"\\033[0m\\033[0m\"\n\nMODEL_PATH = os.getenv(\"MODEL_PATH\", \"models/gpt4all-lora-quantized-ggml.bin\")\n    \nprint(f\"GPT4All : {MODEL_PATH}\" + \"\\n\")\nassert os.path.exists(MODEL_PATH), \"\\033[91m\\033[1m\" + f\"Model can't be found.\" + \"\\033[0m\\033[0m\"\n\n#CTX_MAX = 2048\n#CTX_MAX = 8192\nCTX_MAX = 16384\n#THREADS_NUM = 16\nTHREADS_NUM = 4\n\nllm = Llama(\n    model_path=MODEL_PATH,\n    n_ctx=CTX_MAX, n_threads=THREADS_NUM,\n    use_mlock=True,\n    verbose=False,\n)\n\nprint(\"\\033[94m\\033[1m\" + \"\\n*****OBJECTIVE*****\\n\" + \"\\033[0m\\033[0m\")\nprint(f\"{OBJECTIVE}\")\n\nif not JOIN_EXISTING_OBJECTIVE: print(\"\\033[93m\\033[1m\" + \"\\nInitial task:\" + \"\\033[0m\\033[0m\" + f\" {INITIAL_TASK}\")\nelse: print(\"\\033[93m\\033[1m\" + f\"\\nJoining to help the objective\" + \"\\033[0m\\033[0m\")\n\n# Results storage using local ChromaDB\nclass DefaultResultsStorage:\n    def __init__(self):\n        logging.getLogger('chromadb').setLevel(logging.ERROR)\n        # Create Chroma collection\n        chroma_persist_dir = \"chroma\"\n        chroma_client = chromadb.Client(\n            settings=chromadb.config.Settings(\n                chroma_db_impl=\"duckdb+parquet\",\n                persist_directory=chroma_persist_dir,\n            )\n        )\n\n        metric = \"cosine\"\n        embedding_function = InstructorEmbeddingFunction()\n        self.collection = chroma_client.get_or_create_collection(\n            name=RESULTS_STORE_NAME,\n            metadata={\"hnsw:space\": metric},\n            embedding_function=embedding_function,\n        )\n\n    def add(self, task: Dict, result: Dict, result_id: str, vector: List):        \n        embeddings = self.collection._embedding_function([vector])\n\n        if (len(self.collection.get(ids=[result_id], include=[])[\"ids\"]) > 0):  # Check if the result already exists\n            self.collection.update(\n                ids=result_id,\n                embeddings=embeddings,\n              
  documents=vector,\n                metadatas={\"task\": task[\"task_name\"], \"result\": result},\n            )\n        else:\n            self.collection.add(\n                ids=result_id,\n                embeddings=embeddings,\n                documents=vector,\n                metadatas={\"task\": task[\"task_name\"], \"result\": result},\n            )\n\n    def query(self, query: str, top_results_num: int) -> List[dict]:\n        count: int = self.collection.count()\n        if count == 0:\n            return []\n        results = self.collection.query(\n            query_texts=query,\n            n_results=min(top_results_num, count),\n            include=[\"metadatas\"]\n        )        \n        tasks = []\n        count = len(results[\"ids\"][0])\n        for i in range(count):            \n            resultidstr = results[\"ids\"][0][i]            \n            id = int(resultidstr[7:])\n            item = results[\"metadatas\"][0][i]            \n            task = {'task_id': id, 'task_name': item[\"task\"]}\n            tasks.append(task)            \n        return tasks\n   \n\n# Initialize results storage\nresults_storage = DefaultResultsStorage()\n\n# Task storage supporting only a single instance of BabyAGI\nclass SingleTaskListStorage:\n    def __init__(self):\n        self.tasks = deque([])\n        self.task_id_counter = 0\n\n    def append(self, task: Dict):\n        self.tasks.append(task)\n\n    def replace(self, tasks: List[Dict]):\n        self.tasks = deque(tasks)\n\n    def popleft(self):\n        return self.tasks.popleft()\n\n    def is_empty(self):\n        return False if self.tasks else True\n\n    def next_task_id(self):\n        self.task_id_counter += 1\n        return self.task_id_counter\n\n    def get_task_names(self):\n        return [t[\"task_name\"] for t in self.tasks]\n\n\n# Initialize tasks storage\ntasks_storage = SingleTaskListStorage()\n\ndef gpt_call(prompt: str, temperature: float = TEMPERATURE, 
max_tokens: int = 256):\n    result = llm(prompt[:CTX_MAX], echo=True, temperature=temperature, max_tokens=max_tokens)\n    return result['choices'][0]['text'][len(prompt):].strip()\n\ndef strip_numbered_list(nl: List[str]) -> List[str]:\n    result_list = []\n    filter_chars = ['#', '(', ')', '[', ']', '.', ':', ' ']\n\n    for line in nl:\n        line = line.strip()\n        if len(line) > 0:\n            parts = line.split(\" \", 1)\n            if len(parts) == 2:\n                left_part = ''.join(x for x in parts[0] if not x in filter_chars)\n                if left_part.isnumeric():\n                    result_list.append(parts[1].strip())\n                else:\n                    result_list.append(line)\n            else:\n                result_list.append(line)\n\n    # filter result_list\n    result_list = [line for line in result_list if len(line) > 3]\n    \n    # remove duplicates\n    result_list = list(set(result_list))\n    return result_list\n\ndef fix_prompt(prompt: str) -> str:\n    lines = prompt.split(\"\\n\") if \"\\n\" in prompt else [prompt]    \n    return \"\\n\".join([line.strip() for line in lines])\n\ndef task_creation_agent(\n    objective: str, result: Dict, task_description: str, task_list: List[str]\n):    \n    prompt = f\"\"\"\n    Your objective: {objective}\\n\n    Take into account these previously completed tasks but don't repeat them: {task_list}.\\n\n    The last completed task has the result: {result[\"data\"]}.\\n\n    Develop a task list based on the result.\\n\n    Response:\"\"\"\n\n    prompt = fix_prompt(prompt)\n\n    response = gpt_call(prompt)\n    pos = response.find(\"1\")\n    if (pos > 0):\n        response = response[pos - 1:]\n\n    if response == '':\n        print(\"\\n*** Empty Response from task_creation_agent***\")\n        new_tasks_list = result[\"data\"].split(\"\\n\") if len(result) > 0 else [response]\n    else:\n        new_tasks = response.split(\"\\n\") if \"\\n\" in response else 
[response]\n        new_tasks_list = strip_numbered_list(new_tasks)\n        \n    return [{\"task_name\": task_name} for task_name in (t for t in new_tasks_list if not t == '')]\n\n\ndef prioritization_agent():\n    task_names = tasks_storage.get_task_names()\n    next_task_id = tasks_storage.next_task_id()    \n\n    prompt = f\"\"\"\n    Please prioritize, summarize and consolidate the following tasks: {task_names}.\\n\n    Consider the ultimate objective: {OBJECTIVE}.\\n\n    Return the result as a numbered list.\n    \"\"\"\n\n    prompt = fix_prompt(prompt)\n\n    response = gpt_call(prompt)\n    pos = response.find(\"1\")\n    if (pos > 0):\n        response = response[pos - 1:]\n\n    new_tasks = response.split(\"\\n\") if \"\\n\" in response else [response]\n    new_tasks = strip_numbered_list(new_tasks)\n    new_tasks_list = []\n    i = 0\n    for task_string in new_tasks:        \n        new_tasks_list.append({\"task_id\": i + next_task_id, \"task_name\": task_string})\n        i += 1\n    \n    if len(new_tasks_list) > 0:\n        tasks_storage.replace(new_tasks_list)\n\n\n# Execute a task based on the objective and five previous tasks\ndef execution_agent(objective: str, task: str) -> str:\n    \"\"\"\n    Executes a task based on the given objective and previous context.\n\n    Args:\n        objective (str): The objective or goal for the AI to perform the task.\n        task (str): The task to be executed by the AI.\n\n    Returns:\n        str: The response generated by the AI for the given task.\n\n    \"\"\"\n    \n    context = context_agent(query=objective, top_results_num=5)\n\n    context_list = [t['task_name'] for t in context if t['task_name'] != INITIAL_TASK]\n    #context_list = [t['task_name'] for t in context]\n\n    # remove duplicates\n    context_list = list(set(context_list))    \n\n    if VERBOSE and len(context_list) > 0:\n        print(\"\\n*******RELEVANT CONTEXT******\\n\")\n        print(context_list)\n\n    if task == 
INITIAL_TASK:\n        prompt = f\"\"\"\n        You are an AI who performs one task based on the following objective: {objective}.\\n\n        Your task: {task}\\nResponse:\"\"\"\n    else:\n        prompt = f\"\"\"\n        Your objective: {objective}.\\n\n        Take into account these previously completed tasks but don't repeat them: {context_list}.\\n\n        Your task: {task}\\n\n        Response:\"\"\"\n\n    #Give an advice how to achieve your task!\\n\n\n    prompt = fix_prompt(prompt)\n\n    result = gpt_call(prompt)\n    pos = result.find(\"1\")\n    if (pos > 0):\n        result = result[pos - 1:]\n    return result\n\n\n# Get the top n completed tasks for the objective\ndef context_agent(query: str, top_results_num: int):\n    \"\"\"\n    Retrieves context for a given query from an index of tasks.\n\n    Args:\n        query (str): The query or objective for retrieving context.\n        top_results_num (int): The number of top results to retrieve.\n\n    Returns:\n        list: A list of tasks as context for the given query, sorted by relevance.\n\n    \"\"\"\n    results = results_storage.query(query=query, top_results_num=top_results_num)\n    #print(\"\\n***** RESULTS *****\")\n    #print(results)\n    return results\n\n# Add the initial task if starting new objective\nif not JOIN_EXISTING_OBJECTIVE:\n    initial_task = {\n        \"task_id\": tasks_storage.next_task_id(),\n        \"task_name\": INITIAL_TASK\n    }\n    tasks_storage.append(initial_task)\n\ndef main ():\n    while True:\n        # As long as there are tasks in the storage...\n        if not tasks_storage.is_empty():\n            # Print the task list\n            print(\"\\033[95m\\033[1m\" + \"\\n*****TASK LIST*****\\n\" + \"\\033[0m\\033[0m\")\n            for t in tasks_storage.get_task_names():\n                print(\" • \"+t)\n\n            # Step 1: Pull the first incomplete task\n            task = tasks_storage.popleft()\n            print(\"\\033[92m\\033[1m\" + 
\"\\n*****NEXT TASK*****\\n\" + \"\\033[0m\\033[0m\")\n            print(task['task_name'])\n\n            # Send to execution function to complete the task based on the context\n            result = execution_agent(OBJECTIVE, task[\"task_name\"])            \n\n            print(\"\\033[93m\\033[1m\" + \"\\n*****TASK RESULT*****\\n\" + \"\\033[0m\\033[0m\")\n            print(result)\n\n            # Step 2: Enrich result and store in the results storage\n            # This is where you should enrich the result if needed\n            enriched_result = {\n                \"data\": result\n            }  \n            # extract the actual result from the dictionary\n            # since we don't do enrichment currently\n            vector = enriched_result[\"data\"]  \n\n            result_id = f\"result_{task['task_id']}\"\n            results_storage.add(task, result, result_id, vector)\n\n            # Step 3: Create new tasks and reprioritize task list\n            # only the main instance in cooperative mode does that\n            new_tasks = task_creation_agent(\n                OBJECTIVE,\n                enriched_result,\n                task[\"task_name\"],\n                tasks_storage.get_task_names(),\n            )\n\n            for new_task in new_tasks:\n                if not new_task['task_name'] == '':\n                    new_task.update({\"task_id\": tasks_storage.next_task_id()})\n                    tasks_storage.append(new_task)\n\n            if not JOIN_EXISTING_OBJECTIVE: prioritization_agent()\n\n            # Sleep a bit before checking the task list again\n            time.sleep(5) \n\n        else:\n            print (\"Ready, no more tasks.\")\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "requirements.txt",
    "content": "argparse==1.4.0\nchromadb==0.3.21\npre-commit>=3.2.0\npython-dotenv==1.0.0\nInstructorEmbedding>=1.0.0\nllama-cpp-python==0.1.43"
  }
]