Showing preview only (216K chars total). Download the full file or copy to clipboard to get everything.
Repository: topoteretes/PromethAI-Backend
Branch: main
Commit: cfcf10acd983
Files: 58
Total size: 200.8 KB
Directory structure:
gitextract_hp6s2jka/
├── .github/
│ ├── actions/
│ │ └── image_builder/
│ │ └── action.yaml
│ └── workflows/
│ ├── cd.yaml
│ ├── cd_prd.yaml
│ └── ci.yaml
├── .gitignore
├── CODE_OF_CONDUCT.md
├── Dockerfile
├── LICENSE.md
├── README.md
├── act.env.example
├── agent.py
├── api.py
├── assistant_templates.yaml
├── auth/
│ ├── auth.py
│ ├── auth_utils.py
│ └── cognito/
│ └── JWTBearer.py
├── bin/
│ └── dockerize
├── bots/
│ ├── __init__.py
│ ├── bot_extension.py
│ ├── bot_loading_util.py
│ ├── ff.yaml
│ └── mam.tf
├── docker-compose.yml
├── entrypoint.sh
├── examples/
│ ├── level_1/
│ │ ├── level_1_pdf_vectorstore_dlt_etl.py
│ │ └── ticket_schema.json
│ ├── level_2/
│ │ ├── Dockerfile
│ │ ├── Readme.md
│ │ ├── api.py
│ │ ├── docker-compose.yml
│ │ ├── entrypoint.sh
│ │ ├── level_2_pdf_vectorstore__dlt_contracts.py
│ │ ├── pyproject.toml
│ │ └── schema_registry/
│ │ └── ticket_schema.json
│ └── simple_ETLs.py
├── extensions/
│ ├── __init__.py
│ ├── argparseext.py
│ └── dotenvext.py
├── fetch_secret.py
├── fixtures/
│ ├── choose_meal_tree_response.json
│ ├── goal_response.json
│ ├── recipe_response.json
│ ├── subgoal_response.json
│ └── update_meal_tree_response.json
├── food_scrapers/
│ └── wolt_tool.py
├── heuristic_experience_orchestrator/
│ ├── README.md
│ └── task_identification.py
├── initdb/
│ └── init.sql
├── llm_chains/
│ ├── __init__.py
│ └── chains.py
├── pyproject.toml
├── test_api.py
├── utils/
│ ├── load_prod_redis.py
│ └── utils.py
└── validations/
├── defaults/
│ ├── categories_defaults.json
│ └── categories_input_defaults.json
└── schema/
├── decompose_categories.json
└── decompose_categories_input.json
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/actions/image_builder/action.yaml
================================================
# Composite action that builds the PromethAI Docker image by calling
# ./bin/dockerize and records the computed image tag in
# /tmp/.DOCKER_IMAGE_VERSION so later steps of the calling job can read it.
name: 'Build Docker images for PromethAI'
description: 'Build PromethAI-related Docker images and push to the Docker registry (AWS ECR)'

inputs:
  stage:
    description: 'The stage of the pipeline, such as "dev" or "prd", for the PromethAI app'
    required: true
  aws_account_id:
    description: 'The AWS account ID for the PromethAI app'
    required: true
  should_publish:
    description: 'Whether to publish the PromethAI Docker image to AWS ECR; should be either "true" or "false"'
    required: true
  ecr_image_repo_name:
    description: 'The Docker image ECR repository name for the PromethAI app, such as "workflows"'
    required: true
  dockerfile_location:
    description: 'The directory location of the Dockerfile for the PromethAI app'
    required: true

runs:
  using: "composite"
  steps:
    - name: Build PromethAI App Docker image
      shell: bash
      # Inputs reach the script through environment variables instead of being
      # interpolated into the script text, so an input value containing shell
      # metacharacters cannot alter the commands that run.
      env:
        STAGE: ${{ inputs.stage }}
        IMG_REPO_NAME: ${{ inputs.ecr_image_repo_name }}
        IMG_AWS_ACCOUNT_ID: ${{ inputs.aws_account_id }}
        IMG_SHOULD_PUBLISH: ${{ inputs.should_publish }}
        IMG_DOCKERFILE_LOCATION: ${{ inputs.dockerfile_location }}
      run: |
        # Tag format: <stage>-<UTC-or-runner-local timestamp>-<short commit sha>
        export SHA_SHORT="$(git rev-parse --short HEAD)"
        export CUR_DATE="$(date +%Y%m%d%H%M%S)"
        export VERSION="${STAGE}-${CUR_DATE}-${SHA_SHORT}"
        export APP_DIR="$PWD/${IMG_DOCKERFILE_LOCATION}"
        # bin/dockerize takes its parameters as per-command environment variables.
        image_name="$IMG_REPO_NAME" docker_login="true" version="$VERSION" account="$IMG_AWS_ACCOUNT_ID" app_dir="$APP_DIR" publish="$IMG_SHOULD_PUBLISH" ./bin/dockerize
        echo "Docker tag is: $VERSION"
        # Hand the tag to later steps of the calling job.
        echo "$VERSION" > /tmp/.DOCKER_IMAGE_VERSION
================================================
FILE: .github/workflows/cd.yaml
================================================
# Continuous delivery for non-production branches: builds the backend Docker
# image, pushes it to the dev ECR repository, then triggers the Terraform apply
# workflow in the PromethAI-Infra repository.
name: Publishing promethai-backend Docker image

on:
  push:
    branches:
      - dev
      - feature/*
    paths-ignore:
      - 'examples/**'
      - 'document_store/**'
      - '**.md'

env:
  AWS_ROLE_DEV_CICD: "arn:aws:iam::463722570299:role/promethai-dev-base-role-github-ci-cd"
  AWS_ACCOUNT_ID_DEV: "463722570299"

jobs:
  publish_docker_to_ecr:
    name: Publish Docker PromethAI image
    runs-on: ubuntu-latest
    permissions:
      # id-token: write is needed to request the OIDC token used for role assumption.
      id-token: write
      contents: read
    steps:
      - name: Take code from repo
        uses: actions/checkout@v3
      - name: Set environment variable for stage
        id: set-env
        # This workflow only triggers on dev and feature/* pushes, so the
        # refs/heads/main branch below is never taken here; it is kept so the
        # step stays identical in shape to the production workflow.
        # GITHUB_REF is read from the environment rather than interpolated
        # into the script, and step outputs are written to $GITHUB_OUTPUT
        # (the ::set-output workflow command is deprecated).
        run: |
          if [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
            echo "STAGE=prd" >> "$GITHUB_ENV"
            echo "stage=prd" >> "$GITHUB_OUTPUT"
          else
            echo "STAGE=dev" >> "$GITHUB_ENV"
            echo "stage=dev" >> "$GITHUB_OUTPUT"
          fi
      - name: Use output
        run: echo "The stage is ${{ steps.set-env.outputs.stage }}"
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          role-to-assume: ${{ env.AWS_ROLE_DEV_CICD }}
          aws-region: eu-west-1
      - name: Create Docker image and push to ECR
        uses: ./.github/actions/image_builder
        id: generate-promethai-docker
        with:
          stage: dev
          aws_account_id: ${{ env.AWS_ACCOUNT_ID_DEV }}
          # Action inputs are strings; the action documents "true" / "false".
          should_publish: "true"
          ecr_image_repo_name: promethai-dev-backend-promethai-backend
          dockerfile_location: ./
      - name: Export Docker image tag
        id: export-promethai-docker-tag
        # The image_builder action writes the tag to /tmp/.DOCKER_IMAGE_VERSION.
        run: |
          export DOCKER_TAG=$(cat /tmp/.DOCKER_IMAGE_VERSION)
          echo "Docker tag is: $DOCKER_TAG"
          echo "promethai_docker_tag_backend=$DOCKER_TAG" >> "$GITHUB_OUTPUT"
    outputs:
      promethai_docker_tag_backend: ${{ steps.export-promethai-docker-tag.outputs.promethai_docker_tag_backend }}

  apply_tf:
    name: Trigger terraform apply workflow
    runs-on: ubuntu-latest
    needs: publish_docker_to_ecr
    steps:
      - name: TF apply workflow triggers step
        uses: actions/github-script@v6
        with:
          # A personal access token is used because the dispatch targets
          # another repository.
          github-token: ${{ secrets.PAT_FOR_CROSS_REPOS_CICD_TRIGGERING }}
          script: |
            await github.rest.actions.createWorkflowDispatch({
              owner: 'topoteretes',
              repo: 'PromethAI-Infra',
              workflow_id: 'terraform.apply.yml',
              ref: 'main'
            })
================================================
FILE: .github/workflows/cd_prd.yaml
================================================
# Continuous delivery for production: on pushes to main, builds the backend
# Docker image, pushes it to the prd ECR repository, then triggers the
# Terraform apply workflow in the PromethAI-Infra repository.
on:
  push:
    branches:
      - main
    paths-ignore:
      - '**.md'
      - 'examples/**'

name: Publishing promethai-backend Docker image to prd ECR

# NOTE(review): the prd image is pushed with the role/account named "DEV" —
# confirm that sharing the dev account for prd is intentional.
env:
  AWS_ROLE_DEV_CICD: "arn:aws:iam::463722570299:role/promethai-dev-base-role-github-ci-cd"
  AWS_ACCOUNT_ID_DEV: "463722570299"
  ENVIRONMENT: prd

jobs:
  publish_docker_to_ecr:
    name: Publish Docker PromethAI image
    runs-on: ubuntu-latest
    permissions:
      # id-token: write is needed to request the OIDC token used for role assumption.
      id-token: write
      contents: read
    steps:
      - name: Take code from repo
        uses: actions/checkout@v3
      - name: Set environment variable for stage
        id: set-env
        # This workflow only triggers on pushes to main, so the first branch
        # is the one taken. GITHUB_REF is read from the environment rather
        # than interpolated into the script, and step outputs are written to
        # $GITHUB_OUTPUT (the ::set-output workflow command is deprecated).
        run: |
          if [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
            echo "STAGE=prd" >> "$GITHUB_ENV"
            echo "stage=prd" >> "$GITHUB_OUTPUT"
          else
            echo "STAGE=dev" >> "$GITHUB_ENV"
            echo "stage=dev" >> "$GITHUB_OUTPUT"
          fi
      - name: Use output
        run: echo "The stage is ${{ steps.set-env.outputs.stage }}"
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          role-to-assume: ${{ env.AWS_ROLE_DEV_CICD }}
          aws-region: eu-west-1
      - name: Create Docker image and push to ECR
        uses: ./.github/actions/image_builder
        id: generate-promethai-docker
        with:
          stage: prd
          aws_account_id: ${{ env.AWS_ACCOUNT_ID_DEV }}
          # Action inputs are strings; the action documents "true" / "false".
          should_publish: "true"
          ecr_image_repo_name: promethai-prd-backend-promethai-backend
          dockerfile_location: ./
      - name: Export Docker image tag
        id: export-promethai-docker-tag
        # The image_builder action writes the tag to /tmp/.DOCKER_IMAGE_VERSION.
        run: |
          export DOCKER_TAG=$(cat /tmp/.DOCKER_IMAGE_VERSION)
          echo "Docker tag is: $DOCKER_TAG"
          echo "promethai_docker_tag_backend=$DOCKER_TAG" >> "$GITHUB_OUTPUT"
      # - name: Create Tag and Release
      # runs-on: ubuntu-latest
      # uses: actions/checkout@v3
      # needs: publish_docker_to_ecr # ensure this job runs after Docker image is pushed
      # steps:
      # - name: Check out code
      # uses: actions/checkout@v3
      # - name: Bump version and push tag
      # id: bump_version_and_push_tag
      # uses: anothrNick/github-tag-action@1.34.0
      # env:
      # GITHUB_TOKEN: ${{ secrets.PAT_FOR_CROSS_REPOS_CICD_TRIGGERING }}
      # WITH_V: true
      # DEFAULT_BUMP: 'minor' # or 'minor' or 'major'
      # - name: Create Release
      # id: create_release
      # uses: actions/create-release@v1
      # env:
      # GITHUB_TOKEN: ${{ secrets.PAT_FOR_CROSS_REPOS_CICD_TRIGGERING }}
      # with:
      # tag_name: ${{ steps.bump_version_and_push_tag.outputs.tag }}
      # release_name: Release ${{ steps.bump_version_and_push_tag.outputs.tag }}
    outputs:
      promethai_docker_tag_backend: ${{ steps.export-promethai-docker-tag.outputs.promethai_docker_tag_backend }}

  apply_tf:
    name: Trigger terraform apply workflow
    runs-on: ubuntu-latest
    needs: publish_docker_to_ecr
    steps:
      - name: TF apply workflow triggers step
        uses: actions/github-script@v6
        with:
          # A personal access token is used because the dispatch targets
          # another repository.
          github-token: ${{ secrets.PAT_FOR_CROSS_REPOS_CICD_TRIGGERING }}
          script: |
            await github.rest.actions.createWorkflowDispatch({
              owner: 'topoteretes',
              repo: 'PromethAI-Infra',
              workflow_id: 'terraform.apply.yml',
              ref: 'main'
            })
================================================
FILE: .github/workflows/ci.yaml
================================================
# Pull-request check: builds the backend Docker image without logging in to a
# registry and without publishing, to prove the image still builds.
name: Test build docker image for PromethAI backend app

on: pull_request

env:
  AWS_ACCOUNT_ID_DEV: "463722570299"

jobs:
  build_docker:
    name: Build PromethAI Backend Docker App Image
    runs-on: ubuntu-latest
    steps:
      - name: Check out PromethAI code
        uses: actions/checkout@v3
      - name: Build PromethAI backend Docker image tag
        id: backend-docker-tag
        # NOTE(review): app_dir is "backend" here, while the CD workflows pass
        # "./" and the repository tree shows the Dockerfile at the root with no
        # backend/ directory — confirm against bin/dockerize.
        run: |
          export SHA_SHORT="$(git rev-parse --short HEAD)"
          export CUR_DATE="$(date +%Y%m%d%H%M%S)"
          export VERSION="dev-$CUR_DATE-$SHA_SHORT"
          image_name="backend" docker_login="false" version="$VERSION" account="${{ env.AWS_ACCOUNT_ID_DEV }}" app_dir="backend" publish="false" ./bin/dockerize
          # The tag is taken straight from $VERSION. /tmp/.DOCKER_IMAGE_VERSION
          # is written by the image_builder composite action, which this
          # workflow does not call, so reading it here could yield an empty tag.
          export DOCKER_TAG="$VERSION"
          echo "Successfully built PromethAI backend Docker tag is: $DOCKER_TAG"
================================================
FILE: .gitignore
================================================
.env
__pycache__
tst.py
.idea
================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Code of Conduct for PromethAI
## 1. Purpose
The purpose of this Code of Conduct is to provide guidelines for contributors to the PromethAI project on GitHub. We aim to create a positive and inclusive environment where all participants can contribute and collaborate effectively. By participating in this project, you agree to abide by this Code of Conduct.
## 2. Scope
This Code of Conduct applies to all contributors, maintainers, and users of the PromethAI project. It extends to all project spaces, including but not limited to issues, pull requests, code reviews, comments, and other forms of communication within the project.
## 3. Our Standards
We encourage the following behavior:
* Being respectful and considerate to others
* Actively seeking diverse perspectives
* Providing constructive feedback and assistance
* Demonstrating empathy and understanding
We discourage the following behavior:
* Harassment or discrimination of any kind
* Disrespectful, offensive, or inappropriate language or content
* Personal attacks or insults
* Unwarranted criticism or negativity
## 4. Reporting and Enforcement
If you witness or experience any violations of this Code of Conduct, please report them to the project maintainers by email or other appropriate means. The maintainers will investigate and take appropriate action, which may include warnings, temporary or permanent bans, or other measures as necessary.
Maintainers are responsible for ensuring compliance with this Code of Conduct and may take action to address any violations.
## 5. Acknowledgements
This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/version/2/0/code_of_conduct.html).
## 6. Contact
If you have any questions or concerns, please contact the project maintainers.
================================================
FILE: Dockerfile
================================================
# Image for the PromethAI backend: Python 3.11 with Poetry-managed
# dependencies, the AWS CLI v2, and entrypoint.sh as the container entrypoint.
FROM python:3.11-slim

# Set build argument
ARG API_ENABLED

# Set environment variable based on the build argument
ENV API_ENABLED=${API_ENABLED} \
    PIP_NO_CACHE_DIR=true
ENV PATH="${PATH}:/root/.poetry/bin"

RUN pip install poetry

WORKDIR /app
# Copy only the dependency manifests first so the dependency layer is reused
# when just application code changes.
COPY pyproject.toml poetry.lock /app/

# Install the dependencies.
# `--only main` replaces the deprecated `--no-dev` flag; because Poetry is
# installed unpinned above, relying on the deprecated flag breaks the build
# once the installed Poetry release no longer accepts it.
RUN poetry config virtualenvs.create false && \
    poetry install --no-root --only main

# System tools plus AWS CLI v2. `unzip` is listed explicitly because the
# installer archive is unpacked with it. Package lists, the archive and the
# unpacked installer are removed in the same layer so they do not remain in
# the image.
RUN apt-get update -q && \
    apt-get install curl zip unzip jq netcat-traditional -y -q && \
    curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
    unzip -qq awscliv2.zip && ./aws/install && \
    rm -rf awscliv2.zip ./aws && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

#RUN playwright install
#RUN playwright install-deps

WORKDIR /app
COPY . /app
COPY entrypoint.sh /app/entrypoint.sh
RUN chmod +x /app/entrypoint.sh

ENTRYPOINT ["/app/entrypoint.sh"]
================================================
FILE: LICENSE.md
================================================
MIT License
Copyright (c) 2023 topoteretes
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
# PromethAI
<p align="center">
<a href="https://prometh.ai//#gh-light-mode-only">
<img src="assets/topoteretes_logo.png" width="10%" alt="promethAI logo" />
</a>
</p>
<p align="center"><i>Open-source framework that gives you AI Agents that help you navigate decision-making, get personalized goals and execute them </i></p>
<p align="center">
<a href="https://github.com/topoteretes/PromethAI-Backend/fork" target="blank">
<img src="https://img.shields.io/github/forks/topoteretes/PromethAI-Backend?style=for-the-badge" alt="promethAI forks"/>
</a>
<a href="https://github.com/topoteretes/PromethAI-Backend/stargazers" target="blank">
<img src="https://img.shields.io/github/stars/topoteretes/PromethAI-Backend?style=for-the-badge" alt="promethAI stars"/>
</a>
<a href="https://github.com/topoteretes/PromethAI-Backend/pulls" target="blank">
<img src="https://img.shields.io/github/issues-pr/topoteretes/PromethAI-Backend?style=for-the-badge" alt="promethAI pull-requests"/>
</a>
<a href='https://github.com/topoteretes/PromethAI-Backend/releases'>
<img src='https://img.shields.io/github/release/topoteretes/PromethAI-Backend?&label=Latest&style=for-the-badge'>
</a>
</p>
[//]: # (<p align="center"><b>Follow PromethAI </b></p>)
[//]: # (<p align="center">)
[//]: # (<a href="https://twitter.com/_promethAI" target="blank">)
[//]: # (<img src="https://img.shields.io/twitter/follow/_promethAI?label=Follow: _promethAI&style=social" alt="Follow _promethAI"/>)
[//]: # (</a>)
[//]: # (<p align="center">)
[//]: # (<a href="https://prometh.ai" target="_blank"><img src="https://img.shields.io/twitter/url?label=promethAI Website&logo=website&style=social&url=https://github.com/topoteretes/PromethAI-Backend-Backend"/></a>)
[//]: # (<p align="center">)
[//]: # (<a href="https://www.youtube.com/@_promethAI" target="_blank"><img src="https://img.shields.io/twitter/url?label=Youtube&logo=youtube&style=social&url=https://github.com/topoteretes/PromethAI-Backend-Backend"/></a>)
[//]: # (</p>)
<p align="center"><b>Share promethAI Repository</b></p>
<p align="center">
<a href="https://twitter.com/intent/tweet?text=Check%20this%20GitHub%20repository%20out.%20promethAI%20-%20Let%27s%20you%20easily%20build,%20manage%20and%20run%20useful%20autonomous%20AI%20agents.&url=https://github.com/topoteretes/PromethAI-Backend-Backend&hashtags=promethAI,AGI,Autonomics,future" target="blank">
<img src="https://img.shields.io/twitter/follow/_promethAI?label=Share Repo on Twitter&style=social" alt="Follow _promethAI"/></a>
<a href="https://t.me/share/url?text=Check%20this%20GitHub%20repository%20out.%20promethAI%20-%20Let%27s%20you%20easily%20build,%20manage%20and%20run%20useful%20autonomous%20AI%20agents.&url=https://github.com/topoteretes/PromethAI-Backend" target="_blank"><img src="https://img.shields.io/twitter/url?label=Telegram&logo=Telegram&style=social&url=https://github.com/topoteretes/PromethAI-Backend" alt="Share on Telegram"/></a>
<a href="https://api.whatsapp.com/send?text=Check%20this%20GitHub%20repository%20out.%20promethAI%20-%20Let's%20you%20easily%20build,%20manage%20and%20run%20useful%20autonomous%20AI%20agents.%20https://github.com/topoteretes/PromethAI-Backend"><img src="https://img.shields.io/twitter/url?label=whatsapp&logo=whatsapp&style=social&url=https://github.com/topoteretes/PromethAI-Backend" /></a> <a href="https://www.reddit.com/submit?url=https://github.com/topoteretes/PromethAI-Backend&title=Check%20this%20GitHub%20repository%20out.%20promethAI%20-%20Let's%20you%20easily%20build,%20manage%20and%20run%20useful%20autonomous%20AI%20agents.
" target="blank">
<img src="https://img.shields.io/twitter/url?label=Reddit&logo=Reddit&style=social&url=https://github.com/topoteretes/PromethAI-Backend" alt="Share on Reddit"/>
</a> <a href="mailto:?subject=Check%20this%20GitHub%20repository%20out.&body=promethAI%20-%20Let%27s%20you%20easily%20build,%20manage%20and%20run%20useful%20autonomous%20AI%20agents.%3A%0Ahttps://github.com/topoteretes/PromethAI-Backend" target="_blank"><img src="https://img.shields.io/twitter/url?label=Gmail&logo=Gmail&style=social&url=https://github.com/topoteretes/PromethAI-Backend"/></a> <a href="https://www.buymeacoffee.com/promethAI" target="_blank"><img src="https://cdn.buymeacoffee.com/buttons/default-orange.png" alt="Buy Me A Coffee" height="23" width="100" style="border-radius:1px"></a>
</p>
<hr>
## We took all the work we did with PromethAI into our new product, cognee -> check it out [here](https://github.com/topoteretes/cognee)
## What is it
PromethAI is a Python-based AGI project that recommends choices based on a user's goals and preferences and can modify its recommendations based on user feedback.
Our focus is currently on food, but the system is extendible to any area.
## 💡 Features
- Optimized for Autonomous Agents
- Personalized for each user
- Introduces decision trees to help user navigate and decide on a solution
- Runs asynchronously
- For App builds, check out this repo [promethAI-GUI](https://github.com/topoteretes/PromethAI-Mobile)
- Supports automating tasks and executing decisions
- Multiple Vector DBs supported through Langchain
- Low latency
- Easy to use
- Easy to deploy
## 💻 Demo
<p align="center">
<a href="https://prometh.ai">
<img src="https://promethai-public-assets.s3.eu-west-1.amazonaws.com/product_demo-min.gif" width="25%" height="50%"/>
</a>
</p>
## 🛣 Architecture
<p align="center">
<img src="assets/PromethAI_infra.png" alt="PromethAI Architecture" width="50%" height="50%">
</p>
## 🛣 Roadmap
<p align="center">
<img src="assets/roadmap.png" alt="Topoteretes Roadmap" width="50%" height="50%">
</p>
## ⚙️ Setting up
1. Download the repo using `git clone https://github.com/topoteretes/PromethAI-Backend.git` in your terminal or directly from the GitHub page in zip format.
2. Navigate to the directory using `cd PromethAI-Backend` and create a copy of `.env.template` and name it `.env`.
3. Enter your unique OpenAI API Key, Google key, Custom search engine ID without any quotes or spaces in `.env` file. Follow the links below to get your keys:
| Keys | Accessing the keys |
|-----------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **OpenAI API Key** | Sign up and create an API key at [OpenAI Developer](https://beta.openai.com/signup/) |
| **Pinecone API Key** | Sign up and create an API key at [Pinecone.io](https://www.pinecone.io/) |
| **Google API key** | Create a project in the [Google Cloud Console](https://console.cloud.google.com/) and enable the API you need (for example: Google Custom Search JSON API). Then, create an API key in the "Credentials" section. |
| **Custom search engine ID** | Visit [Google Programmable Search Engine](https://programmablesearchengine.google.com/about/) to create a custom search engine for your application and obtain the search engine ID. |
4. Ensure that Docker and Docker Compose are installed in your system, if not, Install it from [here](https://docs.docker.com/get-docker/).
5. Once you have Docker Desktop running, run command : `docker-compose up promethai --build` in promethai directory. Open your browser and go to `localhost:3000` to see promethAI running.
## Resources
Papers like ["Generative Agents: Interactive Simulacra of Human Behavior"](https://arxiv.org/abs/2304.03442)
## Quick start
Make sure to add your credentials to the `.env` file. Launch the app with:
```docker-compose build promethai && docker-compose up promethai```
## How it Works
Here is what happens every time the AI is queried by the user:
1. AI vectorizes the query and stores it in a Pinecone Vector Database
2. AI looks inside its memory and finds memories and past queries that are relevant to the current query
3. AI thinks about what action to take
4. AI stores the thought from Step 3
5. Based on the thought from Step 3 and relevant memories from Step 2, AI generates an output
6. AI stores the current query and its answer in its Pinecone vector database memory
## How to use
```
docker-compose build promethai
```
6. Access the API by doing CURL requests, example:
```
curl -X POST "http://0.0.0.0:8000/data-request" -H "Content-Type: application/json" --data-raw
```
## Example of available endpoint
The available endpoint:
```
POST request to '/recipe-request' endpoint that takes a JSON payload containing 'user_id', 'session_id', 'factors' keys, and returns a JSON response with a 'response' key.
```
All endpoints receive a payload in JSON format and return a response in JSON format.
Example of curl requests
```
curl --location --request POST 'http://0.0.0.0:8000/recipe-request' \
--header 'Content-Type: application/json' \
--data-raw '{
"payload": {
"user_id": "659",
"session_id": "459",
"model_speed":"slow",
"prompt":"I would like a healthy chicken meal over 125$"
}
}'
```
# 🔰 Notice
PromethAI is a work in progress, delivered to you without any guarantees, whether explicit or implied. By choosing to use this application, you consent to take on any associated risks, including data loss, system failure, or any other complications that may arise.
The creators and contributors of PromethAI disclaim any responsibility or liability for any potential losses, damages, or any other adverse effects resulting from your use of this software. The onus is solely on you for any decisions or actions you take based on the information given by PromethAI.
Please be aware that usage of the GPT-4 language model could incur significant costs due to its token consumption. By using this software, you acknowledge and agree to monitor your own token usage and manage the associated costs. We strongly suggest routinely checking your OpenAI API usage and implementing necessary limits or alerts to avoid unexpected fees.
Given its experimental nature, PromethAI may generate content or perform actions that do not align with real-world business norms or legal obligations. It falls on you to ensure that any actions or decisions based on this software’s output adhere to all relevant laws, regulations, and ethical standards. The creators and contributors of this project will not be held accountable for any fallout from using this software.
By utilizing PromethAI, you agree to protect, defend, and absolve the creators, contributors, and any affiliated parties from any claims, damages, losses, liabilities, costs, and expenses (including reasonable attorneys' fees) that arise from your use of this software or your violation of these terms.
# 📝 License
MIT License
# Credits:
Teenage AGI -> https://github.com/seanpixel/Teenage-AGI
Baby AGI -> https://github.com/yoheinakajima/babyagi
================================================
FILE: act.env.example
================================================
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=
AWS_DEFAULT_REGION=eu-west-1
================================================
FILE: agent.py
================================================
import openai
import os
import pinecone
from dotenv import load_dotenv
import nltk
from langchain.text_splitter import NLTKTextSplitter
from typing import Optional
# Download NLTK for Reading
nltk.download("punkt")
import subprocess
import datetime
# Splitter used to cut long text into chunks of at most 2500 characters.
text_splitter = NLTKTextSplitter(chunk_size=2500)

# Read settings from a local .env file into the process environment.
load_dotenv()

# Model settings, with fallbacks when the variables are unset.
OPENAI_MODEL = os.environ.get("OPENAI_MODEL") or "gpt-3.5-turbo"
OPENAI_TEMPERATURE = float(os.environ.get("OPENAI_TEMPERATURE", 0.0))

# Service credentials; each is None when the variable is not set.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
PINECONE_API_ENV = os.environ.get("PINECONE_API_ENV")

# Number of top matches to keep.
k_n = 3

# Configure the Pinecone client.
pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_API_ENV)

# Configure the OpenAI client with the key read from the environment.
openai.api_key = OPENAI_API_KEY
def get_ada_embedding(text):
    """Return the "text-embedding-ada-002" embedding for ``text``.

    Newlines in ``text`` are replaced with spaces before the request is made.
    The value returned is the ``"embedding"`` entry of the first item in the
    response's ``"data"`` list.
    """
    single_line = text.replace("\n", " ")
    response = openai.Embedding.create(
        input=[single_line], model="text-embedding-ada-002"
    )
    first_item = response["data"][0]
    return first_item["embedding"]
================================================
FILE: api.py
================================================
from llm_chains.chains import Agent
from fastapi import FastAPI
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from typing import Dict, Any
import re
import json
import logging
import os
import uvicorn
from fastapi import Request
import yaml
from fastapi import HTTPException
# Module-level flag; it is not referenced anywhere in the visible part of this file.
CANNED_RESPONSES = False
# Set up logging
logging.basicConfig(
    level=logging.INFO, # Set the logging level (e.g., DEBUG, INFO, WARNING, ERROR, CRITICAL)
    format="%(asctime)s [%(levelname)s] %(message)s", # Set the log message format
)
# Module logger; note that some handlers below call the root `logging` module directly.
logger = logging.getLogger(__name__)
from dotenv import load_dotenv
# Load variables from a local .env file into the environment.
# NOTE(review): presumably this must run before the auth imports below so they
# can read their settings at import time — confirm before reordering.
load_dotenv()
# NOTE(review): debug=True is hard-coded; confirm it is intended outside local development.
app = FastAPI(debug=True)
from auth.cognito.JWTBearer import JWTBearer
from auth.auth import jwks
# Bearer-token dependency built from the JWKS; attached to routes via Depends(auth).
auth = JWTBearer(jwks)
from fastapi import Depends
class Payload(BaseModel):
    """Request body shared by the POST endpoints: a single ``payload`` object."""

    # Free-form mapping; each handler reads the keys it needs
    # (for example "user_id" and "session_id").
    payload: Dict[str, Any]
class ImageResponse(BaseModel):
    """Response model carrying a success flag and a message.

    It is not referenced in the visible part of this file.
    """

    # Whether the operation succeeded.
    success: bool
    # Text accompanying the result.
    message: str
def str_to_bool(s):
    """Convert the strings "true" / "false" (any letter case) to booleans.

    Args:
        s: Value to convert. Usually a string, but any type is accepted.

    Returns:
        ``True`` or ``False`` when ``s`` is a string spelling "true" or
        "false" in any letter case; otherwise ``s`` itself, unchanged.
    """
    # Payload values are typed Any, so a JSON body can deliver a real boolean,
    # a number or None. Hand those back untouched instead of failing on the
    # missing .lower() method.
    if not isinstance(s, str):
        return s
    lowered = s.lower()
    if lowered == "true":
        return True
    if lowered == "false":
        return False
    return s
@app.get("/")
async def root():
    """Unauthenticated landing route; replies with a fixed liveness greeting."""
    greeting = {"message": "Hello, World, I am alive!"}
    return greeting
@app.get("/health", dependencies=[Depends(auth)])
def health_check():
    """Report the server status; the ``auth`` dependency runs before this handler."""
    status_report = {"status": "OK"}
    return status_report
# @app.post("/testbot", response_model=Dict[str, Any])
# async def test(request_data: Payload) -> Dict[str, Any]:
# """
# Endpoint to clear the cache.
#
# Parameters:
# request_data (Payload): The request data containing the user and session IDs.
#
# Returns:
# dict: A dictionary with a message indicating the cache was cleared.
# """
# json_payload = request_data.payload
#
# try:
# # Instantiate AppAgent and call manage_resources
# app_agent = AppAgent(user_id=json_payload["user_id"])
# app_agent.manage_resources("add", "web_page", "https://nav.al/agi")
# return JSONResponse(content={"response": "Test"}, status_code=200)
# except Exception as e:
# raise HTTPException(status_code=500, detail=str(e))
@app.post("/clear-cache", response_model=dict, dependencies=[Depends(auth)])
async def clear_cache(request_data: Payload) -> dict:
    """
    Clear the agent cache for one user session.

    Parameters:
    request_data (Payload): Body whose payload holds "user_id" and "session_id".

    Returns:
    A JSON response with the message "Cache cleared" and status 200.

    Raises:
    HTTPException: status 500 carrying the error text when clearing fails.
    """
    body = request_data.payload
    agent = Agent()
    agent.set_user_session(body["user_id"], body["session_id"])
    try:
        agent.clear_cache()
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    else:
        return JSONResponse(content={"response": "Cache cleared"}, status_code=200)
@app.post("/correct-prompt-grammar", response_model=dict, dependencies=[Depends(auth)])
async def prompt_to_correct_grammar(request_data: Payload) -> dict:
    """
    Run the agent's prompt correction on the submitted prompt.

    The payload must hold "user_id", "session_id", "prompt_source" and
    "model_speed". The agent's output is parsed as JSON and returned under
    response.result.
    """
    body = request_data.payload
    agent = Agent()
    agent.set_user_session(body["user_id"], body["session_id"])
    source_prompt = body["prompt_source"]
    logging.info("Correcting grammar %s", source_prompt)
    corrected = agent.prompt_correction(source_prompt, model_speed=body["model_speed"])
    parsed = json.loads(corrected)
    return JSONResponse(content={"response": {"result": parsed}})
# @app.post("/action-add-zapier-calendar-action", response_model=dict,dependencies=[Depends(auth)])
# async def action_add_zapier_calendar_action(
# request: Request, request_data: Payload
# ) -> dict:
# json_payload = request_data.payload
# agent = Agent()
# agent.set_user_session(json_payload["user_id"], json_payload["session_id"])
# # Extract the bearer token from the header
# auth_header = request.headers.get("Authorization")
# if auth_header:
# bearer_token = auth_header.replace("Bearer ", "")
# else:
# bearer_token = None
# outcome = agent.add_zapier_calendar_action(
# prompt_base=json_payload["prompt_base"],
# token=bearer_token,
# model_speed=json_payload["model_speed"],
# )
# return JSONResponse(content={"response": outcome})
@app.post("/prompt-to-choose-meal-tree", response_model=dict, dependencies=[Depends(auth)])
async def prompt_to_choose_meal_tree(request_data: Payload) -> dict:
    """
    Ask the agent to choose a tree for the submitted prompt.

    Required payload keys: "user_id", "session_id", "prompt", "model_speed".
    Optional keys: "user_defaults" (defaults to "True") and
    "assistant_category" (defaults to "food"). The agent's output is parsed as
    JSON and returned as the response body.
    """
    body = request_data.payload
    agent = Agent()
    load_defaults = str_to_bool(body.get("user_defaults", "True"))
    category = body.get("assistant_category", "food")
    agent.set_user_session(body["user_id"], body["session_id"])
    raw_tree = agent.prompt_to_choose_tree(
        body["prompt"],
        model_speed=body["model_speed"],
        assistant_category=category,
        load_defaults=load_defaults,
    )
    parsed_tree = json.loads(raw_tree)
    return JSONResponse(content=parsed_tree)
# def create_endpoint_with_resources(category: str, solution_type: str, prompt: str, json_example: str, *args, **kwargs):
# class Payload(BaseModel):
# payload: Dict[str, Any]
#
# @app.post(f"/chatbot/{category}", response_model=dict,dependencies=[Depends(auth)])
# async def prompt_to_choose_tree(request_data: Payload) -> dict:
# json_payload = request_data.payload
# from bots.bot_extension import AppAgent
# agent = AppAgent()
# agent.set_user_session(json_payload["user_id"], json_payload["session_id"])
# output = agent.query(
# json_payload["prompt"]
# )
# logging.info("HERE IS THE CHAIN RESULT %s", output)
# return JSONResponse(content={"response": output})
#
def create_endpoint(category: str, solution_type: str, prompt: str, json_example: str, *args, **kwargs):
class Payload(BaseModel):
payload: Dict[str, Any]
@app.post(f"/{category}/prompt-to-decompose-categories", response_model=dict)
async def prompt_to_decompose_categories(request_data: Payload) -> dict:
json_payload = request_data.payload
agent = Agent()
agent.set_user_session(json_payload["user_id"], json_payload["session_id"])
output = await agent.prompt_decompose_to_tree_categories(
json_payload["prompt_struct"],
assistant_category=category,
model_speed=json_payload["model_speed"],
)
return JSONResponse(content={"response": output})
@app.post(f"/{category}/update-agent-summary/{solution_type}", response_model=dict,dependencies=[Depends(auth)])
async def update_agent_summary(request_data: Payload) -> dict:
json_payload = request_data.payload
agent = Agent()
agent.set_user_session(json_payload["user_id"], json_payload["session_id"])
output = await agent.update_agent_summary(
model_speed=json_payload["model_speed"]
)
return {"response": output}
@app.post(f"/{category}/prompt-to-update-tree", response_model=dict,dependencies=[Depends(auth)])
async def prompt_to_update_tree(request_data: Payload) -> dict:
json_payload = request_data.payload
agent = Agent()
agent.set_user_session(json_payload["user_id"], json_payload["session_id"])
output = agent.prompt_to_update_meal_tree(
json_payload["category"],
json_payload["from"],
json_payload["to"],
model_speed=json_payload["model_speed"],
)
print("HERE IS THE OUTPUT", output)
return JSONResponse(content={"response": output})
@app.post(f"/{category}/fetch-user-summary/{solution_type}", response_model=dict,dependencies=[Depends(auth)])
async def fetch_user_summary(request_data: Payload) -> dict:
json_payload = request_data.payload
agent = Agent()
agent.set_user_session(json_payload["user_id"], json_payload["session_id"])
output = agent.fetch_user_summary(model_speed=json_payload["model_speed"])
return {"response": output}
@app.post(f"/{category}/request/{solution_type}", response_model=dict,dependencies=[Depends(auth)])
async def solution_request(request_data: Payload) -> dict:
json_payload = request_data.payload
agent = Agent()
agent.set_user_session(json_payload["user_id"], json_payload["session_id"])
# method_to_call = getattr(agent, f"{solution_type}_generation")
output = await agent.solution_generation(json_payload["prompt"], prompt_template=prompt, json_example=json_example, model_speed="slow")
output = output.replace("'", '"')
return JSONResponse(content={"response": json.loads(output)})
# Read the category / solution-type templates from the YAML file
with open('assistant_templates.yaml', 'r') as file:
    data = yaml.safe_load(file)

# Register one set of endpoints per (category, solution type) pair
for role in ['assistant', 'chatbot']:
    # Only the 'assistant' role is wired up here
    if role == 'assistant':
        assistant_categories = data[role]['categories']
        for category in assistant_categories:
            for solution_type in category['solution_types']:
                create_endpoint(
                    category['name'],
                    solution_type['name'],
                    solution_type['prompt'],
                    # The example is stored as JSON text and handed over decoded
                    json.loads(solution_type['json_example']),
                )
# # If the role is 'chatbot'
# elif role == 'chatbot':
# # Iterate through the categories and resources
# for category in data[role]['categories']:
# create_endpoint_with_resources(category['name'], solution_type="", prompt="", json_example="")
# # for category in data[role]['categories']:
# # create_endpoint_with_resources(category['name'])
@app.post("/prompt-to-decompose-meal-tree-categories", response_model=dict, dependencies=[Depends(auth)])
async def prompt_to_decompose_meal_tree_categories(request_data: Payload) -> dict:
    """Decompose a structured prompt into tree categories.

    Required payload keys: ``user_id``, ``session_id``, ``prompt_struct`` and
    ``model_speed``. Optional keys: ``user_defaults`` (string, defaults to
    ``"False"``) and ``assistant_category`` (defaults to ``"food"``).

    Returns:
        The agent output itself when ``user_defaults`` is truthy, otherwise a
        ``JSONResponse`` wrapping it as ``{"response": output}``.
    """
    # Imported locally so the block does not depend on the file's import list.
    import asyncio

    json_payload = request_data.payload
    agent = Agent()
    # Wait for 1.1 seconds without blocking the event loop: a plain
    # time.sleep() inside a coroutine stalls every other in-flight request.
    # NOTE(review): the reason for the delay is not evident from this file.
    await asyncio.sleep(1.1)
    agent.set_user_session(json_payload["user_id"], json_payload["session_id"])
    user_defaults = str_to_bool(json_payload.get("user_defaults", "False"))
    assistant_category = json_payload.get("assistant_category", "food")
    output = await agent.prompt_decompose_to_tree_categories(
        json_payload["prompt_struct"],
        assistant_category=assistant_category,
        model_speed=json_payload["model_speed"],
        load_defaults=user_defaults,
    )
    logging.info("Prompt to decompose meal tree categories %s", str(output))
    if user_defaults:
        return output
    else:
        return JSONResponse(content={"response": output})
@app.post("/correct-prompt-grammar", response_model=dict, dependencies=[Depends(auth)])
async def prompt_to_correct_grammar(request_data: Payload) -> dict:
    """Run the agent's prompt correction on ``prompt_source``.

    Required payload keys: ``user_id``, ``session_id``, ``prompt_source`` and
    ``model_speed``. The agent's answer is decoded from JSON text and returned
    as ``{"response": {"result": ...}}``.
    """
    body = request_data.payload
    grammar_agent = Agent()
    grammar_agent.set_user_session(body["user_id"], body["session_id"])
    logging.info("Correcting grammar %s", body["prompt_source"])
    corrected_json = grammar_agent.prompt_correction(
        body["prompt_source"],
        model_speed=body["model_speed"],
    )
    result = json.loads(corrected_json)
    return JSONResponse(content={"response": {"result": result}})
@app.post("/fetch-user-summary", response_model=dict, dependencies=[Depends(auth)])
async def fetch_user_summary(request_data: Payload) -> dict:
    """Return the agent's summary for the user/session named in the payload.

    Required payload keys: ``user_id``, ``session_id`` and ``model_speed``.
    The summary is returned as ``{"response": output}``.
    """
    json_payload = request_data.payload
    agent = Agent()
    agent.set_user_session(json_payload["user_id"], json_payload["session_id"])
    output = agent.fetch_user_summary(model_speed=json_payload["model_speed"])
    # Use the logging module (as the neighbouring handlers do) instead of
    # print(), so the message follows the server's log configuration.
    logging.info("HERE IS THE OUTPUT %s", output)
    return JSONResponse(content={"response": output})
@app.post("/recipe-request", response_model=dict, dependencies=[Depends(auth)])
async def recipe_request(request_data: Payload) -> dict:
    """Generate a solution for ``prompt``, or serve the canned fixture.

    When ``CANNED_RESPONSES`` is truthy the request payload is not read at
    all; the content of ``fixtures/recipe_response.json`` is returned instead.
    Otherwise the payload must carry ``user_id``, ``session_id`` and
    ``prompt``.
    """
    if CANNED_RESPONSES:
        with open("fixtures/recipe_response.json", "r") as fixture:
            canned = json.load(fixture)
        return JSONResponse(content={"response": canned})

    body = request_data.payload
    # factors_dict = {factor['name']: factor['amount'] for factor in json_payload['factors']}
    recipe_agent = Agent()
    recipe_agent.set_user_session(body["user_id"], body["session_id"])
    generated = await recipe_agent.solution_generation(
        body["prompt"],
        model_speed="slow",
        prompt_template=None,
        json_example=None,
    )
    return JSONResponse(content={"response": generated})
@app.post("/food/solution-name-request", response_model=dict, dependencies=[Depends(auth)])
async def solution_name_request(request_data: Payload) -> dict:
    """Ask the agent for a solution name for ``prompt``.

    Required payload keys: ``user_id``, ``session_id`` and ``prompt``. The
    model speed is fixed to ``"slow"`` and no template or example is passed.
    """
    body = request_data.payload
    # factors_dict = {factor['name']: factor['amount'] for factor in json_payload['factors']}
    naming_agent = Agent()
    naming_agent.set_user_session(body["user_id"], body["session_id"])
    generated_name = await naming_agent.solution_name_generation(
        body["prompt"],
        model_speed="slow",
        prompt_template=None,
        json_example=None,
    )
    return JSONResponse(content={"response": generated_name})
# @app.post("/restaurant-request", response_model=dict)
# async def restaurant_request(request_data: Payload) -> dict:
# json_payload = request_data.payload
# agent = Agent()
# agent.set_user_session(json_payload["user_id"], json_payload["session_id"])
# output = agent.restaurant_generation(json_payload["prompt"], model_speed="slow")
# return JSONResponse(content={"response": {"restaurants": output}})
# @app.post("/delivery-request", response_model=dict)
# async def delivery_request(request_data: Payload) -> dict:
# json_payload = request_data.payload
# # factors_dict = {factor['name']: factor['amount'] for factor in json_payload['factors']}
# agent = Agent()
# agent.set_user_session(json_payload["user_id"], json_payload["session_id"])
# output = await agent.delivery_generation( json_payload["prompt"], zipcode=json_payload["zipcode"], model_speed="slow")
# print("HERE IS THE OUTPUT", output)
# return JSONResponse(content={"response": {"url": output}})
def start_api_server(host: str = "0.0.0.0", port: int = 8000):
    """
    Serve ``app`` with uvicorn on the given address.

    Any exception raised while starting or running the server is logged with
    its traceback and then suppressed, so this function does not re-raise.

    Parameters:
    host (str): Interface to bind to.
    port (int): TCP port to listen on.
    """
    try:
        logger.info(f"Starting server at {host}:{port}")
        uvicorn.run(app, host=host, port=port)
    except Exception as exc:
        logger.exception(f"Failed to start server: {exc}")
        # Cleanup or error-recovery code could be added here.


if __name__ == "__main__":
    start_api_server()
================================================
FILE: assistant_templates.yaml
================================================
# Templates read at start-up by the API and by the bot loading utility.
# assistant.categories[].solution_types[] supplies name / prompt / json_example;
# chatbot.categories[].resources[] supplies type / url.
# json_example values are JSON text and are decoded with json.loads by the API.
assistant:
categories:
- name: food
solution_types:
- name: recipe
prompt: |-
Create a food recipe based on the following prompt: '{{prompt}}'. Instructions and ingredients should have medium detail.
Answer a condensed valid JSON in this format: {{ json_example}} Do not explain or write anything else.
json_example: '{"recipes":[{"title":"value","rating":"value","prep_time":"value","cook_time":"value","description":"value","ingredients":["value"],"instructions":["value"]}]}'
# NOTE(review): the prompt and json_example of `restaurant` are identical to
# the fitness `exercise` entry (a weekly workout routine); this looks like a
# copy-paste placeholder - confirm the intended restaurant template.
- name: restaurant
prompt: |-
Create a detailed weekly workout routine based on the following '{{prompt}}': I want to gain muscle mass in the next six month . Instructions should have medium detail.
Answer a condensed valid JSON in this format: {{ json_example}} Do not explain or write anything else.
json_example: '{"plans":[{"day":"Monday","workouts":[{"title":"example workout","instructions":["example instruction"]}]} ,{"day":"Tuesday","workouts":[{"title":"example workout","instructions":["example instruction"]}]} ,{"day":"Wednesday","workouts":[{"title":"example workout","instructions":["example instruction"]}]} ,{"day":"Thursday","workouts":[{"title":"example workout","instructions":["example instruction"]}]} ,{"day":"Friday","workouts":[{"title":"example workout","instructions":["example instruction"]}]} ,{"day":"Saturday","workouts":[{"title":"example workout","instructions":["example instruction"]}]} ,{"day":"Sunday","workouts":[{"title":"example workout","instructions":["example instruction"]}]}]}'
# NOTE(review): the workout prompt below hard-codes the sentence "I want to
# gain muscle mass in the next six month ." after the '{{prompt}}' placeholder;
# presumably a leftover example - confirm before relying on it.
- name: fitness
solution_types:
- name: exercise
prompt: |-
Create a detailed weekly workout routine based on the following '{{prompt}}': I want to gain muscle mass in the next six month . Instructions should have medium detail.
Answer a condensed valid JSON in this format: {{ json_example}} Do not explain or write anything else.
json_example: '{"plans":[{"day":"Monday","workouts":[{"title":"example workout","instructions":["example instruction"]}]} ,{"day":"Tuesday","workouts":[{"title":"example workout","instructions":["example instruction"]}]} ,{"day":"Wednesday","workouts":[{"title":"example workout","instructions":["example instruction"]}]} ,{"day":"Thursday","workouts":[{"title":"example workout","instructions":["example instruction"]}]} ,{"day":"Friday","workouts":[{"title":"example workout","instructions":["example instruction"]}]} ,{"day":"Saturday","workouts":[{"title":"example workout","instructions":["example instruction"]}]} ,{"day":"Sunday","workouts":[{"title":"example workout","instructions":["example instruction"]}]}]}'
chatbot:
categories:
- name: nutrition_chatbot
resources:
- type: 'pdf_file'
url: 'https://navalmanack.s3.amazonaws.com/Eric-Jorgenson_The-Almanack-of-Naval-Ravikant_Final.pdf'
# NOTE(review): this entry repeats the URL of the previous resource, so the
# same PDF is loaded twice - confirm whether a different file was intended.
- type: 'pdf_file'
url: 'https://navalmanack.s3.amazonaws.com/Eric-Jorgenson_The-Almanack-of-Naval-Ravikant_Final.pdf'
================================================
FILE: auth/auth.py
================================================
import os
import requests
from dotenv import load_dotenv
from fastapi import Depends, HTTPException
from starlette.status import HTTP_403_FORBIDDEN
from auth.cognito.JWTBearer import JWKS, JWTBearer, JWTAuthorizationCredentials
load_dotenv() # Automatically load environment variables from a '.env' file.
# jwks = JWKS.parse_obj(
# requests.get(
# f"https://cognito-idp.{os.environ.get('eu-west-1:46372257029')}.amazonaws.com/"
# f"{os.environ.get('eu-west-1_3VUqKzMgj')}/.well-known/jwks.json"
# ).json()
# )
# Construct the Cognito User Pool JWKS URL
region = "eu-west-1"
user_pool_id = "eu-west-1_viUyNCqKp"
cognito_url = f"https://cognito-idp.{region}.amazonaws.com/{user_pool_id}/.well-known/jwks.json"

# Fetch the JWKS once, at import time. A timeout keeps a stalled connection
# from hanging application start-up indefinitely, and raise_for_status() turns
# an HTTP error into an explicit failure instead of a confusing parse error.
_jwks_response = requests.get(cognito_url, timeout=10)
_jwks_response.raise_for_status()
jwks = JWKS.parse_obj(_jwks_response.json())

# Dependency that validates bearer tokens against the fetched key set
auth = JWTBearer(jwks)
async def get_current_user(
    credentials: JWTAuthorizationCredentials = Depends(auth)
) -> str:
    """Return the ``username`` claim of the authenticated request.

    Args:
        credentials: Token data produced by the ``auth`` dependency.

    Returns:
        The value of the ``username`` claim.

    Raises:
        HTTPException: 403 when the token carries no ``username`` claim.
    """
    try:
        return credentials.claims["username"]
    except KeyError:
        # The exception must be raised; merely constructing it made the
        # function fall through and return None for such tokens.
        raise HTTPException(status_code=HTTP_403_FORBIDDEN, detail="Username missing")
================================================
FILE: auth/auth_utils.py
================================================
from cognito.JWTBearer import JWKS, JWTBearer, JWTAuthorizationCredentials
import requests
region = "eu-west-1"
user_pool_id = ""  # needed
cognito_url = f"https://cognito-idp.{region}.amazonaws.com/{user_pool_id}/.well-known/jwks.json"

# Fetch the JWKS using the updated URL (with a timeout so a stalled
# connection cannot hang the script)
jwks = JWKS.parse_obj(requests.get(cognito_url, timeout=10).json())
print(jwks)

# Bearer-token validator built from the fetched key set (created once; the
# repeated assignment and the repeated `import requests` were redundant)
auth = JWTBearer(jwks)

# Set the user credentials
username = ""  # needed
password = ""  # needed

# Create the authentication payload
payload = {
    "username": username,
    "password": password
}

# Set the Cognito token endpoint URL
token_endpoint = f"https://your-cognito-domain.auth.{region}.amazoncognito.com/oauth2/token"

# Set the client credentials
client_id = ""  # needed
client_secret = ""
import boto3
def authenticate_and_get_token(username: str, password: str,
                               user_pool_id: str, app_client_id: str) -> None:
    """Log in to Cognito with the admin no-SRP flow and print the tokens.

    Args:
        username: Cognito user name.
        password: Password of that user.
        user_pool_id: Id of the user pool to authenticate against.
        app_client_id: Id of the app client used for the login.

    Nothing is returned; the access and ID tokens are written to stdout.
    """
    cognito = boto3.client('cognito-idp')
    login_parameters = {
        "USERNAME": username,
        "PASSWORD": password
    }
    resp = cognito.admin_initiate_auth(
        UserPoolId=user_pool_id,
        ClientId=app_client_id,
        AuthFlow='ADMIN_NO_SRP_AUTH',
        AuthParameters=login_parameters,
    )
    print("Log in success")
    print("Access token:", resp['AuthenticationResult']['AccessToken'])
    print("ID token:", resp['AuthenticationResult']['IdToken'])


authenticate_and_get_token(username, password, user_pool_id, client_id)
================================================
FILE: auth/cognito/JWTBearer.py
================================================
from typing import Dict, Optional, List
from fastapi import HTTPException
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from jose import jwt, jwk, JWTError
from jose.utils import base64url_decode
from pydantic import BaseModel
from starlette.requests import Request
from starlette.status import HTTP_403_FORBIDDEN
# A single JSON Web Key, held as a flat string-to-string mapping.
JWK = Dict[str, str]
# Pydantic model for a JSON Web Key Set document.
class JWKS(BaseModel):
# The keys of the set; JWTBearer indexes them by their "kid" entry.
keys: List[JWK]
# Pydantic model holding the pieces of a bearer token as split up by
# JWTBearer.__call__.
class JWTAuthorizationCredentials(BaseModel):
# The complete token string as received.
jwt_token: str
# Token header, read without verification.
header: Dict[str, str]
# Token claims, read without verification.
claims: Dict[str, str]
# Text after the last "." of the token (still base64url-encoded).
signature: str
# Text before the last "." of the token; the part the signature covers.
message: str
class JWTBearer(HTTPBearer):
    """HTTP bearer dependency that checks a token's signature against a JWKS.

    NOTE(review): only the signature is verified here. Claims are read
    unverified and nothing in this class checks expiry, issuer or audience -
    confirm whether that is handled elsewhere.
    """

    def __init__(self, jwks: JWKS, auto_error: bool = True):
        """Index the key set by key id.

        Args:
            jwks: Key set whose entries each carry a ``kid`` field.
            auto_error: Forwarded to ``HTTPBearer``.
        """
        super().__init__(auto_error=auto_error)
        # `key` rather than `jwk`, so the imported `jwk` module is not shadowed.
        self.kid_to_jwk = {key["kid"]: key for key in jwks.keys}

    def verify_jwk_token(self, jwt_credentials: JWTAuthorizationCredentials) -> bool:
        """Check the token signature with the public key named in its header.

        Raises:
            HTTPException: 403 when the header's ``kid`` is missing or unknown.
        """
        try:
            public_key = self.kid_to_jwk[jwt_credentials.header["kid"]]
        except KeyError:
            raise HTTPException(
                status_code=HTTP_403_FORBIDDEN, detail="JWK public key not found"
            )
        key = jwk.construct(public_key)
        decoded_signature = base64url_decode(jwt_credentials.signature.encode())
        return key.verify(jwt_credentials.message.encode(), decoded_signature)

    async def __call__(self, request: Request) -> Optional[JWTAuthorizationCredentials]:
        """Extract and validate the bearer token of ``request``.

        Returns:
            The parsed credentials, or ``None`` when the parent class yields
            no credentials.

        Raises:
            HTTPException: 403 for a wrong scheme, a malformed token or a
                signature that does not verify.
        """
        credentials: HTTPAuthorizationCredentials = await super().__call__(request)
        if not credentials:
            return None

        if not credentials.scheme == "Bearer":
            raise HTTPException(
                status_code=HTTP_403_FORBIDDEN, detail="Wrong authentication method"
            )

        jwt_token = credentials.credentials
        try:
            # Inside the try block: a token without any "." makes this
            # unpacking raise ValueError, which previously escaped as a 500.
            message, signature = jwt_token.rsplit(".", 1)
            jwt_credentials = JWTAuthorizationCredentials(
                jwt_token=jwt_token,
                header=jwt.get_unverified_header(jwt_token),
                claims=jwt.get_unverified_claims(jwt_token),
                signature=signature,
                message=message,
            )
        except (JWTError, ValueError):
            raise HTTPException(status_code=HTTP_403_FORBIDDEN, detail="JWK invalid")

        if not self.verify_jwk_token(jwt_credentials):
            raise HTTPException(status_code=HTTP_403_FORBIDDEN, detail="JWK invalid")

        return jwt_credentials
================================================
FILE: bin/dockerize
================================================
set -euo pipefail

# Build the backend image and optionally push it to ECR.
# Behaviour is controlled through optional environment variables:
#   region, account, repo, stage, image_name, app_dir, publish
AWS_REGION=${region:-eu-west-1}
AWS_DEPLOYMENT_ACCOUNT=${account:-463722570299}
AWS_REPOSITORY=${repo:-"${AWS_DEPLOYMENT_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com"}

STAGE=${stage:-"dev"}
SHA_SHORT="$(git rev-parse --short HEAD)"
CUR_DATE="$(date +%Y%m%d%H%M%S)"
VERSION="$STAGE-$CUR_DATE-$SHA_SHORT"
IMAGE_NAME=${image_name:-promethai-${STAGE}-promethai-backend}

REPO_NAME="${AWS_REPOSITORY}/${IMAGE_NAME}"
FULL_IMAGE_NAME="${REPO_NAME}:${VERSION}"
APP_DIR=${app_dir:-"."}

PUBLISH=${publish:-false}

# Use APP_DIR (which has a default); the raw ${app_dir} is unbound under
# `set -u` whenever the caller did not export it, aborting the script here.
echo "Building docker image ${FULL_IMAGE_NAME} located in dir ${APP_DIR}"

# One command per line: inside an `a && b && c` chain `set -e` does not stop
# the script when a non-final command fails, so a failed build could go
# unnoticed and the script could still exit 0.
pushd "${APP_DIR}"
docker buildx build --platform linux/amd64 \
  --build-arg STAGE="${STAGE}" \
  -t "${FULL_IMAGE_NAME}" .
echo "${VERSION}" >/tmp/.DOCKER_IMAGE_VERSION
echo "Successfully built docker image ${FULL_IMAGE_NAME}"

if [ "${PUBLISH}" = true ]; then
  echo "Pushing docker image ${FULL_IMAGE_NAME} to ECR repository to AWS account ${AWS_DEPLOYMENT_ACCOUNT}"
  echo "logging in"
  aws ecr get-login-password --region "${AWS_REGION}" | docker login --username AWS --password-stdin "${AWS_REPOSITORY}"
  docker push "${FULL_IMAGE_NAME}"
  echo "Successfully pushed docker image ${FULL_IMAGE_NAME} to ECR repository"
fi
================================================
FILE: bots/__init__.py
================================================
================================================
FILE: bots/bot_extension.py
================================================
import sys
from typing import Optional
sys.path.append('../llm_chains')
# from embedchain import EmbedChain
from llm_chains.chains import Agent
from embedchain import App
# Combines embedchain's App with the project's Agent in a single class.
class AppAgent(App, Agent):
# `ef` is accepted but not used by this constructor.
def __init__(self, db=None, ef=None, table_name=None, user_id: Optional[str] = "676", session_id: Optional[str] = None):
# Both base initialisers are called explicitly, Agent first and then App,
# instead of going through super().
Agent.__init__(self, table_name, user_id, session_id)
App.__init__(self, db)
# naval_chat_bot= AppAgent()
#
# naval_chat_bot.add("web_page", "https://nav.al/agi")
#
# # Embed Local Resources
# naval_chat_bot.add_local("qna_pair", (
# "Who is Naval Ravikant?", "Naval Ravikant is an Indian-American entrepreneur and investor."))
#
# naval_chat_bot.query(
# "What unique capacity does Naval argue humans possess when it comes to understanding explanations or concepts?")
================================================
FILE: bots/bot_loading_util.py
================================================
from bot_extension import AppAgent
import yaml
import sys
sys.path.append('../')
# Read the shared template file that lists the chatbot resources
with open('../assistant_templates.yaml', 'r') as file:
    data = yaml.safe_load(file)


def _load_extension(object_type: str, object_value: str):
    """Create a fresh AppAgent and add one resource to it.

    Args:
        object_type: Resource type as given in the template (e.g. 'pdf_file').
        object_value: Location of the resource (the template's ``url`` value).
    """
    AppAgent().add(object_type, object_value)


# Directly access the 'chatbot' role
chatbot_categories = data['chatbot']['categories']

# Load every resource of every chatbot category
for category in chatbot_categories:
    for resource in category['resources']:
        _load_extension(resource['type'], resource['url'])
================================================
FILE: bots/ff.yaml
================================================
Resources:
  ChromaInstance:
    # Every property set on this resource (ImageId, InstanceType, UserData,
    # SecurityGroupIds, KeyName, BlockDeviceMappings) is an EC2 instance
    # property, so the resource type has to be AWS::EC2::Instance.
    Type: 'AWS::EC2::Instance'
    Properties:
      ImageId: !FindInMap
        - Region2AMI
        - !Ref 'AWS::Region'
        - AMI
      InstanceType: "t3.small"
UserData: !Base64
'Fn::Join':
- ''
- - |
Content-Type: multipart/mixed; boundary="//"
- |
MIME-Version: 1.0
- |+
- |
--//
- |
Content-Type: text/cloud-config; charset="us-ascii"
- |
MIME-Version: 1.0
- |
Content-Transfer-Encoding: 7bit
- |
Content-Disposition: attachment; filename="cloud-config.txt"
- |+
- |+
- |
#cloud-config
- |
cloud_final_modules:
- |
- [scripts-user, always]
- |+
- |+
- |
--//
- |
Content-Type: text/x-shellscript; charset="us-ascii"
- |
MIME-Version: 1.0
- |
Content-Transfer-Encoding: 7bit
- |
Content-Disposition: attachment; filename="userdata.txt"
- |+
- |+
- |
#!/bin/bash
- |
amazon-linux-extras install docker
- |
usermod -a -G docker ec2-user
- >
curl -L
https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname
-s)-$(uname -m) -o /usr/local/bin/docker-compose
- |
chmod +x /usr/local/bin/docker-compose
- |
ln -s /usr/local/bin/docker-compose /usr/bin/docker-compose
- |
systemctl enable docker
- |
systemctl start docker
- |+
- |
cat << EOF > /home/ec2-user/docker-compose.yml
- |
version: '3.9'
- |+
- |
networks:
- |2
net:
- |2
driver: bridge
- |+
- |
services:
- |2
server:
- !Sub |2
image: ghcr.io/chroma-core/chroma:${ChromaVersion}
- |2
volumes:
- |2
- index_data:/index_data
- |2
environment:
- |2
- CHROMA_DB_IMPL=clickhouse
- |2
- CLICKHOUSE_HOST=clickhouse
- |2
- CLICKHOUSE_PORT=8123
- |2
ports:
- |2
- 8000:8000
- |2
depends_on:
- |2
- clickhouse
- |2
networks:
- |2
- net
- |+
- |2
clickhouse:
- |2
image: clickhouse/clickhouse-server:22.9-alpine
- |2
environment:
- |2
- ALLOW_EMPTY_PASSWORD=yes
- |2
- CLICKHOUSE_TCP_PORT=9000
- |2
- CLICKHOUSE_HTTP_PORT=8123
- |2
ports:
- |2
- '8123:8123'
- |2
- '9000:9000'
- |2
volumes:
- |2
- clickhouse_data:/bitnami/clickhouse
- |2
- backups:/backups
- |2
- ./config/backup_disk.xml:/etc/clickhouse-server/config.d/backup_disk.xml
- |2
- ./config/chroma_users.xml:/etc/clickhouse-server/users.d/chroma.xml
- |2
networks:
- |2
- net
- |+
- |
volumes:
- |2
clickhouse_data:
- |2
driver: local
- |2
index_data:
- |2
driver: local
- |2
backups:
- |2
driver: local
- |+
- |
EOF
- |+
- |
mkdir /home/ec2-user/config
- |+
- |
cat << EOF > /home/ec2-user/config/backup_disk.xml
- |
<clickhouse>
- |2
<storage_configuration>
- |2
<disks>
- |2
<backups>
- |2
<type>local</type>
- |2
<path>/etc/clickhouse-server/</path>
- |2
</backups>
- |2
</disks>
- |2
</storage_configuration>
- |2
<backups>
- |2
<allowed_disk>backups</allowed_disk>
- |2
<allowed_path>/etc/clickhouse-server/</allowed_path>
- |2
</backups>
- |
</clickhouse>
- |+
- |
EOF
- |+
- |
cat << EOF > /home/ec2-user/config/chroma_users.xml
- |
<clickhouse>
- |2
<profiles>
- |2
<default>
- |2
<allow_experimental_lightweight_delete>1</allow_experimental_lightweight_delete>
- |2
<mutations_sync>1</mutations_sync>
- |2
</default>
- |2
</profiles>
- |
</clickhouse>
- |+
- |
EOF
- |+
- |
docker-compose -f /home/ec2-user/docker-compose.yml up -d
- |+
- |
--//--
# NOTE(review): ChromaInstanceSecurityGroup, the HasKeyName condition, the
# KeyName parameter and the Region2AMI mapping are referenced here but are not
# defined anywhere in this file - confirm they come from the rest of the
# template this fragment was taken from.
SecurityGroupIds:
- !Ref ChromaInstanceSecurityGroup
# Attach the key pair only when the HasKeyName condition holds.
KeyName: !If
- HasKeyName
- !Ref KeyName
- !Ref 'AWS::NoValue'
# Root volume: device name looked up per region.
BlockDeviceMappings:
- DeviceName: !FindInMap
- Region2AMI
- !Ref 'AWS::Region'
- RootDeviceName
Ebs:
VolumeSize: 24
================================================
FILE: bots/mam.tf
================================================
// Existing Terraform src code found at /var/folders/d_/1x0yyl7n5g5cc8vlgchdr06m0000gn/T/terraform_src.
================================================
FILE: docker-compose.yml
================================================
version: "3.9"

services:
  promethai:
    networks:
      - agi_backend
    depends_on:
      - db_agi
      - redis
    build:
      context: ./
    volumes:
      - "./:/app"
    environment:
      - HOST=0.0.0.0
    # Use `docker-compose run promethai` to get an attached container
    profiles: ["exclude-from-up"]
    ports:
      - "8000:8000"
      - "443:443"

  db_agi:
    image: postgres:14-alpine
    container_name: "agi_db"
    networks:
      - agi_backend
    ports:
      - "65432:5432"
    # NOTE(review): credentials are hard-coded here; presumably intended for
    # local development only - confirm they are not reused elsewhere.
    environment:
      POSTGRES_USER: master
      POSTGRES_PASSWORD: supersecreto
      POSTGRES_DB: agi_db
    volumes:
      - ./initdb:/docker-entrypoint-initdb.d

  redis:
    image: redis/redis-stack
    container_name: "redis"
    networks:
      - agi_backend
    ports:
      - "6379:6379"

networks:
  agi_backend:
    name: agi_backend
================================================
FILE: entrypoint.sh
================================================
#!/bin/bash
# Container entrypoint: fetch secrets, then run the API under Gunicorn.
export ENVIRONMENT

python fetch_secret.py

# Start Gunicorn. `exec` replaces this shell with the Gunicorn process so that
# signals sent to the container (e.g. SIGTERM on stop) reach Gunicorn directly.
exec gunicorn -w 2 -k uvicorn.workers.UvicornWorker -t 120 --bind=0.0.0.0:8000 --bind=0.0.0.0:443 --log-level debug api:app
================================================
FILE: examples/level_1/level_1_pdf_vectorstore_dlt_etl.py
================================================
#Make sure to install the following packages: dlt, langchain, duckdb, python-dotenv, openai, weaviate-client
import dlt
from langchain import PromptTemplate, LLMChain
from langchain.chains.openai_functions import create_structured_output_chain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
import weaviate
import os
import json
import argparse
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
from langchain.retrievers import WeaviateHybridSearchRetriever
from langchain.schema import Document, SystemMessage, HumanMessage
from langchain.vectorstores import Weaviate
import uuid
from dotenv import load_dotenv
load_dotenv()
from pathlib import Path
from langchain import OpenAI, LLMMathChain
import os
embeddings = OpenAIEmbeddings()
from deep_translator import (GoogleTranslator)
def _convert_pdf_to_document(path: str = None):
    """Load a PDF, translate each page to English and wrap it as a Document.

    The path must contain a ``personal_receipts`` component; the components
    after it supply the metadata (first one is stored as ``year``, second one
    as ``country``, all of them joined with ``/`` as ``source``).

    Raises:
        ValueError: if ``path`` is None, or if ``personal_receipts`` is not a
            component of ``path``.
    """
    if path is None:
        raise ValueError("A valid path to the document must be provided.")

    pages = PyPDFLoader(path).load_and_split()
    print("PAGES", pages[0])

    # Everything after the "personal_receipts" folder describes the receipt
    parts = Path(path).parts
    metadata_parts = parts[parts.index("personal_receipts") + 1:]

    def _to_document(page):
        # Translate first, then build the metadata for this page
        english_text = GoogleTranslator(source='auto', target='en').translate(text=page.page_content)
        page_metadata = {
            "title": "Personal Receipt",
            "country": metadata_parts[1],
            "year": metadata_parts[0],
            "author": str(uuid.uuid4()),
            "source": "/".join(metadata_parts),
        }
        return Document(metadata=page_metadata, page_content=english_text)

    documents = [_to_document(page) for page in pages]
    print(documents)
    return documents
def _init_weaviate():
    """Build a hybrid-search retriever over the ``PDFloader`` index.

    The Weaviate and OpenAI API keys are read from the ``WEAVIATE_API_KEY``
    and ``OPENAI_API_KEY`` environment variables.
    """
    api_key_auth = weaviate.auth.AuthApiKey(api_key=os.environ.get('WEAVIATE_API_KEY'))
    openai_headers = {
        "X-OpenAI-Api-Key": os.environ.get('OPENAI_API_KEY')
    }
    weaviate_client = weaviate.Client(
        url='https://my-vev-index-o4qitptw.weaviate.network',
        auth_client_secret=api_key_auth,
        additional_headers=openai_headers,
    )
    return WeaviateHybridSearchRetriever(
        client=weaviate_client,
        index_name="PDFloader",
        text_key="text",
        attributes=[],
        embedding=embeddings,
        create_schema_if_missing=True,
    )
def load_to_weaviate(document_path=None):
    """Convert the PDF at ``document_path`` and add its pages to Weaviate.

    Returns whatever the retriever's ``add_documents`` call returns.
    """
    hybrid_retriever = _init_weaviate()
    converted_pages = _convert_pdf_to_document(document_path)
    return hybrid_retriever.add_documents(converted_pages)
def get_from_weaviate(query=None, path=None, operator=None, valueText=None):
    """
    Get documents from weaviate.

    A single retrieval is performed: filtered when any of ``path``,
    ``operator`` or ``valueText`` is given, unfiltered otherwise. (Previously
    an unfiltered retrieval always ran first and its result was thrown away
    whenever filters were supplied.)

    Args:
        query (str): The query string.
        path (list): The path for filtering, e.g., ['year'].
        operator (str): The operator for filtering, e.g., 'Equal'.
        valueText (str): The value for filtering, e.g., '2017*'.

    Returns:
        The documents returned by the retriever.

    Example:
        get_from_weaviate(query="some query", path=['year'], operator='Equal', valueText='2017*')
    """
    retriever = _init_weaviate()

    if path or operator or valueText:
        # Missing filter parts fall back to empty values, as before
        where_filter = {
            'path': path if path else [],
            'operator': operator if operator else '',
            'valueText': valueText if valueText else ''
        }
        return retriever.get_relevant_documents(
            query,
            score=True,
            where_filter=where_filter
        )

    return retriever.get_relevant_documents(
        query,
        score=True,
    )
def delete_from_weaviate(query=None, filters=None):
    """Batch-delete objects of class ``PDFloader`` from weaviate.

    Args:
        query: Not used; kept so existing callers keep working.
        filters (dict): ``where`` filter selecting the objects to delete
            (same format as the GraphQL API), e.g.
            {'path': ['year'], 'operator': 'Equal', 'valueText': '2017*'}.
            When omitted, that example filter is used, which is what this
            function always applied before ``filters`` was honoured.

    Returns:
        str: "Success" once the delete call has returned.
    """
    if filters is None:
        filters = {
            'path': ['year'],
            'operator': 'Equal',
            'valueText': '2017*'
        }

    auth_config = weaviate.auth.AuthApiKey(api_key=os.environ.get('WEAVIATE_API_KEY'))
    client = weaviate.Client(
        url='https://my-vev-index-o4qitptw.weaviate.network',
        auth_client_secret=auth_config,
        additional_headers={
            "X-OpenAI-Api-Key": os.environ.get('OPENAI_API_KEY')
        }
    )
    client.batch.delete_objects(
        class_name='PDFloader',
        # Same `where` filter as in the GraphQL API
        where=filters,
    )
    return "Success"
# Shared chat model used by the chain helpers in this module. The API key is
# read from the OPENAI_API_KEY environment variable; temperature is 0.0.
llm = ChatOpenAI(
temperature=0.0,
max_tokens=1200,
openai_api_key=os.environ.get('OPENAI_API_KEY'),
model_name="gpt-4-0613",
)
def infer_schema_from_text(text: str):
    """Ask the LLM for a JSON schema describing ``text``.

    Returns:
        str: the model's stripped answer passed through ``json.dumps``, i.e.
        the answer text encoded as a JSON string literal.
    """
    template_text = """ You are a json schema master. Create a JSON schema based on the following data and don't write anything else: {prompt} """
    schema_prompt = PromptTemplate(
        input_variables=["prompt"],
        template=template_text,
    )
    schema_chain = LLMChain(llm=llm, prompt=schema_prompt, verbose=True)
    raw_answer = schema_chain.run(prompt=text)
    return json.dumps(raw_answer.strip())
def set_data_contract(data, version, date, agreement_id=None, privacy_policy=None, terms_of_service=None, format=None, schema_version=None, checksum=None, owner=None, license=None, validity_start=None, validity_end=None):
    """Wrap ``data`` in a generic data-contract dictionary.

    Every descriptive field that is falsy (None, empty, 0, ...) is stored as an
    empty string. ``data`` itself is stored, unchanged and uncopied, under the
    ``"properties"`` key.

    Returns:
        dict: the contract, with the descriptive fields first and
        ``"properties"`` last.
    """
    descriptive_fields = (
        ("version", version),
        ("date", date),
        ("agreement_id", agreement_id),
        ("privacy_policy", privacy_policy),
        ("terms_of_service", terms_of_service),
        ("format", format),
        ("schema_version", schema_version),
        ("checksum", checksum),
        ("owner", owner),
        ("license", license),
        ("validity_start", validity_start),
        ("validity_end", validity_end),
    )
    contract = {field: (value or "") for field, value in descriptive_fields}
    # The payload goes under "properties"
    contract["properties"] = data
    return contract
def create_id_dict(memory_id=None, st_memory_id=None, buffer_id=None):
    """
    Bundle the memory, st_memory and buffer IDs into one dictionary.

    Args:
        memory_id (str): The Memory ID.
        st_memory_id (str): The St_memory ID.
        buffer_id (str): The Buffer ID.

    Returns:
        dict: keys ``memoryID``, ``st_MemoryID`` and ``bufferID``; any falsy
        ID is replaced by an empty string.
    """
    id_keys = ("memoryID", "st_MemoryID", "bufferID")
    id_values = (memory_id, st_memory_id, buffer_id)
    return {key: (value or "") for key, value in zip(id_keys, id_values)}
def init_buffer(data, version, date, memory_id=None, st_memory_id=None, buffer_id=None, agreement_id=None, privacy_policy=None, terms_of_service=None, format=None, schema_version=None, checksum=None, owner=None, license=None, validity_start=None, validity_end=None, text=None, process=None):
    """Build a data contract for ``data`` and attach relations, schema and process.

    ``data`` becomes the contract's ``"properties"`` entry and is extended in
    place with a ``"relations"`` key (the ID dictionary), a ``"schema"`` key
    when ``text`` is given (inferred from that text) and a ``"process"`` key
    when ``process`` is given. It therefore has to support item assignment.

    Returns:
        dict: the data contract.
    """
    relations = create_id_dict(memory_id, st_memory_id, buffer_id)
    contract = set_data_contract(
        data, version, date, agreement_id, privacy_policy, terms_of_service,
        format, schema_version, checksum, owner, license, validity_start,
        validity_end,
    )
    properties = contract["properties"]
    properties["relations"] = relations

    # Optional extras
    if text:
        properties["schema"] = infer_schema_from_text(text)
    if process:
        properties["process"] = process

    return contract
def infer_properties_from_text(text: str):
    """Ask the LLM for a short JSON index of the most important data in ``text``.

    Returns:
        str: the model's answer with surrounding whitespace stripped.
    """
    template_text = """ You are a json index master. Create a short JSON index containing the most important data and don't write anything else: {prompt} """
    index_prompt = PromptTemplate(
        input_variables=["prompt"],
        template=template_text,
    )
    index_chain = LLMChain(llm=llm, prompt=index_prompt, verbose=True)
    # json_data = json.dumps(chain_result)
    return index_chain.run(prompt=text).strip()
#
#
# # print(infer_schema_from_text(output[0].page_content))
def load_json_or_infer_schema(file_path, document_path):
    """Return the JSON schema stored at ``file_path``, or infer one.

    When ``file_path`` does not exist, the PDF at ``document_path`` is
    converted and a schema is inferred from the content of its first page.
    """
    try:
        with open(file_path, 'r') as schema_file:
            return json.load(schema_file)
    except FileNotFoundError:
        # No stored schema: derive one from the document itself
        converted_pages = _convert_pdf_to_document(path=document_path)
        return infer_schema_from_text(converted_pages[0].page_content)
def ai_function(prompt=None, json_schema=None):
    """Generator that turns unstructured text into structured data.

    Args:
        prompt: The unstructured text given to the model as ``input``.
        json_schema: Output schema handed to the structured-output chain.

    Yields:
        The single result of running the chain.
    """
    # prompt = output[0].page_content
    system_message = SystemMessage(
        content="You are a world class algorithm converting unstructured data into structured data."
    )
    instruction = HumanMessage(content="Convert unstructured data to structured data:")
    input_slot = HumanMessagePromptTemplate.from_template("{input}")
    format_tip = HumanMessage(content="Tips: Make sure to answer in the correct format")

    chat_prompt = ChatPromptTemplate(
        messages=[system_message, instruction, input_slot, format_tip]
    )
    structured_chain = create_structured_output_chain(json_schema, prompt=chat_prompt, llm=llm, verbose=True)
    yield structured_chain.run(input=prompt, llm=llm)
# Directory that contains this script; the module-level document_paths list
# is built relative to it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
def higher_level_thinking():
    """Generator that asks the math chain to total the ticket prices.

    Documents are fetched with the query "Train", filtered to ``year`` equal
    to '2017*', and their string form is embedded in the question.

    Yields:
        The single answer of the math chain.
    """
    retrieved_docs = get_from_weaviate(query="Train", path=['year'], operator='Equal', valueText='2017*')
    str_docs_data = str(retrieved_docs)
    math_chain = LLMMathChain.from_llm(llm, verbose=True)
    # data_format = init_buffer(data=output, version="0.0.1", date="2021-09-01")
    yield math_chain.run(f"Calculate the sum of the price of the tickets from these documents: {str_docs_data}")
# higher_level_thinking contains a `yield`, so this call only creates a
# generator object; its body does not run until the generator is iterated.
result_higher_level_thinking = higher_level_thinking()
def process_higher_level_thinking(result=None):
    """Yield ``result`` wrapped by ``init_buffer`` and serialised to a JSON string.

    The buffer is created with the fixed version "0.0.1" and date
    "2021-09-01". This is a generator that yields exactly one item.
    """
    buffered = init_buffer(data=result, version="0.0.1", date="2021-09-01")
    import json
    yield json.dumps(buffered)
# Sample receipt PDFs under personal_receipts/2017/de/public_transport,
# resolved relative to BASE_DIR.
# NOTE(review): main() receives its own document_paths argument, so this
# module-level list is not what main() iterates over.
document_paths = [
    os.path.join(BASE_DIR, "personal_receipts", "2017", "de", "public_transport", "3ZCCCW.pdf"),
    os.path.join(BASE_DIR, "personal_receipts", "2017", "de", "public_transport", "4GBEC9.pdf")
]
def main(raw_loading, processed_loading,document_paths):
    """Run either the raw-document pipeline or the processed-data pipeline.

    Args:
        raw_loading: When truthy, each path in ``document_paths`` is
            converted, looked up in / loaded into Weaviate, mapped to the
            ticket schema by the LLM and written to DuckDB via dlt.
        processed_loading: When truthy (and ``raw_loading`` is falsy), the
            output of ``higher_level_thinking`` is written to DuckDB.
        document_paths: Slash-separated paths, interpreted relative to the
            current working directory. May be empty.

    When neither flag is set a usage hint is printed.
    """
    BASE_DIR = os.getcwd()  # Assuming the current working directory is where the data_processing_script.py is located

    def format_document_path(base_dir, path):
        """Join ``path`` (split on "/") onto ``base_dir`` in an OS-independent way."""
        elements = path.strip("/").split("/")
        return os.path.join(base_dir, *elements)

    # Resolve every supplied path, not only the first one, and tolerate an
    # empty list so that processed loading works without document arguments.
    document_paths_ = [format_document_path(BASE_DIR, path) for path in document_paths]
    print(document_paths)

    if raw_loading:
        for document in document_paths_:
            file_path = os.path.join(BASE_DIR, "ticket_schema.json")
            json_schema = load_json_or_infer_schema(file_path, document)
            output = _convert_pdf_to_document(path=document)
            find_data_in_store = get_from_weaviate(query="Train", path=['year'], operator='Equal', valueText='2017*')

            if find_data_in_store:
                # Prefer what is already stored over the freshly converted PDF.
                output = find_data_in_store
                print(output[1])
            else:
                load_to_weaviate(document)

            pipeline = dlt.pipeline(pipeline_name="train_ticket", destination='duckdb', dataset_name='train_ticket_data')
            info = pipeline.run(data=ai_function(output[0].page_content, json_schema))
            print(info)
    elif processed_loading:
        pipeline_processed = dlt.pipeline(pipeline_name="train_ticket_processed", destination='duckdb',
                                          dataset_name='train_ticket_processed_data')
        info = pipeline_processed.run(data=higher_level_thinking())
        print(info)
    else:
        print("Please specify either '--raw_loading' or '--processed_loading' option.")
if __name__ == "__main__":
    # Command-line entry point: two mode flags plus any number of document paths.
    cli = argparse.ArgumentParser(description="Data Processing Script")
    cli.add_argument(
        "--raw_loading",
        action="store_true",
        help="Load raw document data and perform AI tasks",
    )
    cli.add_argument(
        "--processed_loading",
        action="store_true",
        help="Load processed data and run higher-level thinking AI function",
    )
    cli.add_argument(
        "document_paths",
        nargs="*",
        help="Paths to the documents to process",
    )
    cli_args = cli.parse_args()
    main(cli_args.raw_loading, cli_args.processed_loading, cli_args.document_paths)
#to run: python3 level_1_pdf_vectorstore_dlt_etl.py --raw_loading "/personal_receipts/2017/de/public_transport/3ZCCCW.pdf"
================================================
FILE: examples/level_1/ticket_schema.json
================================================
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"ticketType": {
"type": "string",
"enum": ["online ticket", "ICE ticket"]
},
"departureDate": {
"type": "string",
"format": "date"
},
"priceType": {
"type": "string",
"enum": ["Flex price (single journey)"]
},
"class": {
"type": "integer",
"enum": [1]
},
"adult": {
"type": "object",
"properties": {
"quantity": {
"type": "integer"
},
"BC50": {
"type": "integer"
}
},
"required": ["quantity", "BC50"]
},
"journey": {
"type": "object",
"properties": {
"from": {
"type": "string"
},
"to": {
"type": "string"
},
"via": {
"type": "string"
},
"train": {
"type": "string",
"enum": ["ICE"]
}
},
"required": ["from", "to", "via", "train"]
},
"refundPolicy": {
"type": "string"
},
"payment": {
"type": "object",
"properties": {
"items": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"quantity": {
"type": "integer"
},
"price": {
"type": "number"
},
"vat19": {
"type": "number"
},
"vat7": {
"type": "number"
}
},
"required": ["name", "quantity", "price", "vat19", "vat7"]
}
},
"total": {
"type": "number"
},
"method": {
"type": "string",
"enum": ["credit card"]
},
"transactionDetails": {
"type": "object",
"properties": {
"amount": {
"type": "number"
},
"VUNumber": {
"type": "integer"
},
"transactionNumber": {
"type": "integer"
},
"date": {
"type": "string",
"format": "date"
},
"genNumber": {
"type": "string"
}
},
"required": ["amount", "VUNumber", "transactionNumber", "date", "genNumber"]
}
},
"required": ["items", "total", "method", "transactionDetails"]
},
"bookingDetails": {
"type": "object",
"properties": {
"bookingDate": {
"type": "string",
"format": "date-time"
},
"bookingAddress": {
"type": "string"
},
"taxNumber": {
"type": "string"
}
},
"required": ["bookingDate", "bookingAddress", "taxNumber"]
},
"journeyDetails": {
"type": "object",
"properties": {
"validFrom": {
"type": "string",
"format": "date"
},
"passengerName": {
"type": "string"
},
"orderNumber": {
"type": "string"
},
"stops": {
"type": "array",
"items": {
"type": "object",
"properties": {
"stop": {
"type": "string"
},
"date": {
"type": "string",
"format": "date"
},
"time": {
"type": "string",
"format": "time"
},
"track": {
"type": "integer"
},
"product": {
"type": "string"
},
"reservation": {
"type": "string"
}
},
"required": ["stop", "date", "time", "track", "product", "reservation"]
}
}
},
"required": ["validFrom", "passengerName", "orderNumber", "stops"]
},
"usageNotes": {
"type": "string"
}
},
"required": ["ticketType", "departureDate", "priceType", "class", "adult", "journey", "refundPolicy", "payment", "bookingDetails", "journeyDetails", "usageNotes"]
}
================================================
FILE: examples/level_2/Dockerfile
================================================
FROM python:3.11-slim

# Build-time switch, re-exported below so the running container can read it.
ARG API_ENABLED

# Set environment variable based on the build argument
ENV API_ENABLED=${API_ENABLED} \
    PIP_NO_CACHE_DIR=true
ENV PATH="${PATH}:/root/.poetry/bin"

RUN pip install poetry

WORKDIR /app

# The lock file is matched with a glob so the build also works when no
# poetry.lock is present in the build context.
COPY pyproject.toml poetry.lock* /app/

# Install the runtime dependencies only. `--only main` is used because the
# older `--no-dev` flag is no longer accepted by Poetry 2.x, and Poetry is
# installed unpinned above.
RUN poetry config virtualenvs.create false && \
    poetry install --no-root --only main

# System tools and the AWS CLI are installed in a single layer so that the
# apt lists and the installer archive are removed in the same layer that
# created them. unzip is listed explicitly because the AWS CLI install needs it.
RUN apt-get update -q && \
    apt-get install curl zip unzip jq netcat-traditional -y -q && \
    curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
    unzip -qq awscliv2.zip && ./aws/install && \
    rm -rf awscliv2.zip aws && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

#RUN playwright install
#RUN playwright install-deps

WORKDIR /app
COPY . /app
COPY entrypoint.sh /app/entrypoint.sh
RUN chmod +x /app/entrypoint.sh

ENTRYPOINT ["/app/entrypoint.sh"]
================================================
FILE: examples/level_2/Readme.md
================================================
## PromethAI Memory Manager
### Description
Initial code lets you do three operations:
1. Add to memory
2. Retrieve from memory
3. Structure the data to schema and load to duckdb
## How to use
## Installation
```docker compose build promethai_mem ```
## Run
```docker compose up promethai_mem ```
## Usage
The FastAPI endpoint accepts a prompt and PDF files and returns a JSON object with the generated text.
```bash
curl -X POST \
  -F "prompt=The quick brown fox" \
  -F "file=@/path/to/file.pdf" \
  http://localhost:8000/generate/
```
================================================
FILE: examples/level_2/api.py
================================================
from langchain.document_loaders import PyPDFLoader
from level_2_pdf_vectorstore__dlt_contracts import ShortTermMemory
from fastapi import FastAPI
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from typing import Dict, Any
import re
import json
import logging
import os
import uvicorn
from fastapi import Request
import yaml
from fastapi import HTTPException
from fastapi import FastAPI, UploadFile, File
from typing import List
from level_2_pdf_vectorstore__dlt_contracts import ShortTermMemory
# Configure logging once at import time: INFO and above, each message
# prefixed with a timestamp and its level.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
# Module-level logger used by the server start-up code.
logger = logging.getLogger(__name__)
from dotenv import load_dotenv
# Read a local .env file (if any) into the environment before the app is created.
load_dotenv()
# NOTE(review): debug=True is set unconditionally - confirm this is intended
# outside local development.
app = FastAPI(debug=True)
from fastapi import Depends
class Payload(BaseModel):
    """Request wrapper whose ``payload`` field carries a JSON-encoded string.

    The upload endpoint decodes the string with ``json.loads`` and reads the
    ``user_id`` and ``prompt`` keys from the result.
    """
    payload: str
class ImageResponse(BaseModel):
    """Response model with a success flag and a human-readable message.

    NOTE(review): not referenced by any endpoint in the visible part of this
    module - confirm whether it is still needed.
    """
    success: bool
    message: str
@app.get("/")
async def root():
    """
    Serve a fixed welcome message at the API root.
    """
    greeting = "Hello, World, I am alive!"
    return {"message": greeting}
@app.get("/health")
def health_check():
    """
    Report that the server is up; always answers with status "OK".
    """
    status_report = dict(status="OK")
    return status_report
#curl -X POST -H "Content-Type: application/json" -d '{"data": "YourPayload"}' -F "files=@/path/to/your/pdf/file.pdf" http://127.0.0.1:8000/upload/
@app.post("/upload/")
async def upload_pdf_and_payload(
        payload: Payload,
        files: List[UploadFile] = File(...),
):
    """Accept PDF uploads plus a JSON payload and run the episodic buffer.

    ``payload.payload`` must be a JSON string containing ``user_id`` and
    ``prompt``. Each uploaded file is written to its own temporary file,
    parsed with ``PyPDFLoader`` and removed again, after which the episodic
    buffer is run for the prompt.

    Returns:
        ``{"message": ..., "results": [...]}`` with one entry (file name and
        byte size) per upload on success, or ``{"error": <text>}`` when any
        step fails. Failures are also logged with their traceback.
    """
    import tempfile  # local import: only this endpoint needs it

    try:
        # Process the payload
        decoded_payload = json.loads(payload.payload)

        # Process each uploaded PDF file
        results = []
        for file in files:
            contents = await file.read()

            # One private temp file per upload: a single shared /tmp/tmp.pdf
            # would be overwritten by concurrent requests and never removed.
            with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp_file:
                tmp_file.write(contents)
                tmp_location = tmp_file.name
            try:
                loader = PyPDFLoader(tmp_location)
                pages = loader.load_and_split()
            finally:
                os.remove(tmp_location)

            stm = ShortTermMemory(user_id=decoded_payload['user_id'])
            # main_buffer() accepts only `user_input`; calling it with
            # `prompt=`/`pages=` raises TypeError on every request.
            # NOTE(review): `pages` is parsed but not yet consumed by the
            # buffer - extend main_buffer if the PDF text should be used.
            stm.episodic_buffer.main_buffer(user_input=decoded_payload['prompt'])

            # Here you can perform your processing on the PDF contents
            results.append({"filename": file.filename, "size": len(contents)})

        return {"message": "Upload successful", "results": results}
    except Exception as e:
        # Keep the best-effort error response, but leave a trace in the logs.
        logger.exception("Upload processing failed")
        return {"error": str(e)}
# @app.post("/clear-cache", response_model=dict)
# async def clear_cache(request_data: Payload) -> dict:
# """
# Endpoint to clear the cache.
#
# Parameters:
# request_data (Payload): The request data containing the user and session IDs.
#
# Returns:
# dict: A dictionary with a message indicating the cache was cleared.
# """
# json_payload = request_data.payload
# agent = Agent()
# agent.set_user_session(json_payload["user_id"], json_payload["session_id"])
# try:
# agent.clear_cache()
# return JSONResponse(content={"response": "Cache cleared"}, status_code=200)
# except Exception as e:
# raise HTTPException(status_code=500, detail=str(e))
#
# @app.post("/correct-prompt-grammar", response_model=dict)
# async def prompt_to_correct_grammar(request_data: Payload) -> dict:
# json_payload = request_data.payload
# agent = Agent()
# agent.set_user_session(json_payload["user_id"], json_payload["session_id"])
# logging.info("Correcting grammar %s", json_payload["prompt_source"])
#
# output = agent.prompt_correction(json_payload["prompt_source"], model_speed= json_payload["model_speed"])
# return JSONResponse(content={"response": {"result": json.loads(output)}})
# @app.post("/action-add-zapier-calendar-action", response_model=dict,dependencies=[Depends(auth)])
# async def action_add_zapier_calendar_action(
# request: Request, request_data: Payload
# ) -> dict:
# json_payload = request_data.payload
# agent = Agent()
# agent.set_user_session(json_payload["user_id"], json_payload["session_id"])
# # Extract the bearer token from the header
# auth_header = request.headers.get("Authorization")
# if auth_header:
# bearer_token = auth_header.replace("Bearer ", "")
# else:
# bearer_token = None
# outcome = agent.add_zapier_calendar_action(
# prompt_base=json_payload["prompt_base"],
# token=bearer_token,
# model_speed=json_payload["model_speed"],
# )
# return JSONResponse(content={"response": outcome})
def start_api_server(host: str = "0.0.0.0", port: int = 8000):
    """
    Launch the FastAPI application under uvicorn.

    Parameters:
    host (str): Interface address to bind.
    port (int): TCP port to listen on.

    Any exception raised while starting or running the server is logged with
    its traceback and is not re-raised.
    """
    address = f"{host}:{port}"
    try:
        logger.info(f"Starting server at {address}")
        uvicorn.run(app, host=host, port=port)
    except Exception as error:
        logger.exception(f"Failed to start server: {error}")
        # Here you could add any cleanup code or error recovery code.
# Start the API server with its default host and port when this module is
# executed as a script.
if __name__ == "__main__":
    start_api_server()
================================================
FILE: examples/level_2/docker-compose.yml
================================================
version: "3.9"

services:
  promethai_mem:
    networks:
      - promethai_mem_backend
    build:
      context: ./
    volumes:
      - "./:/app"
    environment:
      - HOST=0.0.0.0
    # The profile keeps this service out of a plain `docker compose up`.
    # Name it explicitly, e.g. `docker compose up promethai_mem`, or use
    # `docker compose run promethai_mem` to get an attached container.
    profiles: ["exclude-from-up"]
    ports:
      # Quoted so a host:container pair is always read as a string.
      - "8000:8000"
      - "443:443"

networks:
  promethai_mem_backend:
    name: promethai_mem_backend
================================================
FILE: examples/level_2/entrypoint.sh
================================================
#!/bin/bash
# Container entrypoint: exports ENVIRONMENT to child processes and starts
# the API under Gunicorn with uvicorn workers.
export ENVIRONMENT
#python fetch_secret.py
# Start Gunicorn. `exec` replaces this shell with the gunicorn process so it
# becomes PID 1 and receives stop signals (e.g. SIGTERM from `docker stop`)
# directly instead of being killed after the timeout.
exec gunicorn -w 2 -k uvicorn.workers.UvicornWorker -t 120 --bind=0.0.0.0:8000 --bind=0.0.0.0:443 --log-level debug api:app
================================================
FILE: examples/level_2/level_2_pdf_vectorstore__dlt_contracts.py
================================================
#Make sure to install the following packages: dlt, langchain, duckdb, python-dotenv, openai, weaviate-client
import dlt
from langchain import PromptTemplate, LLMChain
from langchain.agents import initialize_agent, AgentType
from langchain.chains.openai_functions import create_structured_output_chain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
import weaviate
import os
import json
from marvin import ai_classifier
from enum import Enum
import marvin
import asyncio
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
from langchain.retrievers import WeaviateHybridSearchRetriever
from langchain.schema import Document, SystemMessage, HumanMessage
from langchain.tools import tool
from langchain.vectorstores import Weaviate
import uuid
from dotenv import load_dotenv
load_dotenv()
from pathlib import Path
from langchain import OpenAI, LLMMathChain
import os
from datetime import datetime
import os
from datetime import datetime
from jinja2 import Template
from langchain import PromptTemplate, LLMChain
from langchain.chains.openai_functions import create_structured_output_chain
from langchain.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pinecone
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
from pydantic import BaseModel, Field
from dotenv import load_dotenv
from langchain.schema import Document, SystemMessage, HumanMessage
from langchain.vectorstores import Weaviate
import weaviate
import uuid
load_dotenv()
class VectorDB:
    """Access layer for the vector store that holds one memory namespace.

    Only the Weaviate backend is wired up: ``__init__`` raises ``ValueError``
    for every other ``db_type``, including the default ``"pinecone"``. The
    Pinecone code paths are kept, but cannot be reached through a normally
    constructed instance.

    Connection settings come from the ``WEAVIATE_URL``, ``WEAVIATE_API_KEY``
    and ``OPENAI_API_KEY`` environment variables.
    """

    def __init__(self, user_id: str, index_name: str, memory_id:str, ltm_memory_id:str='00000', st_memory_id:str='0000', buffer_id:str='0000', db_type: str = "pinecone", namespace:str = None):
        """Store the identifiers and initialise the backend.

        Raises:
            ValueError: If ``db_type`` is anything other than ``"weaviate"``.
        """
        self.user_id = user_id
        self.index_name = index_name
        self.db_type = db_type
        self.namespace=namespace
        self.memory_id = memory_id
        self.ltm_memory_id = ltm_memory_id
        self.st_memory_id = st_memory_id
        self.buffer_id = buffer_id
        # if self.db_type == "pinecone":
        #     self.vectorstore = self.init_pinecone(self.index_name)
        if self.db_type == "weaviate":
            # The returned retriever is discarded; it is rebuilt on each
            # add/fetch call.
            self.init_weaviate(namespace=self.namespace)
        else:
            raise ValueError(f"Unsupported database type: {db_type}")

    def init_pinecone(self, index_name):
        """Initialise Pinecone from the environment and return a vector store.

        The store is opened on ``self.index_name`` with the fixed namespace
        ``'RESULT'``.
        """
        load_dotenv()
        PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "")
        PINECONE_API_ENV = os.getenv("PINECONE_API_ENV", "")
        pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_API_ENV)
        pinecone.Index(index_name)
        vectorstore: Pinecone = Pinecone.from_existing_index(
            index_name=self.index_name,
            embedding=OpenAIEmbeddings(),
            namespace='RESULT'
        )
        return vectorstore

    def _weaviate_client(self):
        """Build a Weaviate client from the environment configuration."""
        auth_config = weaviate.auth.AuthApiKey(api_key=os.environ.get('WEAVIATE_API_KEY'))
        return weaviate.Client(
            url=os.environ.get('WEAVIATE_URL'),
            auth_client_secret=auth_config,
            additional_headers={
                "X-OpenAI-Api-Key": os.environ.get('OPENAI_API_KEY')
            }
        )

    def init_weaviate(self, namespace:str):
        """Return a hybrid-search retriever bound to the class ``namespace``.

        The retriever is created with ``create_schema_if_missing=True``.
        """
        embeddings = OpenAIEmbeddings()
        client = self._weaviate_client()
        retriever = WeaviateHybridSearchRetriever(
            client=client,
            index_name=namespace,
            text_key="text",
            attributes=[],
            embedding=embeddings,
            create_schema_if_missing=True,
        )
        return retriever

    def add_memories(self, observation: str, page: str = "", source: str = ""):
        """Store ``observation`` as a new document in the configured backend.

        Args:
            observation: Text to store; used as page content and as the
                ``text`` metadata entry.
            page: Stored as metadata on the Pinecone path only.
            source: Stored as metadata on the Pinecone path only.

        Returns:
            Whatever the Weaviate retriever's ``add_documents`` returns; the
            Pinecone branch returns ``None``.
        """
        if self.db_type == "pinecone":
            # Update Pinecone memories here
            vectorstore: Pinecone = Pinecone.from_existing_index(
                index_name=self.index_name, embedding=OpenAIEmbeddings(), namespace=self.namespace
            )
            retriever = vectorstore.as_retriever()
            retriever.add_documents(
                [
                    Document(
                        page_content=observation,
                        metadata={
                            "inserted_at": datetime.now(),
                            "text": observation,
                            "user_id": self.user_id,
                            "page": page,
                            "source": source
                        },
                        namespace=self.namespace,
                    )
                ]
            )
        elif self.db_type == "weaviate":
            # Update Weaviate memories here
            retriever = self.init_weaviate( self.namespace)
            return retriever.add_documents([
                Document(
                    # All identifiers are stored as strings.
                    metadata={
                        "inserted_at": str(datetime.now()),
                        "text": observation,
                        "user_id": str(self.user_id),
                        "memory_id": str(self.memory_id),
                        "ltm_memory_id": str(self.ltm_memory_id),
                        "st_memory_id": str(self.st_memory_id),
                        "buffer_id": str(self.buffer_id),
                        # **source_metadata,
                    },
                    page_content=observation,
                )]
            )

    # def get_pinecone_vectorstore(self, namespace: str) -> pinecone.VectorStore:
    #     return Pinecone.from_existing_index(
    #         index_name=self.index, embedding=OpenAIEmbeddings(), namespace=namespace
    #     )

    def fetch_memories(self, observation: str, params = None):
        """Retrieve documents relevant to ``observation`` from this namespace.

        Args:
            observation: The query string.
            params: Optional Weaviate ``where`` filter, e.g.
                ``{"path": ["year"], "operator": "Equal", "valueText": "2017*"}``.

        Returns:
            The retriever's result for Weaviate; ``None`` for Pinecone, which
            is not implemented.
        """
        if self.db_type == "pinecone":
            # Fetch Pinecone memories here
            pass
        elif self.db_type == "weaviate":
            # Fetch Weaviate memories here
            retriever = self.init_weaviate(self.namespace)
            print(self.namespace)
            print(str(datetime.now()))
            # Retrieve documents with filters applied
            output = retriever.get_relevant_documents(
                observation,
                score=True,
                where_filter=params
            )
            return output

    def delete_memories(self, params: dict):
        """Batch-delete the objects of this namespace that match ``params``.

        Args:
            params: Weaviate ``where`` filter (same shape as in the GraphQL
                API) that selects the objects to delete.
        """
        # The client is built from WEAVIATE_URL like every other call; using
        # the API key as the URL can never reach the server.
        client = self._weaviate_client()
        client.batch.delete_objects(
            class_name=self.namespace,
            # Same `where` filter as in the GraphQL API
            where=params,
        )

    def update_memories(self):
        """Placeholder; updating stored memories is not implemented."""
        pass
class SemanticMemory:
def __init__(self, user_id: str, memory_id:str, ltm_memory_id:str, index_name: str, db_type:str="weaviate", namespace:str="SEMANTICMEMORY"):
# Add any semantic memory-related attributes or setup here
self.user_id=user_id
self.index_name = index_name
self.namespace = namespace
self.semantic_memory_id = str(uuid.uuid4())
self.memory_id = memory_id
self.ltm_memory_id = ltm_memory_id
self.vector_db = VectorDB(user_id=user_id, memory_id= self.memory_id, ltm_memory_id = self.ltm_memory_id, index_name=index_name, db_type=db_type, namespace=self.namespace)
self.db_type = db_type
def _update_memories(self ,memory_id:str="None", semantic_memory: str="None") -> None:
"""Update semantic memory for the user"""
if self.db_type == "weaviate":
self.vector_db.add_memories( observation = semantic_memory)
elif self.db_type == "pinecone":
pass
def _fetch_memories(self, observation: str,params) -> dict[str, str] | str:
"""Fetch related characteristics, preferences or dislikes for a user."""
# self.init_pinecone(index_name=self.index)
if self.db_type == "weaviate":
return self.vector_db.fetch_memories(observation, params)
elif self.db_type == "pinecone":
pass
class LongTermMemory:
    """Long-term memory container; currently holds only a ``SemanticMemory``.

    A new ``ltm_memory_id`` is generated per instance and passed on to the
    semantic memory. ``namespace`` is stored but not forwarded, so the
    semantic memory uses its own default namespace.
    """

    def __init__(self, user_id: str = "676", memory_id:str=None, index_name: str = None, namespace:str=None, db_type:str="weaviate"):
        self.user_id, self.memory_id = user_id, memory_id
        self.ltm_memory_id = str(uuid.uuid4())
        self.index_name, self.namespace, self.db_type = index_name, namespace, db_type
        # self.episodic_memory = EpisodicMemory()
        self.semantic_memory = SemanticMemory(
            user_id=user_id,
            memory_id=memory_id,
            ltm_memory_id=self.ltm_memory_id,
            index_name=index_name,
            db_type=db_type,
        )
class ShortTermMemory:
    """Short-term memory container; owns the ``EpisodicBuffer``.

    A new ``stm_memory_id`` is generated per instance. ``namespace`` is
    stored but not forwarded, so the buffer uses its own default namespace.
    """

    def __init__(self, user_id: str = "676", memory_id:str=None, index_name: str = None, namespace:str=None, db_type:str="weaviate"):
        # Add any short-term memory-related attributes or setup here
        self.user_id, self.memory_id = user_id, memory_id
        self.namespace, self.db_type = namespace, db_type
        self.stm_memory_id = str(uuid.uuid4())
        self.index_name = index_name
        self.episodic_buffer = EpisodicBuffer(
            user_id=user_id,
            memory_id=memory_id,
            index_name=index_name,
            db_type=db_type,
        )
class EpisodicBuffer:
    """Short-term working buffer that routes a user query to memory tools.

    ``main_buffer`` is the entry point: an LLM first classifies the query and
    an OpenAI-functions agent then picks one of three tools (structured
    conversion, memory fetch, memory add). The weighting/filter helpers are
    placeholders.
    """

    def __init__(self, user_id: str = "676", memory_id:str=None, index_name: str = None, namespace:str='EPISODICBUFFER', db_type:str="weaviate"):
        # Add any short-term memory-related attributes or setup here
        self.user_id = user_id
        self.memory_id = memory_id
        self.namespace = namespace
        self.db_type = db_type
        # NOTE(review): placeholder id - confirm whether a generated id is expected here.
        self.st_memory_id = "blah"
        self.index_name = index_name
        self.llm= ChatOpenAI(
            temperature=0.0,
            max_tokens=1200,
            openai_api_key=os.environ.get('OPENAI_API_KEY'),
            model_name="gpt-4-0613",
        )
        # self.vector_db = VectorDB(user_id=user_id, memory_id= self.memory_id, st_memory_id = self.st_memory_id, index_name=index_name, db_type=db_type, namespace=self.namespace)

    def _context_filter(self, context: str):
        """Filters the context for the buffer (unfinished: builds a prompt, returns None)."""
        prompt = PromptTemplate.from_template(
            """ Based on the {CONTEXT} of {user_id} choose events that are relevant"""
        )
        return

    def _compute_weights(self, context: str):
        """Computes the weights for the buffer (not implemented)."""
        pass

    def _temporal_weighting(self, context: str):
        """Computes the temporal weighting for the buffer (not implemented)."""
        pass

    async def infer_schema_from_text(self, text: str):
        """Ask the LLM for a JSON schema describing ``text``.

        Returns:
            The stripped LLM answer passed through ``json.dumps``, i.e. the
            answer text encoded as a JSON string.
        """
        prompt_ = """ You are a json schema master. Create a JSON schema based on the following data and don't write anything else: {prompt} """
        complete_query = PromptTemplate(
            input_variables=["prompt"],
            template=prompt_,
        )
        chain = LLMChain(
            llm=self.llm, prompt=complete_query, verbose=True
        )
        chain_result = chain.run(prompt=text).strip()
        json_data = json.dumps(chain_result)
        return json_data

    def main_buffer(self, user_input=None):
        """Classify ``user_input`` and let an agent run the matching memory tool.

        Three tools are offered to the agent:

        * ``convert_to_structured`` - map an observation to the ticket schema
          and load the result into DuckDB via dlt;
        * ``fetch_memory_wrapper`` - fetch documents from the vector store;
        * ``add_memories_wrapper`` - store an observation in the vector store.

        Args:
            user_input: The raw user query.

        Returns:
            The output of the sequential chain (classification, then agent).
        """
        # Here we define the user prompt and the structure of the output we desire
        # prompt = output[0].page_content
        class PromptWrapper(BaseModel):
            observation: str = Field(
                description="observation we want to fetch from vectordb"
            )
            # json_schema: str = Field(description="json schema we want to infer")

        @tool("convert_to_structured", args_schema=PromptWrapper, return_direct=True)
        def convert_to_structured( observation=None, json_schema=None):
            """Convert unstructured data to structured data"""
            BASE_DIR = os.getcwd()
            json_path = os.path.join(BASE_DIR, "schema_registry", "ticket_schema.json")

            def load_json_or_infer_schema(file_path, document_path):
                """Load JSON schema from file or infer schema from text"""
                # Attempt to load the JSON file
                with open(file_path, 'r') as file:
                    json_schema = json.load(file)
                return json_schema

            json_schema =load_json_or_infer_schema(json_path, None)

            def run_open_ai_mapper(observation=None, json_schema=None):
                """Convert unstructured data to structured data"""
                prompt_msgs = [
                    SystemMessage(
                        content="You are a world class algorithm converting unstructured data into structured data."
                    ),
                    HumanMessage(content="Convert unstructured data to structured data:"),
                    HumanMessagePromptTemplate.from_template("{input}"),
                    HumanMessage(content="Tips: Make sure to answer in the correct format"),
                ]
                prompt_ = ChatPromptTemplate(messages=prompt_msgs)
                chain_funct = create_structured_output_chain(json_schema, prompt=prompt_, llm=self.llm, verbose=True)
                output = chain_funct.run(input=observation, llm=self.llm)
                yield output

            pipeline = dlt.pipeline(pipeline_name="train_ticket", destination='duckdb', dataset_name='train_ticket_data')
            # Map the observation handed to the tool. The enclosing `prompt`
            # variable holds the routing template, not the data to convert.
            info = pipeline.run(data=run_open_ai_mapper(observation, json_schema))
            return print(info)

        class GoalWrapper(BaseModel):
            observation: str = Field(
                description="observation we want to fetch from vectordb"
            )

        @tool("fetch_memory_wrapper", args_schema=GoalWrapper, return_direct=True)
        def fetch_memory_wrapper(observation, args_schema=GoalWrapper):
            """Fetches data from the VectorDB and returns it as a python dictionary."""
            print("HELLO, HERE IS THE OBSERVATION: ", observation)
            marvin.settings.openai.api_key = os.environ.get('OPENAI_API_KEY')

            @ai_classifier
            class MemoryRoute(Enum):
                """Represents distinct routes for different memory types."""
                storage_of_documents_and_knowledge_to_memory = "SEMANTICMEMORY"
                raw_information_currently_processed_in_short_term_memory = "EPISODICBUFFER"
                raw_information_kept_in_short_term_memory = "SHORTTERMMEMORY"
                long_term_recollections_of_past_events_and_emotions = "EPISODICMEMORY"

            # The classifier picks the namespace that best matches the observation.
            namespace= MemoryRoute(observation)
            vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id,
                                 index_name=self.index_name, db_type=self.db_type, namespace=namespace.value)
            query = vector_db.fetch_memories(observation)
            return query

        class UpdatePreferences(BaseModel):
            observation: str = Field(
                description="observation we want to fetch from vectordb"
            )

        @tool("add_memories_wrapper", args_schema=UpdatePreferences, return_direct=True)
        def add_memories_wrapper(observation, args_schema=UpdatePreferences):
            """Updates user preferences in the VectorDB."""

            @ai_classifier
            class MemoryRoute(Enum):
                """Represents distinct routes for different memory types."""
                storage_of_documents_and_knowledge_to_memory = "SEMANTICMEMORY"
                raw_information_currently_processed_in_short_term_memory = "EPISODICBUFFER"
                raw_information_kept_in_short_term_memory = "SHORTTERMMEMORY"
                long_term_recollections_of_past_events_and_emotions = "EPISODICMEMORY"

            namespace= MemoryRoute(observation)
            print("HELLO, HERE IS THE OBSERVATION 2: ")
            vector_db = VectorDB(user_id=self.user_id, memory_id=self.memory_id, st_memory_id=self.st_memory_id,
                                 index_name=self.index_name, db_type=self.db_type, namespace=namespace.value)
            return vector_db.add_memories(observation)

        agent = initialize_agent(
            llm=self.llm,
            tools=[convert_to_structured,fetch_memory_wrapper, add_memories_wrapper],
            agent=AgentType.OPENAI_FUNCTIONS,
            verbose=True,
        )

        prompt = """
        Based on all the history and information of this user, decide based on user query query: {query} which of the following tasks needs to be done:
        1. Memory retrieval , 2. Memory update, 3. Convert data to structured If the query is not any of these, then classify it as 'Other'
        Return the result in format: 'Result_type': 'Goal', "Original_query": "Original query"
        """
        # template = Template(prompt)
        # output = template.render(query=user_input)
        # complete_query = output
        complete_query = PromptTemplate(
            input_variables=["query"], template=prompt
        )
        summary_chain = LLMChain(
            llm=self.llm, prompt=complete_query, verbose=True
        )
        from langchain.chains import SimpleSequentialChain
        # The classification output becomes the agent's input.
        overall_chain = SimpleSequentialChain(
            chains=[summary_chain, agent], verbose=True
        )
        output = overall_chain.run(user_input)
        return output
#DEFINE STM
#DEFINE LTM
class Memory:
    """Facade that ties one user's long-term and short-term memory together.

    A fresh ``memory_id`` is generated per instance and shared with both
    memory stores.
    """
    load_dotenv()

    def __init__(self, user_id: str = "676", index_name: str = None, knowledge_source: str = None,
                 knowledge_type: str = None, db_type:str="weaviate", namespace:str=None) -> None:
        self.user_id = user_id
        self.index_name = index_name
        self.db_type = db_type
        self.knowledge_source = knowledge_source
        self.knowledge_type = knowledge_type
        self.memory_id = str(uuid.uuid4())
        # Arguments common to both stores; only long-term memory receives the namespace.
        shared = dict(user_id=self.user_id, memory_id=self.memory_id,
                      index_name=index_name, db_type=self.db_type)
        self.long_term_memory = LongTermMemory(namespace=namespace, **shared)
        self.short_term_memory = ShortTermMemory(**shared)

    def _update_semantic_memory(self, semantic_memory:str):
        """Store ``semantic_memory`` in the long-term semantic memory."""
        semantic = self.long_term_memory.semantic_memory
        return semantic._update_memories(memory_id=self.memory_id, semantic_memory=semantic_memory)

    def _fetch_semantic_memory(self, observation, params):
        """Fetch semantic memories related to ``observation``, filtered by ``params``."""
        semantic = self.long_term_memory.semantic_memory
        return semantic._fetch_memories(observation=observation, params=params)

    def _run_buffer(self, user_input:str):
        """Run the episodic buffer's agent on ``user_input`` and return its output."""
        buffer = self.short_term_memory.episodic_buffer
        return buffer.main_buffer(user_input=user_input)
# Manual smoke test: build a Memory for a sample user and run one query
# through the episodic buffer. Needs the environment variables for Weaviate
# and OpenAI that the classes above read.
if __name__ == "__main__":
    # NOTE(review): `namespace` is assigned but never passed on below.
    namespace = "gggg"
    agent = Memory(index_name="my-agent", user_id='555' )
    #bb = agent._update_semantic_memory(semantic_memory="Users core summary")
    # bb = agent._fetch_semantic_memory(observation= "Users core summary", params = {
    # "path": ["inserted_at"],
    # "operator": "Equal",
    # "valueText": "*2023*"
    # })
    buffer = agent._run_buffer(user_input="I want to get a schema for my data")
    # print(bb)
    # rrr = {
    # "path": ["year"],
    # "operator": "Equal",
    # "valueText": "2017*"
    # }
================================================
FILE: examples/level_2/pyproject.toml
================================================
[tool.poetry]
name = "PromethAI_memory"
version = "0.1.0"
description = "PromethAI memory manager"
authors = ["Vasilije Markovic"]
# Must match the file name on disk exactly (case-sensitive file systems).
readme = "Readme.md"

[tool.poetry.dependencies]
python = "^3.10"
#langchain = {git = "https://github.com/topoteretes/langchain.git" , tag = "v0.0.209"}
langchain = "v0.0.250"
nltk = "3.8.1"
openai = "0.27.8"
pinecone-client = "2.2.2"
python-dotenv = "1.0.0"
pyyaml = "6.0"
fastapi = "0.98.0"
uvicorn = "0.22.0"
googlemaps = "4.10.0"
jinja2 = "3.1.2"
replicate = "^0.8.4"
pexpect = "^4.8.0"
selenium = "^4.9.0"
playwright = "^1.32.1"
pytest-playwright = "^0.3.3"
boto3 = "^1.26.125"
gptcache = "^0.1.22"
redis = "^4.5.5"
gunicorn = "^20.1.0"
tiktoken = "^0.4.0"
google-search-results = "^2.4.2"
spacy = "^3.5.3"
python-jose = "^3.3.0"
pypdf = "^3.12.0"
fastjsonschema = "^2.18.0"
marvin = "^1.3.0"
dlt = "^0.3.8"
weaviate-client = "^3.22.1"
# Required by FastAPI for form and file uploads.
python-multipart = "^0.0.6"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
================================================
FILE: examples/level_2/schema_registry/ticket_schema.json
================================================
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"ticketType": {
"type": "string",
"enum": ["online ticket", "ICE ticket"]
},
"departureDate": {
"type": "string",
"format": "date"
},
"priceType": {
"type": "string",
"enum": ["Flex price (single journey)"]
},
"class": {
"type": "integer",
"enum": [1]
},
"adult": {
"type": "object",
"properties": {
"quantity": {
"type": "integer"
},
"BC50": {
"type": "integer"
}
},
"required": ["quantity", "BC50"]
},
"journey": {
"type": "object",
"properties": {
"from": {
"type": "string"
},
"to": {
"type": "string"
},
"via": {
"type": "string"
},
"train": {
"type": "string",
"enum": ["ICE"]
}
},
"required": ["from", "to", "via", "train"]
},
"refundPolicy": {
"type": "string"
},
"payment": {
"type": "object",
"properties": {
"items": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"quantity": {
"type": "integer"
},
"price": {
"type": "number"
},
"vat19": {
"type": "number"
},
"vat7": {
"type": "number"
}
},
"required": ["name", "quantity", "price", "vat19", "vat7"]
}
},
"total": {
"type": "number"
},
"method": {
"type": "string",
"enum": ["credit card"]
},
"transactionDetails": {
"type": "object",
"properties": {
"amount": {
"type": "number"
},
"VUNumber": {
"type": "integer"
},
"transactionNumber": {
"type": "integer"
},
"date": {
"type": "string",
"format": "date"
},
"genNumber": {
"type": "string"
}
},
"required": ["amount", "VUNumber", "transactionNumber", "date", "genNumber"]
}
},
"required": ["items", "total", "method", "transactionDetails"]
},
"bookingDetails": {
"type": "object",
"properties": {
"bookingDate": {
"type": "string",
"format": "date-time"
},
"bookingAddress": {
"type": "string"
},
"taxNumber": {
"type": "string"
}
},
"required": ["bookingDate", "bookingAddress", "taxNumber"]
},
"journeyDetails": {
"type": "object",
"properties": {
"validFrom": {
"type": "string",
"format": "date"
},
"passengerName": {
"type": "string"
},
"orderNumber": {
"type": "string"
},
"stops": {
"type": "array",
"items": {
"type": "object",
"properties": {
"stop": {
"type": "string"
},
"date": {
"type": "string",
"format": "date"
},
"time": {
"type": "string",
"format": "time"
},
"track": {
"type": "integer"
},
"product": {
"type": "string"
},
"reservation": {
"type": "string"
}
},
"required": ["stop", "date", "time", "track", "product", "reservation"]
}
}
},
"required": ["validFrom", "passengerName", "orderNumber", "stops"]
},
"usageNotes": {
"type": "string"
}
},
"required": ["ticketType", "departureDate", "priceType", "class", "adult", "journey", "refundPolicy", "payment", "bookingDetails", "journeyDetails", "usageNotes"]
}
================================================
FILE: examples/simple_ETLs.py
================================================
#note, you need to install dlt, langchain, and duckdb
#pip install dlt
#pip install langchain
#pip install duckdb
#pip install python-dotenv
#pip install openai
#you also need a .env file with your openai api key
from langchain.chains.openai_functions import create_structured_output_chain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
from langchain.schema import SystemMessage, HumanMessage
import os
import dlt
from dotenv import load_dotenv
# Read OPENAI_API_KEY (and any other settings) from a local .env file.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")

# Chat model used by the recipe resource: GPT-4 snapshot, temperature 0,
# answers capped at 1200 tokens.
llm = ChatOpenAI(
    model_name="gpt-4-0613",
    openai_api_key=OPENAI_API_KEY,
    max_tokens=1200,
    temperature=0.0,
)
@dlt.resource(name='output', write_disposition='replace')
def ai_function():
    """Yield one LLM-generated recipe as a dlt resource item.

    Builds a structured-output chain around the module-level ``llm`` with a
    small JSON schema (``ingredients`` and ``steps``), runs it on a fixed
    user prompt and yields whatever the chain returns.
    """
    # Here we define the user prompt and the structure of the output we desire
    prompt = "I want to eat something very healthy and tasty."
    json_schema = {
        "title": "Recipe name",
        "description": "Recipe description",
        "type": "object",
        "properties": {
            "ingredients": {"title": "Ingredients", "description": "Detailed ingredients", "type": "string"},
            "steps": {"title": "Cooking steps", "description": "Detailed cooking steps", "type": "string"}
        },
        "required": ["ingredients", "steps"],
    }
    prompt_msgs = [
        SystemMessage(
            content="You are a world class algorithm for creating recipes"
        ),
        HumanMessage(content="Create a food recipe based on the following prompt:"),
        HumanMessagePromptTemplate.from_template("{input}"),
        HumanMessage(content="Tips: Make sure to answer in the correct format"),
    ]
    prompt_ = ChatPromptTemplate(messages=prompt_msgs)
    chain = create_structured_output_chain(json_schema, prompt=prompt_, llm=llm, verbose=True)
    # The chain is already bound to ``llm``; at run time it only needs the
    # value for the ``{input}`` template variable.
    output = chain.run(input=prompt)
    yield output
# Here we initialize DLT pipeline and export the data to duckdb
pipeline = dlt.pipeline(
    pipeline_name="recipe",
    destination="duckdb",
    dataset_name="recipe_data",
)
info = pipeline.run(data=ai_function())
print(info)
================================================
FILE: extensions/__init__.py
================================================
================================================
FILE: extensions/argparseext.py
================================================
import os
import sys
import argparse
# Extract the env filenames in the -e flag only
# Ignore any other arguments
def parse_dotenv_extensions(argv):
    """Return the dotenv file names requested via ``-e`` or ``DOTENV_EXTENSIONS``.

    Only the short ``-e`` flag is inspected; every other argument is ignored,
    so this can run before the full command line is parsed.

    Args:
        argv: Raw argument list, typically ``sys.argv``.

    Returns:
        The file names that follow the first ``-e``, up to (not including)
        the next argument starting with ``-``. Without ``-e``, the
        whitespace-separated value of the ``DOTENV_EXTENSIONS`` environment
        variable; an unset or blank variable yields an empty list.
    """
    env_argv = []
    if "-e" in argv:
        tmp_argv = argv[argv.index("-e") + 1:]
        parsed_args = []
        for arg in tmp_argv:
            # Stop at the next flag; it belongs to the full parser.
            if arg.startswith("-"):
                break
            parsed_args.append(arg)
        env_argv = ["-e"] + parsed_args

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-e",
        "--env",
        nargs="+",
        help="""
        filenames for additional env variables to load
        """,
        # split() with no separator drops empty strings, so a missing
        # variable gives [] instead of the truthy [""].
        default=os.getenv("DOTENV_EXTENSIONS", "").split(),
    )
    return parser.parse_args(env_argv).env
def parse_arguments():
    """Parse the command line and return the run configuration.

    Dotenv extension files named with ``-e`` are loaded first so that the
    environment-based defaults of the remaining options already see them.

    Returns:
        Tuple ``(objective, initial_task, openai_api_model, dotenv_extensions)``.
        ``dotenv_extensions`` is an empty list when none were requested.

    Exits (via ``parser.exit()``) after printing help when no objective or no
    initial task is available from the command line or the environment.
    """
    dotenv_extensions = parse_dotenv_extensions(sys.argv)
    # Check if we need to load any additional env files
    # This allows us to override the default .env file
    # and update the default values for any command line arguments
    if dotenv_extensions:
        from extensions.dotenvext import load_dotenv_extensions

        # Reuse the list computed above instead of parsing argv a second time.
        load_dotenv_extensions(dotenv_extensions)

    # Now parse the full command line arguments
    parser = argparse.ArgumentParser(
        add_help=False,
    )
    parser.add_argument(
        "objective",
        nargs="*",
        metavar="<objective>",
        help="""
        main objective description. Doesn't need to be quoted.
        if not specified, get objective from environment.
        """,
        default=[os.getenv("OBJECTIVE", "")],
    )
    parser.add_argument(
        "-t",
        "--task",
        metavar="<initial task>",
        help="""
        initial task description. must be quoted.
        if not specified, get initial_task from environment.
        """,
        default=os.getenv("INITIAL_TASK", os.getenv("FIRST_TASK", "")),
    )
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "-4",
        "--gpt-4",
        dest="openai_api_model",
        action="store_const",
        const="gpt-4",
        help="""
        use GPT-4 instead of the default model.
        """,
        default=os.getenv("OPENAI_API_MODEL", "gpt-3.5-turbo"),
    )
    group.add_argument(
        "-l",
        "--llama",
        dest="openai_api_model",
        action="store_const",
        const="llama",
        help="""
        use LLaMa instead of the default model. Requires llama.cpp.
        """,
    )
    # This will parse -e again, which we want, because we need
    # to load those in the main file later as well
    parser.add_argument(
        "-e",
        "--env",
        nargs="+",
        help="""
        filenames for additional env variables to load
        """,
        # split() with no separator yields [] (not [""]) when the variable
        # is unset, so callers can test the result for truthiness.
        default=os.getenv("DOTENV_EXTENSIONS", "").split(),
    )
    parser.add_argument(
        "-h",
        "-?",
        "--help",
        action="help",
        help="""
        show this help message and exit
        """,
    )

    args = parser.parse_args()
    openai_api_model = args.openai_api_model
    dotenv_extensions = args.env

    objective = " ".join(args.objective).strip()
    if not objective:
        print(
            "\033[91m\033[1m"
            + "No objective specified or found in environment.\n"
            + "\033[0m\033[0m"
        )
        parser.print_help()
        parser.exit()

    initial_task = args.task
    if not initial_task:
        print(
            "\033[91m\033[1m"
            + "No initial task specified or found in environment.\n"
            + "\033[0m\033[0m"
        )
        parser.print_help()
        parser.exit()

    return objective, initial_task, openai_api_model, dotenv_extensions
================================================
FILE: extensions/dotenvext.py
================================================
from dotenv import load_dotenv
def load_dotenv_extensions(dotenv_files):
    """Pass each entry of ``dotenv_files`` to ``load_dotenv``, in order."""
    for env_path in dotenv_files:
        load_dotenv(env_path)
================================================
FILE: fetch_secret.py
================================================
import os
from dotenv import load_dotenv
from api import start_api_server
# API_ENABLED = os.environ.get("API_ENABLED", "False").lower() == "true"
import boto3
environment = os.getenv("AWS_ENV", "dev")
def fetch_secret(secret_name, region_name, env_file_path):
    """Download a secret from AWS Secrets Manager and write it out as a dotenv file.

    Args:
        secret_name: Secrets Manager ``SecretId`` to read.
        region_name: AWS region used for the Secrets Manager client.
        env_file_path: Path the secret payload is written to (overwritten).

    Returns:
        ``None`` when the secret could not be retrieved, otherwise the string
        ``"Success in loading env files"``.
    """
    session = boto3.session.Session()
    client = session.client(service_name="secretsmanager", region_name=region_name)

    try:
        response = client.get_secret_value(SecretId=secret_name)
    except Exception as e:
        print(f"Error retrieving secret: {e}")
        return None

    if "SecretString" in response:
        secret = response["SecretString"]
    else:
        secret = response["SecretBinary"]
        # The file below is opened in text mode; a bytes payload must be
        # decoded first or write() raises TypeError.
        if isinstance(secret, (bytes, bytearray)):
            secret = secret.decode("utf-8")

    with open(env_file_path, "w") as env_file:
        env_file.write(secret)

    if os.path.exists(env_file_path):
        print(f"The .env file is located at: {os.path.abspath(env_file_path)}")
        # Load exactly the file that was just written rather than relying on
        # load_dotenv's own .env discovery.
        load_dotenv(env_file_path)
        PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "")
        # Only the length is printed so the key itself never reaches the logs.
        print("LEN OF PINECONE_API_KEY", len(PINECONE_API_KEY))
    else:
        print("The .env file was not found.")
    return "Success in loading env files"
# Module-level bootstrap: runs on import and makes sure environment variables
# are loaded, either from a local .env file or from AWS Secrets Manager.
env_file = ".env"
if os.path.exists(env_file):
# Load default environment variables (.env)
load_dotenv()
print("Talk to the AI!")
else:
# No local .env: fetch the secret named after the AWS_ENV value and write it to .env.
secrets = fetch_secret(
f"promethai-{environment}-backend-secretso-promethaijs-dotenv",
"eu-west-1",
".env",
)
if secrets:
print(secrets)
load_dotenv()
# Check whether the AWS_ENV value (`environment`) contains "dev" or "prd".
# NOTE(review): this branch calls fetch_secret again and rewrites .env; the
# original nesting is not visible here, so confirm whether it is meant to run
# even when a local .env was found above.
if "dev" in environment:
# Fetch the secret
secrets = fetch_secret(
f"promethai-dev-backend-secretso-promethaijs-dotenv",
"eu-west-1",
".env",
)
load_dotenv()
elif "prd" in environment:
# Fetch the secret
secrets = fetch_secret(
f"promethai-prd-backend-secretso-promethaijs-dotenv",
"eu-west-1",
".env",
)
load_dotenv()
================================================
FILE: fixtures/choose_meal_tree_response.json
================================================
{
"prompt": "I’d like a quick veggie meal under $25 near me. No peanuts, I’m allergic.",
"tree": [
{
"category": "price",
"options": [
{
"category": "under $25",
"options": [
{
"category": "$10"
},
{
"category": "$15"
},
{
"category": "$20"
}
],
"preference": []
},
{
"category": "over $25",
"options": [
{
"category": "$30"
},
{
"category": "$35"
},
{
"category": "$40"
}
],
"preference": []
}
],
"preference": [
"under $25"
]
},
{
"category": "location",
"options": [
{
"category": "near me",
"options": [
{
"category": "1 mile"
},
{
"category": "5 miles"
},
{
"category": "10 miles"
}
],
"preference": []
},
{
"category": "far away",
"options": [
{
"category": "20 miles"
},
{
"category": "50 miles"
},
{
"category": "100 miles"
}
],
"preference": []
}
],
"preference": [
"near me"
]
},
{
"category": "diet",
"options": [
{
"category": "veggie",
"options": [
{
"category": "vegan"
},
{
"category": "vegetarian"
},
{
"category": "plant-based"
}
],
"preference": []
},
{
"category": "non-veggie",
"options": [
{
"category": "meat"
},
{
"category": "dairy"
},
{
"category": "seafood"
}
],
"preference": []
}
],
"preference": [
"veggie"
]
},
{
"category": "allergies",
"options": [
{
"category": "peanuts"
},
{
"category": "berries",
"options": [
{
"category": "cranberry"
},
{
"category": "blackberry"
},
{
"category": "blueberry"
}
]
}
],
"preference": [
"peanuts"
]
}
]
}
================================================
FILE: fixtures/goal_response.json
================================================
{
"goals": [
"Cuisine",
"Healthy",
"Budget",
"Taste"
]
}
================================================
FILE: fixtures/recipe_response.json
================================================
{
"recipes": [
{
"title": "Vegetable Stir Fry",
"rating": 90,
"prep_time": 15,
"cook_time": 20,
"description": "A delicious and healthy vegetable stir fry",
"ingredients": [
"1 onion",
"2 cloves garlic",
"1 red pepper",
"1 green pepper",
"1 cup mushrooms",
"1 cup broccoli",
"1 tablespoon olive oil",
"1 tablespoon soy sauce",
"1 teaspoon sesame oil",
"Salt and pepper to taste"
],
"instructions": [
"Chop the onion, garlic, red pepper, green pepper, mushrooms, and broccoli.",
"Heat the olive oil in a large skillet over medium-high heat.",
"Add the onion, garlic, red pepper, green pepper, mushrooms, and broccoli to the skillet and cook for 5 minutes, stirring occasionally.",
"Add the soy sauce, sesame oil, salt, and pepper and cook for an additional 5 minutes, stirring occasionally.",
"Serve hot."
]
},
{
"title": "Veggie Burger",
"rating": 95,
"prep_time": 10,
"cook_time": 15,
"description": "A delicious and healthy veggie burger",
"ingredients": [
"1/2 cup cooked quinoa",
"1/2 cup cooked black beans",
"1/4 cup diced onion",
"1/4 cup diced bell pepper",
"1/4 cup diced mushrooms",
"1/4 cup breadcrumbs",
"1 tablespoon olive oil",
"1 teaspoon garlic powder",
"1 teaspoon cumin",
"1 teaspoon chili powder",
"Salt and pepper to taste"
],
"instructions": [
"In a large bowl, combine the quinoa, black beans, onion, bell pepper, mushrooms, breadcrumbs, olive oil, garlic powder, cumin, chili powder, salt, and pepper.",
"Form the mixture into 4 patties.",
"Heat a large skillet over medium-high heat and add the patties.",
"Cook for 5 minutes on each side, or until golden brown and cooked through.",
"Serve hot."
]
}
]
}
================================================
FILE: fixtures/subgoal_response.json
================================================
{
"sub_goals": [
{
"goal_name": "Budget",
"sub_goals": [
{
"name": "Cheap",
"amount": 80
},
{
"name": "Moderate",
"amount": 20
},
{
"name": "Expensive",
"amount": 0
},
{
"name": "Splurge",
"amount": 0
}
]
},
{
"goal_name": "Cuisine",
"sub_goals": [
{
"name": "Italian",
"amount": 50
},
{
"name": "Mexican",
"amount": 30
},
{
"name": "Chinese",
"amount": 20
},
{
"name": "Indian",
"amount": 0
}
]
},
{
"goal_name": "Healthy",
"sub_goals": [
{
"name": "Low-Calorie",
"amount": 50
},
{
"name": "Low-Carb",
"amount": 30
},
{
"name": "Low-Fat",
"amount": 20
},
{
"name": "Vegetarian",
"amount": 0
}
]
},
{
"goal_name": "Taste",
"sub_goals": [
{
"name": "Spicy",
"amount": 50
},
{
"name": "Salty",
"amount": 30
},
{
"name": "Sweet",
"amount": 20
},
{
"name": "Sour",
"amount": 0
}
]
}
]
}
================================================
FILE: fixtures/update_meal_tree_response.json
================================================
"{'prompt': 'I would like a quick veggie meal under 25$ near me.', 'tree': [{'category': 'price', 'options': [{'category': 'under 25$', 'options': [{'category': 'under 10$'}, {'category': '10-15$'}, {'category': '15-20$'}, {'category': '20-25$'}], 'preference': []}, {'category': 'over 25$', 'options': [{'category': '25-30$'}, {'category': '30-35$'}, {'category': '35-40$'}, {'category': 'over 40$'}], 'preference': []}], 'preference': ['under 25$']}, {'category': 'location', 'options': [{'category': 'near me', 'options': [{'category': 'walking distance'}, {'category': 'driving distance'}, {'category': 'public transport'}], 'preference': []}, {'category': 'far away', 'options': [{'category': '1 hour away'}, {'category': '2 hours away'}, {'category': '3 hours away'}], 'preference': []}], 'preference': ['near me']}, {'category': 'type', 'options': [{'category': 'veggie', 'options': [{'category': 'vegan'}, {'category': 'vegetarian'}, {'category': 'plant-based'}], 'preference': []}, {'category': 'non-veggie', 'options': [{'category': 'meat'}, {'category': 'fish'}, {'category': 'dairy'}], 'preference': []}], 'preference': ['veggie']}, {'category': 'time', 'options': [{'category': 'quick', 'options': [{'category': '1 min'}, {'category': '10 mins'}, {'category': '30 mins'}], 'preference': []}, {'category': 'slow', 'options': [{'category': '60 mins'}, {'category': '120 mins'}, {'category': '180 mins'}], 'preference': []}], 'preference': ['quick']}]}"
================================================
FILE: food_scrapers/wolt_tool.py
================================================
from playwright.async_api import async_playwright, Playwright
async def find_and_click_by_attributes(page, attributes):
    """Click the ``<button>`` matching every attribute in ``attributes``.

    A CSS selector of the form ``button[attr="value"]...`` is assembled from
    the mapping in iteration order, resolved with ``page.locator`` and the
    resulting element is clicked.
    """
    attribute_filters = "".join(
        f'[{attr}="{value}"]' for attr, value in attributes.items()
    )
    button = page.locator("button" + attribute_filters)
    await button.click()
async def enter_zipcode_and_press_enter(page, zipcode):
    """Fill the front-page address input with ``zipcode`` and press Enter on it."""
    address_box = page.locator('input[data-test-id="FrontpageAddressQueryInput"]')
    await address_box.fill(zipcode)
    await address_box.press("Enter")
async def run(playwright, zipcode: str, prompt: str):
    """Search wolt.com for ``prompt`` near ``zipcode`` and return the resulting URL.

    Args:
        playwright: Playwright driver object; a headless Chromium is launched from it.
        zipcode: Text entered into the front-page address input.
        prompt: Text entered into the venue search input.

    Returns:
        The page URL after the venue search has been submitted.
    """
    browser = await playwright.chromium.launch(headless=True)
    try:
        context = await browser.new_context()
        page = await context.new_page()
        # Navigate to wolt.com
        await page.goto("https://wolt.com")

        # Accept the GDPR consent banner before interacting with the page.
        button_attributes = {
            "aria-disabled": "false",
            "role": "button",
            "type": "button",
            "data-localization-key": "gdpr-consents.banner.accept-button",
        }
        await find_and_click_by_attributes(page, button_attributes)

        await enter_zipcode_and_press_enter(page, zipcode)
        await page.wait_for_load_state("networkidle")
        # Enter is pressed a second time on the address input, as in the
        # original flow.
        await page.press('input[data-test-id="FrontpageAddressQueryInput"]', "Enter")
        await page.wait_for_load_state("networkidle")

        await page.wait_for_selector('[data-test-id="VenuesOnlySearchInput"]')
        await page.wait_for_load_state("networkidle")
        search_input_selector = '[data-test-id="VenuesOnlySearchInput"]'
        await page.wait_for_load_state("networkidle")
        element = page.locator(search_input_selector)
        await element.fill(prompt)
        await page.press('input[data-test-id="VenuesOnlySearchInput"]', "Enter")
        await page.wait_for_load_state("networkidle")

        # page.url is read before the finally clause closes the browser.
        return page.url
    finally:
        # Release the browser even when a navigation or selector step fails.
        await browser.close()
async def main(prompt: str, zipcode: str):
    """Start Playwright, run the Wolt search, print and return the resulting URL."""
    async with async_playwright() as driver:
        venue_url = await run(driver, zipcode=zipcode, prompt=prompt)
        print(venue_url)
        return venue_url
import asyncio
# asyncio.run(main(prompt="pizza", zipcode="10005"))
================================================
FILE: heuristic_experience_orchestrator/README.md
================================================
# Heuristic Orchestration Chain
The chain is meant to operate various agents that have a predetermined set of goals, which they can change based on their operation and on information gathered about the user and their experiences.
## HOC
HOC, or heuristic orchestration chain, aims to implement a given goal by choosing one of the various methods available, and then to optimise the goal itself after the result is produced and assessed.
This type of a system requires:
Based on Newell and Simon (1958) - Report on a general problem-solving program
1. Problem Identification
Methods:
Decomposition
Analogies
Root cause analysis
Goal clarification
Constraint identification
SWOT analysis
Expert consultation
Visualization
2. Problem Definition
3. Strategy selection
4. Information collection
5. Resource distribution
6. Process monitoring
7. Solution evaluation
`objective` (mandatory) - The overarching objective you want the task orchestration system to converge to
`first_task` (optional) - The prompt it gets for its "first task", which is usually some form of creating a task list. The default is "Make a todo list".
The `from_llm` method that constructs the chain takes in the following arguments that may be of interest:
`llm` - The LLM model you want the chain to use. Note: Using a model like GPT-4 adds up costs extremely quickly. Use with caution.
`vectorstore` - The vectorstore you want the chain to use
`max_iterations` - The maximum number of iterations, i.e. number of tasks that BabyAGI will output a result for and iterate on. If this number is not provided, the chain WILL run forever.
================================================
FILE: heuristic_experience_orchestrator/task_identification.py
================================================
from langchain import LLMChain, PromptTemplate
from langchain.llms import BaseLLM
class TaskIdentificationChain(LLMChain):
    """Chain to generate tasks."""

    @classmethod
    def from_llm(
        cls, llm: BaseLLM, verbose: bool = True, value: str = None
    ) -> LLMChain:
        """Build the chain with the prompt template selected by ``value``.

        Args:
            llm: Language model the chain runs against.
            verbose: Passed through to the chain constructor.
            value: Identification method. ``"Decomposition"`` and
                ``"Analogy"`` have dedicated prompts, ``"Template"`` and
                ``"Templatetest"`` map to placeholder text, any other
                non-empty value gets a generic fallback, and ``None`` or an
                empty string selects the default template.

        Returns:
            A chain whose prompt declares the single input variable
            ``task_description``.
        """

        # Plain local helper: it is called with the value only, so it must
        # not declare a ``self`` parameter (doing so raised TypeError).
        def get_template_by_value(value):
            if value == "Decomposition":
                template = """ Hey ChatGPT, I need your help in decomposing the following task into a series of manageable steps for the purpose of task identification based on
Newell and Simon paper. Return the result as a json with the result type 'Identification' and 'Value': 'Decomposition' : {task_description}"""
            elif value == "Analogy":
                template = """ Hey ChatGPT, I need your help in creating an analogy for the purpose of task identification based on
Newell and Simon paper. Return the result as a json with the result type 'Identification' and 'Value': 'Analogy' : {task_description}"""
            elif value == "Template":
                template = "Template B content"
            elif value == "Templatetest":
                template = "Template B content"
            else:
                template = " Return the tasks as an array."
            return template

        if value:
            task_creation_template = get_template_by_value(value)
        else:
            task_creation_template = "Default template content"

        # NOTE(review): several templates above contain no {task_description}
        # placeholder although it is declared here; depending on the LangChain
        # version PromptTemplate may reject that mismatch - confirm.
        prompt = PromptTemplate(
            template=task_creation_template,
            input_variables=["task_description"],
        )
        return cls(prompt=prompt, llm=llm, verbose=verbose)
================================================
FILE: initdb/init.sql
================================================
-- Table pinecone_id: an auto-incrementing primary key plus a mandatory
-- integer user_id per row.
-- NOTE(review): presumably maps users to their Pinecone records; confirm
-- against the code that reads this table.
CREATE TABLE pinecone_id (
id SERIAL PRIMARY KEY,
user_id INTEGER NOT NULL
);
================================================
FILE: llm_chains/__init__.py
================================================
================================================
FILE: llm_chains/chains.py
================================================
from langchain.document_loaders import PyPDFLoader
import pinecone
from datetime import datetime, timedelta
from typing import List, Optional, Tuple, Dict
from langchain.agents import initialize_agent
from langchain.tools import tool
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import openai
from pydantic import BaseModel, Field, parse_obj_as
import re
from jinja2 import Template
from dotenv import load_dotenv
from langchain import LLMChain
from langchain.schema import Document
from langchain.chains import SimpleSequentialChain
from langchain.chains.openai_functions import (
create_openai_fn_chain, create_structured_output_chain
)
from langchain.schema import HumanMessage, SystemMessage
import os
import fastjsonschema
import json
from langchain.tools import GooglePlacesTool
import tiktoken
import asyncio
import logging
from langchain.chat_models import ChatOpenAI
from langchain.agents.agent_toolkits import ZapierToolkit
from langchain.agents import AgentType
from langchain.utilities.zapier import ZapierNLAWrapper
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
from typing import Optional
# redis imports for cache
import langchain
from langchain.callbacks import get_openai_callback
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
from langchain.llms import Replicate
from redis import Redis
from langchain.cache import RedisCache
import os
logging.basicConfig(level=logging.INFO)
# from langchain import llm_cache
# langchain.llm_cache = RedisCache(redis_=Redis(host="redis", port=6379, db=0))
# logging.info("Using redis cache")
# Enable the Redis-backed LLM cache on the deployed environments only.
# REDIS_HOST, when set, overrides the per-environment default endpoint.
_DEFAULT_REDIS_HOSTS = {
    "dev": "promethai-dev-backend-redis-repl-gr.60qtmk.ng.0001.euw1.cache.amazonaws.com",
    "prd": "promethai-prd-backend-redis-repl-gr.60qtmk.ng.0001.euw1.cache.amazonaws.com",
}
_aws_env = os.getenv("AWS_ENV", "")
if _aws_env in _DEFAULT_REDIS_HOSTS:
    REDIS_HOST = os.getenv("REDIS_HOST", _DEFAULT_REDIS_HOSTS[_aws_env])
    # Connect to the resolved host so that the REDIS_HOST override is honoured.
    langchain.llm_cache = RedisCache(redis_=Redis(host=REDIS_HOST, port=6379, db=0))
    logging.info("Using redis cache for %s", _aws_env.upper())
class Agent:
# Class-level configuration: evaluated once, when the class body executes.
# Load .env so the lookups below can see values defined there.
load_dotenv()
# Model name from OPENAI_MODEL; falls back to "gpt-4" when unset or empty.
OPENAI_MODEL = os.getenv("OPENAI_MODEL") or "gpt-4"
GPLACES_API_KEY = os.getenv("GPLACES_API_KEY", "")
# Chained assignment: also writes the value (or "") back into os.environ.
ZAPIER_NLA_API_KEY = os.environ["ZAPIER_NLA_API_KEY"] = os.environ.get(
"ZAPIER_NLA_API_KEY", ""
)
# Parsed as float; defaults to 0.0 when OPENAI_TEMPERATURE is not set.
OPENAI_TEMPERATURE = float(os.getenv("OPENAI_TEMPERATURE", 0.0))
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY", "")
PINECONE_API_ENV = os.getenv("PINECONE_API_ENV", "")
REPLICATE_API_TOKEN = os.getenv("REPLICATE_API_TOKEN", "")
def __init__(
    self,
    table_name=None,
    user_id: Optional[str] = "676",
    session_id: Optional[str] = None,
) -> None:
    """Store the identifiers and create the model handles used by the agent.

    Args:
        table_name: Stored as ``self.table_name``.
        user_id: Stored as ``self.user_id``; defaults to ``"676"``.
        session_id: Stored as ``self.session_id``.
    """
    self.table_name = table_name
    self.user_id = user_id
    self.session_id = session_id
    # self.memory = None
    # Timestamp with millisecond precision: drop the last three microsecond digits.
    stamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
    self.thought_id_timestamp = stamp[:-3]
    self.last_message = ""
    self.openai_model35 = "gpt-3.5-turbo-16k-0613"
    self.openai_model4 = "gpt-4-0613"

    def _chat_model(model_name, temperature, max_tokens):
        # All chat handles share the API key and have caching disabled.
        return ChatOpenAI(
            temperature=temperature,
            max_tokens=max_tokens,
            openai_api_key=self.OPENAI_API_KEY,
            model_name=model_name,
            cache=False,
        )

    # Note: ``llm`` uses the 3.5 model while ``llm_fast`` uses the GPT-4 one.
    self.llm = _chat_model(self.openai_model35, 0.0, 1500)
    self.llm35_fast = _chat_model(self.openai_model35, 0.2, 550)
    self.llm_fast = _chat_model(self.openai_model4, 0.0, 700)
    self.llm35 = _chat_model(self.openai_model35, 0.0, 1200)
    # self.llm = ChatOpenAI(temperature=0.0,max_tokens = 1500, openai_api_key = self.OPENAI_API_KEY, model_name="gpt-4")
    self.replicate_llm = Replicate(
        model="replicate/vicuna-13b:a68b84083b703ab3d5fbf31b6e25f16be2988e4c3e21fe79c2ff1c18b99e61c1",
        api_token=self.REPLICATE_API_TOKEN,
    )
    self.verbose: bool = True
    self.openai_temperature = 0.0
    self.index = "my-agent"
def clear_cache(self):
    """Clear the global LangChain LLM cache, if one is configured."""
    cache = getattr(langchain, "llm_cache", None)
    # This module only installs a cache when AWS_ENV is "dev" or "prd";
    # with no cache there is nothing to clear.
    if cache is not None:
        cache.clear()
def set_user_session(self, user_id: str, session_id: str) -> None:
    """Rebind this agent to the given ``user_id`` and ``session_id``."""
    self.user_id, self.session_id = user_id, session_id
def get_ada_embedding(self, text):
    """Return the ``text-embedding-ada-002`` embedding of ``text``.

    Newlines in ``text`` are replaced by spaces before the request is made.
    """
    single_line = text.replace("\n", " ")
    response = openai.Embedding.create(
        input=[single_line], model="text-embedding-ada-002", api_key=OPENAI_API_KEY
    )
    return response["data"][0]["embedding"]
def init_pinecone(self, index_name):
    """Initialise the Pinecone client from the environment and return the index.

    Reloads ``.env`` first, then reads ``PINECONE_API_KEY`` and
    ``PINECONE_API_ENV`` (both default to an empty string).
    """
    load_dotenv()
    pinecone.init(
        api_key=os.getenv("PINECONE_API_KEY", ""),
        environment=os.getenv("PINECONE_API_ENV", ""),
    )
    return pinecone.Index(index_name)
def _simple_test(self):
    """Run a fixed question through ``self.llm`` and return the stripped answer.

    The OpenAI callback object is printed once the chain has run.
    """
    # langchain.llm_cache = RedisCache(redis_=Redis(host='0.0.0.0', port=6379, db=0))
    with get_openai_callback() as usage:
        question = PromptTemplate.from_template(
            """ How long does it take to go to the moon on foot """
        )
        chain = LLMChain(llm=self.llm, prompt=question, verbose=self.verbose)
        answer = chain.run(prompt=question, name=self.user_id).strip()
        print(usage)
        return answer
# create the length function
def tiktoken_len(self, text):
    """Return how many ``cl100k_base`` tokens ``text`` encodes to."""
    encoding = tiktoken.get_encoding("cl100k_base")
    # disallowed_special=() lets special-token text be encoded instead of raising.
    return len(encoding.encode(text, disallowed_special=()))
# class VectorDBInput(BaseModel):
# observation: str = Field(description="should be what we are inserting into the memory")
# namespace: str = Field(description="should be the namespace of the VectorDB")
# @tool("_update_memories", return_direct=True, args_schema = VectorDBInput)
# def insert_documents(self, documents, namespace):
# from datetime import datetime
#
# retriever = vectorstore.as_retriever()
# retriever.add_documents(
# [
# Document(
# page_content=observation,
# metadata={
# "inserted_at": datetime.now(),
# "text": observation,
# "user_id": self.user_id,
# },
# namespace=namespace,
# )
# ]
# )
def _update_memories(self, observation: str, namespace: str, page: str = "", source: str = "") -> None:
    """Update related characteristics, preferences or dislikes for a user.

    Stores ``observation`` as one document in the agent's Pinecone index.

    Args:
        observation: Text to store; used as page content and as the
            ``text`` metadata field.
        namespace: Pinecone namespace the vector store is opened with.
        page: Optional value for the ``page`` metadata field.
        source: Optional value for the ``source`` metadata field.
    """
    self.init_pinecone(index_name=self.index)
    vectorstore: Pinecone = Pinecone.from_existing_index(
        index_name=self.index, embedding=OpenAIEmbeddings(), namespace=namespace
    )
    retriever = vectorstore.as_retriever()
    retriever.add_documents(
        [
            Document(
                page_content=observation,
                metadata={
                    # Insertion time; _fetch_memories sorts on this field.
                    "inserted_at": datetime.now(),
                    "text": observation,
                    # Lets reads be filtered down to this user.
                    "user_id": self.user_id,
                    "page": page,
                    "source": source,
                },
                namespace=namespace,
            )
        ]
    )
# Pydantic model with a single field: the observation text to look up in the vector DB.
class FetchMemories(BaseModel):
observation: str = Field(
description="observation we want to fetch from vectordb"
)
def _fetch_memories(self, observation: str, namespace: str) -> dict[str, str] | str:
    """Fetch related characteristics, preferences or dislikes for a user.

    Args:
        observation: Query text passed to the retriever.
        namespace: Pinecone namespace to search.

    Returns:
        The page content of the most recently inserted matching document,
        or a dict with an ``"error"`` key when nothing matched.
    """
    self.init_pinecone(index_name=self.index)
    store: Pinecone = Pinecone.from_existing_index(
        index_name=self.index, embedding=OpenAIEmbeddings(), namespace=namespace
    )
    retriever = store.as_retriever()
    # Only consider documents stored for this user.
    retriever.search_kwargs = {"filter": {"user_id": {"$eq": self.user_id}}}
    matches = retriever.get_relevant_documents(observation)
    # Newest first; documents without a timestamp sort as the oldest possible.
    matches.sort(
        key=lambda doc: doc.metadata.get("inserted_at", datetime.min),
        reverse=True,
    )
    if not matches:
        return {
            "error": "No document found for this user. Make sure that a query is appropriate"
        }
    return matches[0].page_content
def _compute_agent_summary(self, model_speed: str):
    """Computes summary for a person.

    Fetches the stored preferences and dislikes of the current user and asks
    an LLM to summarise them.

    Args:
        model_speed: ``"fast"`` uses the Replicate model; any other value
            runs an ``LLMChain`` on ``self.llm``.

    Returns:
        The summary text produced by the selected model.
    """
    prompt = PromptTemplate.from_template(
        "How would you summarize {name}'s core characteristics given the"
        + " following statements:\n"
        + "{relevant_preferences}"
        + "{relevant_dislikes}"
        + "Do not embellish."
        + "\n\nSummary: "
    )
    print("Computing Agent Summary")
    self.init_pinecone(index_name=self.index)
    # The agent seeks to think about their core characteristics.
    relevant_preferences = self._fetch_memories(
        "Users core preferences", namespace="PREFERENCES"
    )
    relevant_dislikes = self._fetch_memories(
        "Users core dislikes", namespace="PREFERENCES"
    )
    print(relevant_dislikes)
    print(relevant_preferences)
    if model_speed == "fast":
        # The Replicate LLM is called with a plain string, so the template
        # has to be filled in first; passing the template object would drop
        # the fetched memories.
        filled_prompt = prompt.format(
            name=self.user_id,
            relevant_preferences=relevant_preferences,
            relevant_dislikes=relevant_dislikes,
        )
        output = self.replicate_llm(filled_prompt)
        return output
    else:
        chain = LLMChain(llm=self.llm, prompt=prompt, verbose=self.verbose)
        chain_results = chain.run(
            name=self.user_id,
            relevant_preferences=relevant_preferences,
            relevant_dislikes=relevant_dislikes,
        ).strip()
        print(chain_results)
        return chain_results
def update_agent_preferences(self, preferences: str):
    """Serves to update agents preferences so that they can be used in summary.

    Args:
        preferences: Newly stated preferences to merge with the stored ones.

    Returns:
        The result of ``_update_memories`` for the merged preference list.
    """
    prompt = """ The {name} has following {past_preference} and the new {preferences}
Update user preferences and return a list of preferences
Do not embellish.
Summary: """
    self.init_pinecone(index_name=self.index)
    # Read from the same "PREFERENCES" namespace the result is written to
    # below; the singular "PREFERENCE" never matched any stored document.
    past_preference = self._fetch_memories(
        "Users core preferences", namespace="PREFERENCES"
    )
    prompt = PromptTemplate(
        input_variables=["name", "past_preference", "preferences"], template=prompt
    )
    # prompt = prompt.format(name=self.user_id, past_preference= past_preference, preferences=preferences)
    chain = LLMChain(llm=self.llm, prompt=prompt, verbose=self.verbose)
    chain_result = chain.run(
        prompt=prompt,
        past_preference=past_preference,
        preferences=preferences,
        name=self.user_id,
    ).strip()
    print(chain_result)
    return self._update_memories(chain_result, namespace="PREFERENCES")
def update_agent_taboos(self, dislikes: str):
    """Merge newly stated dislikes into the user's stored taboos.

    The previously stored dislikes are fetched, an LLM is asked to produce
    the updated list, and that list is written to the ``PREFERENCES``
    namespace.
    """
    taboo_template = """ The {name} has following {past_dislikes} and the new {dislikes}
Update user taboos and return a list of dislikes
Do not embellish.
Summary: """
    self.init_pinecone(index_name=self.index)
    past_dislikes = self._fetch_memories("Users core dislikes", namespace="PREFERENCES")
    taboo_prompt = PromptTemplate(
        template=taboo_template,
        input_variables=["name", "past_dislikes", "dislikes"],
    )
    # prompt = prompt.format(name=self.user_id, past_dislikes= past_dislikes, dislikes=dislikes)
    taboo_chain = LLMChain(llm=self.llm, prompt=taboo_prompt, verbose=self.verbose)
    raw_result = taboo_chain.run(
        prompt=taboo_prompt,
        name=self.user_id,
        past_dislikes=past_dislikes,
        dislikes=dislikes,
    )
    return self._update_memories(raw_result.strip(), namespace="PREFERENCES")
def update_agent_traits(self, traits: str):
    """Serves to update agent traits so that they can be used in summary.

    Args:
        traits: Newly observed traits to merge with the stored ones.

    Returns:
        The result of ``_update_memories`` for the merged trait list.
    """
    prompt = """ The {name} has following {past_traits} and the new {traits}
Update user traits and return a list of traits
Do not embellish.
Summary: """
    self.init_pinecone(index_name=self.index)
    # Query for traits; the earlier "Users core dislikes" query was a
    # copy-paste from update_agent_taboos and retrieved the wrong memories.
    past_traits = self._fetch_memories(
        "Users core traits", namespace="PREFERENCES"
    )
    prompt = PromptTemplate(
        input_variables=["name", "past_traits", "traits"], template=prompt
    )
    chain = LLMChain(llm=self.llm, prompt=prompt, verbose=self.verbose)
    chain_result = chain.run(
        prompt=prompt, past_traits=past_traits, traits=traits, name=self.user_id
    ).strip()
    return self._update_memories(chain_result, namespace="PREFERENCES")
def update_agent_summary(self, model_speed):
    """Recompute the agent summary and store it in the ``SUMMARY`` namespace."""
    return self._update_memories(
        self._compute_agent_summary(model_speed=model_speed),
        namespace="SUMMARY",
    )
def prompt_correction(self, prompt_source: str, model_speed: str):
    """Makes the prompt grammatically correct.

    Args:
        prompt_source: Raw user sentence to correct.
        model_speed: Accepted for interface compatibility; not used here.

    Returns:
        The corrected sentence, JSON-encoded as a string.
    """
    prompt = """ Gramatically and logically correct sentence: {{prompt_source}} . Return only the corrected sentence, no abbreviations, using same words if it is logical. Do not mention explicitly rules given in prompt. """
    # The rendered text is parsed as a template again below, so braces in the
    # user's sentence must be doubled or they would be read as variables.
    escaped_source = prompt_source.replace("{", "{{").replace("}", "}}")
    template = Template(prompt)
    output = template.render(prompt_source=escaped_source)
    complete_query = PromptTemplate.from_template(output)
    chain = LLMChain(
        llm=self.llm, prompt=complete_query, verbose=self.verbose
    )
    chain_result = chain.run(prompt=complete_query, name=self.user_id).strip()
    json_data = json.dumps(chain_result)
    return json_data
async def solution_generation(self, prompt: str, prompt_template: str = None, json_example: str = None,
                              model_speed: str = None):
    """Generates a recipe solution in json.

    Args:
        prompt: User request the recipes are generated for.
        prompt_template: Accepted for interface compatibility; not used.
        json_example: Accepted for interface compatibility; not used.
        model_speed: Accepted for interface compatibility; not used.

    Returns:
        A dict with a ``recipes`` list, shaped by the ``RecordRecipe`` schema.
    """

    # Define the response schema
    class Recipe(BaseModel):
        """Schema for an individual recipe."""
        title: str = Field(..., description="Title of the recipe")
        rating: str = Field(None, description="Recipe rating")
        prep_time: str = Field(None, description="Time to prepare recipe")
        cook_time: str = Field(None, description="Time to cook recipe")
        description: str = Field(None, description="Description of recipe")
        ingredients: List[str] = Field(None, description="All recipe ingredients")
        instructions: List[str] = Field(None, description="All recipe instructions for making a recipe")

    class RecordRecipe(BaseModel):
        """Schema for the record containing a list of recipes."""
        recipes: List[Recipe] = Field(..., description="List of recipes")

    prompt_msgs = [
        SystemMessage(
            content="You are a world class algorithm for creating recipes"
        ),
        HumanMessage(content="Create a food recipe based on the following prompt:"),
        HumanMessagePromptTemplate.from_template("{input}"),
        HumanMessage(content="Tips: Make sure to answer in the correct format"),
    ]
    prompt_ = ChatPromptTemplate(messages=prompt_msgs)
    chain = create_structured_output_chain(RecordRecipe, self.llm35, prompt_, verbose=True)
    # get_openai_callback is imported at module level; the callback object
    # is printed once the call has finished.
    with get_openai_callback() as cb:
        output = await chain.arun(input=prompt)
        print(cb)
    # Validate the chain output against the schema before converting to a dict.
    my_object = parse_obj_as(RecordRecipe, output)
    return my_object.dict()
async def solution_name_generation(self, prompt: str, prompt_template: str = None, json_example: str = None,
                                   model_speed: str = None):
    """Generates a single recipe solution and returns the recipe title as a string.

    :param prompt: free-text description of the wanted recipe.
    :param prompt_template: not read by this method.
    :param json_example: not read by this method.
    :param model_speed: not read by this method.
    :return: the model answer converted with ``str``.
    """
    prompt_ = """Create a food recipe based on the following prompt: {{prompt}} Return just a concise recipe title. Do not explain or write anything else."""
    output = Template(prompt_).render(prompt=prompt)
    complete_query = PromptTemplate.from_template(output)
    chain = LLMChain(
        llm=self.llm35_fast, prompt=complete_query, verbose=self.verbose
    )
    chain_result = await chain.arun(prompt=complete_query, name=self.user_id)
    # logging applies %-formatting to the message with the extra arguments; the
    # original call had an argument but no placeholder, which makes the logging
    # module report a formatting error instead of emitting the record.
    logging.info("Here is the chain result %s", chain_result)
    return str(chain_result)
# if model_speed == "fast":
# output = self.replicate_llm(output)
# return output
# else:
# chain = LLMChain(
# llm=self.llm, prompt=complete_query, verbose=self.verbose
# )
# chain_result = chain.run(prompt=complete_query, name=self.user_id).strip()
# #
# # vectorstore: Pinecone = Pinecone.from_existing_index(
# # index_name=self.index,
# # embedding=OpenAIEmbeddings(),
# # namespace='RESULT'
# # )
# # from datetime import datetime
# # retriever = vectorstore.as_retriever()
# # retriever.add_documents([Document(page_content=chain_result,
# # metadata={'inserted_at': datetime.now(), "text": chain_result,
# # 'user_id': self.user_id}, namespace="RESULT")])
# logging.info("HERE IS THE CHAIN RESULT", chain_result)
# return chain_result
def extract_json(self, data):
    """Decode the text between the first ``{`` and the last ``}`` of ``data``.

    :param data: string that may contain a JSON object surrounded by other text.
    :return: the decoded value, or ``None`` when that span is not valid JSON.
    """
    first_brace = data.find("{")
    last_brace = data.rfind("}")
    candidate = data[first_brace:last_brace + 1]
    try:
        decoded = json.loads(candidate)
    except json.JSONDecodeError:
        # Nothing decodable between the braces.
        return None
    return decoded
async def async_generate(
    self,
    prompt_template_base,
    base_category,
    base_value,
    list_of_items,
    assistant_category,
):
    """Generates an individual solution choice

    Asks ``self.llm35`` (through a structured-output chain) to decompose
    ``base_category`` into a small decision tree that includes ``base_value``.

    :param prompt_template_base: accepted for backward compatibility; the
        chain below builds its own messages and does not read it.
    :param base_category: decision point to decompose.
    :param base_value: value that must appear among the generated categories.
    :param list_of_items: all ``[category, value]`` pairs of the request; every
        pair other than ``[base_category, base_value]`` is handed to the model
        as an exclusion.
    :param assistant_category: domain named in the system message.
    :return: the generated tree serialised as a JSON string.
    """
    json_example = """ {"category":"time","options":[{"category":"quick","options":[{"category":"1 min"},{"category":"10 mins"},{"category":"30 mins"}]},{"category":"slow","options":[{"category":"60 mins"},{"category":"120 mins"},{"category":"180 mins"}]}]}"""
    # NOTE(review): the braces are doubled as if the text went through a format
    # template, but it is embedded in a plain HumanMessage below -- confirm
    # whether the doubling is still wanted. Kept unchanged to preserve the prompt.
    json_example = json_example.replace("{", "{{").replace("}", "}}")

    # Everything except the pair being decomposed is listed as an exclusion.
    list_of_items = str(
        [item for item in list_of_items if item != [base_category, base_value]]
    )

    class FoodOption(BaseModel):
        category: str = Field(...,
                              description="Specific food option category, e.g., 'Italian', 'Gluten-free', 'Outdoor seating'")

    class CategoryOption(BaseModel):
        category: str = Field(...,
                              description="Main category, e.g., 'Cuisine', 'Dietary Restrictions', 'Atmosphere'")
        options: List[FoodOption] = Field([], description="List of possible options for this main category")

    class Response(BaseModel):
        category: str = Field(..., description="Main classification, e.g., 'Location', 'Price Range'")
        options: List[CategoryOption] = Field([], description="An array of category option objects.")

    system_message = f"You are a world class algorithm for decomposing human " \
                     f"thoughts into decision trees on {assistant_category}. "
    guidance_query = "The request:"
    # The former "escape possessive apostrophes" tip is dropped: it only existed
    # to work around the quote-swapping serialisation that json.dumps replaces.
    prompt_msgs = [
        SystemMessage(
            content=system_message
        ),
        HumanMessage(content=guidance_query),
        HumanMessagePromptTemplate.from_template("{input}"),
        HumanMessage(content="Tips: Make sure to answer in the correct format"),
        HumanMessage(content=f"Tips: Must include the following as a category: {base_value} and exclude {list_of_items}"),
        HumanMessage(content=f" Tips: Look at this json as example: {json_example}"),
    ]
    prompt_ = ChatPromptTemplate(messages=prompt_msgs)
    chain = create_structured_output_chain(Response, self.llm35, prompt_, verbose=True)

    request = (
        f"Decompose decision point '{base_category}' into three categories with the same or lower "
        f"granularity and must include '{base_value}'.\n"
        "Provide three sub-categories that specify the decision point better."
    )
    output = await chain.arun(input=request)

    # Coerce the chain output into the schema, then serialise with a real JSON
    # encoder: swapping quote characters in ``str(dict)`` yields invalid JSON as
    # soon as a value contains an apostrophe.
    my_object = parse_obj_as(Response, output)
    return json.dumps(my_object.dict())
async def generate_concurrently(self, base_prompt, assistant_category, load_defaults=True):
    """Generates an async solution group

    ``base_prompt`` is a ``;``-separated list of ``category=value`` pairs. One
    ``async_generate`` task is started per pair and the JSON answers are
    combined into ``{"results": [...]}``. The combined structure is validated
    against ``validations/schema/decompose_categories.json``; when validation
    fails the content of ``validations/defaults/categories_defaults.json`` is
    returned instead.

    :param base_prompt: e.g. ``"diet=vegan;availability=cheap"``.
    :param assistant_category: forwarded to ``async_generate``.
    :param load_defaults: not read by this method.
    :return: the combined dictionary, or the defaults dictionary.
    :raises ValueError: when a non-empty segment contains no ``=``.
    """
    prompt_template_base = (
        " Decompose decision point '{{ base_category }}' into three categories the same level as value "
        "'{{base_value}}' definitely including '{{base_value}} ' but not including {{exclusion_categories}}. "
        "Make sure choices further specify the '{{ base_category }}' category where AI is helping person "
        "in choosing {{ assistant_category }}.\n"
        "Provide three sub-options that further specify the particular category better. Generate very short "
        "json, do not write anything besides json, follow this json property structure : {{json_example}}"
    )

    # Empty segments (e.g. from a trailing ';') are skipped and only the first
    # '=' splits, so a value may itself contain '='.
    list_of_items = [
        segment.split("=", 1) for segment in base_prompt.split(";") if segment.strip()
    ]
    logging.info("LIST OF ITEMS %s", list_of_items)

    tasks = [
        self.async_generate(
            prompt_template_base,
            base_category,
            base_value,
            list_of_items,
            assistant_category,
        )
        for base_category, base_value in list_of_items
    ]
    results = await asyncio.gather(*tasks)

    def replace_underscores(data):
        # Recursively turn '_' into ' ' in every string stored under "category".
        if isinstance(data, dict):
            for key, value in data.items():
                if key == "category" and isinstance(value, str):
                    data[key] = value.replace("_", " ")
                else:
                    replace_underscores(value)
        elif isinstance(data, list):
            for item in data:
                replace_underscores(item)

    logging.info("HERE ARE THE valid RESULTS %s", len(results))
    # Keep only the outermost {...} span of each answer before decoding.
    results_list = [
        json.loads(raw[raw.find("{"): raw.rfind("}") + 1]) for raw in results
    ]
    replace_underscores(results_list)
    combined_json = {"results": results_list}

    def load_schema(filepath):
        with open(filepath, 'r') as f:
            return json.load(f)

    validations_dir = os.path.join(os.path.dirname(__file__), '..', 'validations')
    try:
        schema_path = os.path.join(validations_dir, 'schema', 'decompose_categories.json')
        validate = fastjsonschema.compile(load_schema(schema_path))
        logging.info("HERE SOME RESULTS %s", str({"response": combined_json}))
        validate({"response": combined_json})
        return combined_json
    except fastjsonschema.exceptions.JsonSchemaException as e:
        logging.info("HERE ARE THE ERRORS %s", str(e))
        defaults_path = os.path.join(validations_dir, 'defaults', 'categories_defaults.json')
        return load_schema(defaults_path)
def _loader(self, path: str, namespace: str):
    """Load a PDF, split it into pages and store every page as a memory.

    :param path: PDF to load. The parameter used to be ignored; it is now
        honoured, falling back to the previously hard-coded nutrition PDF
        when it is empty or ``None``.
    :param namespace: namespace forwarded to ``_update_memories``.
    :return: the string ``"Success"``.
    """
    pdf_path = path or "../document_store/nutrition/Human_Nutrition.pdf"
    pages = PyPDFLoader(pdf_path).load_and_split()
    # Guard the debug print so an empty document does not raise IndexError.
    if pages:
        print("PAGES", pages[0])
    for page in pages:
        self._update_memories(page.page_content, namespace, page.metadata["page"], page.metadata["source"])
    return "Success"
# print(type(pages))
def _process_pref(self, data):
for result in data["response"]["results"]:
# Check if preference is empty and options exist
if not result["preference"] and result["options"]:
# Get the second nested category value
second_category = result["options"][0]["category"]
# Assign it to the preference
result["preference"] = [second_category]
# Assuming data is a dictionary with the structure you described
def remove_second_subnested_category(categories):
for category_data in categories:
if "options" in category_data:
# If there are options in the current category, check if there is a second subnested category and remove it if it exists
try:
category_data["options"].pop(1)
except IndexError:
pass
# Recursively iterate over all subcategories, if any
remove_second_subnested_category(category_data["options"])
remove_second_subnested_category(data["response"]["results"])
print("UPDATED OUTPUT", data)
return data
# Assuming you have the JSON data in the "data" variable
def prompt_to_choose_tree(self, prompt: str, model_speed: str, assistant_category: str, load_defaults: bool = True):
    """Serves to generate agent goals and subgoals based on a prompt

    With ``load_defaults`` set, the content of
    ``validations/defaults/categories_input_defaults.json`` is returned as a
    JSON string without calling any model.

    Otherwise ``self.llm_fast`` first decomposes the prompt into a one-line
    ``category=decision`` answer, ``self.llm35`` maps that answer onto the
    ``Main`` schema, ``_process_pref`` post-processes the tree and the result
    is validated against ``validations/schema/decompose_categories_input.json``.
    Any failure during validation or serialisation falls back to the defaults.

    :param prompt: user statement to decompose.
    :param model_speed: not read by this method.
    :param assistant_category: domain inserted into the decomposition prompt.
    :param load_defaults: return the stored defaults instead of calling the models.
    :return: a JSON string.
    """
    import re

    def load_schema(filepath):
        with open(filepath, 'r') as f:
            return json.load(f)

    validations_dir = os.path.join(os.path.dirname(__file__), '..', 'validations')
    defaults_path = os.path.join(validations_dir, 'defaults', 'categories_input_defaults.json')

    if load_defaults:
        return json.dumps(load_schema(defaults_path))

    json_example = """ <category1>=<decision1>;<category2>=<decision2>..."""
    prompt_template = (
        "\n"
        "Decompose {{ prompt_str }} statement into decision tree that take into account user summary "
        "information and related to {{ assistant_category }}. There should be three categories and one "
        "decision for each.\n"
        "Categories should be logical and user friendly. Do not include budget, meal type, intake, "
        "personality, user summary, personal preferences.\n"
        "Decision should be one user can make in regards to {{ assistant_category }}. Present answer in "
        "one line and in property structure : {{json_example}}"
    )
    complete_query = Template(prompt_template).render(
        prompt_str=prompt,
        json_example=json_example,
        assistant_category=assistant_category,
    )
    logging.info("HERE IS THE COMPLETE QUERY %s", complete_query)
    complete_query = PromptTemplate.from_template(complete_query)
    chain = LLMChain(llm=self.llm_fast, prompt=complete_query, verbose=False)
    chain_result = chain.run(prompt=complete_query, name=self.user_id).strip()

    def add_space_to_camel_case(s):
        # Split camelCase words when any character after the first is uppercase ...
        if any(c.isupper() for c in s[1:]):
            s = re.sub(r'([a-z])([A-Z])', r'\1 \2', s)
        # ... then capitalise every whitespace-separated word.
        return ' '.join([word.capitalize() for word in s.split()])

    chain_result = add_space_to_camel_case(chain_result)

    class Option(BaseModel):
        category: str = Field(..., description=" Each should have a 'category' (a specific choice like 'Under $25' or 'Red')")
        options: Optional[List] = Field([], description="Empty list")

    class Result(BaseModel):
        category: str = Field(None, description=" Specify the main classification (e.g., Price Range, Color, Size) in the 'category' field.")
        options: List[Option] = Field(None, description="An array of option objects.")
        preference: Optional[List] = Field([], description="Value of the first category")

    class Response(BaseModel):
        results: List[Result] = Field(None, description="List of the results of the decision tree")

    class Main(BaseModel):
        response: Response = Field(None, description="Complete decision tree response")

    system_message = "You are a world class algorithm applying raw output to a schema "
    guidance_query = "Apply output and change it to a schema"
    prompt_msgs = [
        SystemMessage(
            content=system_message
        ),
        HumanMessage(content=guidance_query),
        HumanMessagePromptTemplate.from_template("{input}"),
        HumanMessage(content="Tips: Make sure to answer in the correct format"),
        HumanMessage(content="Tips: Make sure lowest level options are an empty list "),
        HumanMessage(content="Tips: Make sure results have multiple categories on the same level "),
    ]
    prompt_ = ChatPromptTemplate(messages=prompt_msgs)
    chain = create_structured_output_chain(Main, self.llm35, prompt_, verbose=True)
    output = chain.run(input=chain_result)

    # Coerce the chain output into the schema and continue with a plain dict.
    data = parse_obj_as(Main, output).dict()
    logging.info("HERE IS THE inter RESULT %s", data)
    data_pr = self._process_pref(data)
    logging.info("HERE IS THE FINAL RESULT %s", data_pr)

    try:
        schema_path = os.path.join(validations_dir, 'schema', 'decompose_categories_input.json')
        validate = fastjsonschema.compile(load_schema(schema_path))
        validate(data_pr)
        # A real JSON encoder is required here: swapping quote characters in
        # ``str(dict)`` breaks on apostrophes and renders None as ``None``.
        return json.dumps(data_pr)
    except Exception as exc:
        # Same fallback as before (any failure returns the defaults), but the
        # reason is logged and KeyboardInterrupt/SystemExit are no longer swallowed.
        logging.info("HERE ARE THE ERRORS %s", exc)
        return json.dumps(load_schema(defaults_path))
# def prompt_to_choose_tree(self, prompt: str, model_speed: str, assistant_category: str):
# """Serves to generate agent goals and subgoals based on a prompt"""
#
# self.init_pinecone(index_name=self.index)
# vectorstore: Pinecone = Pinecone.from_existing_index(
# index_name=self.index, embedding=OpenAIEmbeddings(), namespace="NUTRITION_RESOURCE"
# )
# retriever = vectorstore.as_retriever()
# #
# # template = """
# # {summaries}
# # {question}
# # """
# #
# # chain = RetrievalQAWithSourcesChain.from_chain_type(
# # llm=OpenAI(temperature=0),
# # chain_type="stuff",
# # retriever=retriever,
# # chain_type_kwargs={
# # "prompt": PromptTemplate(
# # template=template,
# # input_variables=["summaries", "question"],
# # ),
# # },
# # )
# # test_output = chain(
# # "Retireve and summarize releavant information from the following document. Turn it into into decision tree that take into account user summary information and related to food. Present answer in one line summary")
# # print("TEST OUTPUT", test_output['answer'])
#
# # prompt_template = """Retireve and summarize releavant information from the following document
# #
# #
# # {text}
# #
# #
# # Turn it into into decision tree that take into account user summary information and related to {{ assistant_category }}.
# # Do not include budget, personality, user summary, personal preferences, or update time to categories. Do not include information about publisher or details. """
# # prompt_template = Template(prompt_template)
# #
# # prompt_template = prompt_template.render(
# # original_prompt=prompt,
# # assistant_category=assistant_category)
# # PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
# # chain_summary = load_summarize_chain(OpenAI(temperature=0), chain_type="map_reduce", return_intermediate_steps=True,
# # map_prompt=PROMPT, combine_prompt=PROMPT)
# # test_output = chain_summary({"input_documents": pages[1:20]}, return_only_outputs=True)
# #
# # print("TEST OUTPUT", test_output)
#
# json_example = """ <category1>=<decision1>;<category2>=<decision2>..."""
# prompt_template = """Known user summary: '{{ user_summary }} '.
# Decompose {{ prompt_str }} statement into decision tree that take into account user summary information and related to {{ assistant_category }}.
# Do not include budget, meal type, intake, personality, user summary, personal preferences, or update time to categories. Use the information to correct any major mistakes: {{nutritional_context}}
# Decision should be one user can make. Present answer in one line and in property structure : {{json_example}}"""
#
# self.init_pinecone(index_name=self.index)
# try:
# agent_summary = self._fetch_memories(
# f"Users core summary", namespace="SUMMARY"
# )
# print("HERE IS THE AGENT SUMMARY", agent_summary)
# agent_summary = str(agent_summary)
#
# if (
# str(agent_summary)
# == "{'error': 'No document found for this user. Make sure that a query is appropriate'}"
# ):
# agent_summary = "None."
# except:
# agent_summary = "None."
#
# import time
# start_time = time.time()
#
# agent_summary = agent_summary.split(".", 1)[0]
# template = Template(prompt_template)
# output = template.render(
# prompt_str=prompt,
# json_example=json_example,
# user_summary=agent_summary,
# assistant_category=assistant_category,
# # nutritional_context=test_output['answer']
# )
# complete_query = output
# print("HERE IS THE COMPLETE QUERY", complete_query)
# complete_query = PromptTemplate.from_template(complete_query)
# if model_speed == "fast":
# output = self.replicate_llm(output)
# json_data = json.dumps(output)
# return json_data
# else:
# chain = LLMChain(llm=self.llm_fast, prompt=complete_query, verbose=False)
# chain_result = chain.run(prompt=complete_query, name=self.user_id).strip()
# vectorstore: Pinecone = Pinecone.from_existing_index(
# index_name=self.index,
# embedding=OpenAIEmbeddings(),
# namespace="GOAL",
# )
# from datetime import datetime
# retriever = vectorstore.as_retriever()
# logging.info(str(chain_result))
# print("HERE IS THE CHAIN RESULT", chain_result)
# retriever.add_documents(
# [
# Document(
# page_content=chain_result,
# metadata={
# "inserted_at": datetime.now(),
# "text": chain_result,
# "user_id": self.user_id,
# },
# namespace="GOAL",
# )
# ]
# )
# return chain_result.replace("'", '"')
async def prompt_decompose_to_tree_categories(
    self, prompt: str, assistant_category, model_speed: str, load_defaults: bool=True
):
    """Return decomposed categories for ``prompt``, or the stored defaults.

    :param prompt: forwarded to ``generate_concurrently``.
    :param assistant_category: forwarded to ``generate_concurrently``.
    :param model_speed: not read by this method.
    :param load_defaults: when truthy, the JSON content of
        ``validations/defaults/categories_defaults.json`` is returned and
        ``generate_concurrently`` is not called.
    :return: the loaded defaults or the result of ``generate_concurrently``.
    """
    if not load_defaults:
        generated = await self.generate_concurrently(prompt, assistant_category, load_defaults=load_defaults)
        return generated

    defaults_file = os.path.join(os.path.dirname(__file__), '..', 'validations', 'defaults',
                                 'categories_defaults.json')
    with open(defaults_file, 'r') as handle:
        return json.load(handle)
# async for result in self.generate_concurrently(prompt):
# yield result
def prompt_to_update_meal_tree(
    self, category: str, from_: str, to_: str, model_speed: str
):
    """Ask the LLM to rewrite one category of the most recent stored GOAL tree.

    The newest GOAL document of the current user is fetched, the entry of its
    ``"tree"`` list whose ``"category"`` equals ``category`` is serialised and
    sent to ``self.llm`` together with the requested change.

    :param category: category of the tree to change.
    :param from_: old value named in the prompt.
    :param to_: new value named in the prompt.
    :param model_speed: not read by this method.
    :return: the model answer with ``'`` replaced by ``"``, or an error
        dictionary when no document is found.
    """
    self.init_pinecone(index_name=self.index)
    goal_store: Pinecone = Pinecone.from_existing_index(
        index_name=self.index, embedding=OpenAIEmbeddings(), namespace="GOAL"
    )
    goal_retriever = goal_store.as_retriever()
    # Restrict the search to documents of the current user.
    goal_retriever.search_kwargs = {
        "filter": {"user_id": {"$eq": self.user_id}}
    }
    candidates = goal_retriever.get_relevant_documents("prompt")

    def _inserted_at(doc):
        meta = doc.metadata
        if "inserted_at" in meta:
            return meta.get("inserted_at")
        return datetime.min

    # Newest first.
    candidates.sort(key=_inserted_at, reverse=True)

    if len(candidates) == 0:
        return {
            "error": "No document found for this user. Make sure that a query is appropriate"
        }

    stored_text = candidates[0].page_content
    document = json.loads(stored_text.replace("'", '"'))
    matches = [
        entry for entry in document["tree"] if entry["category"] == category
    ]
    serialized = json.dumps(matches[0] if matches else None)
    # Double the braces so the format-style PromptTemplate treats them literally.
    escaped_content = serialized.replace("{", "{{").replace("}", "}}")
    logging.info(escaped_content)

    rendered = Template(
        "Change the category: {{category}} based on {{from_}} to {{to_}} change and update appropriate "
        "of the following original inluding the preference: {{results}}\n"
    ).render(category=category, from_=from_, to_=to_, results=escaped_content)
    complete_query = PromptTemplate.from_template(rendered)
    review_chain = LLMChain(llm=self.llm, prompt=complete_query)
    answer = review_chain.run(prompt=complete_query, name=self.user_id).strip()
    return answer.replace("'", '"')
def extract_info(self, s: str):
    """Split a four-line place listing into name, address, phone and website.

    Expected layout (one field per line)::

        <name>
        Address: <address>
        Phone: <phone>
        Website: <website>

    Surrounding whitespace is removed from the listing and from each line.
    Fields missing at the end of the listing come back as empty strings
    instead of raising ``IndexError``.

    :param s: raw listing text.
    :return: dictionary with the keys ``name``, ``address``, ``phone`` and ``website``.
    """
    lines = [line.strip() for line in s.strip().split("\n")]
    # Pad short listings so the unpacking below always finds four fields.
    lines += [""] * (4 - len(lines))
    name, address, phone, website = lines[:4]
    return {
        "name": name,
        "address": address.replace("Address: ", ""),
        "phone": phone.replace("Phone: ", ""),
        "website": website.replace("Website: ", ""),
    }
async def restaurant_generation(self, prompt: str, prompt_template: str, json_example: str, model_speed: str):
    """Serves to suggest a restaurant to the agent

    The user's prompt is inserted into an instruction template, prefixed with
    the stored user summary and sent to ``self.llm``. The answer is used as a
    Google Places query and the first two listings are returned.

    Previously the user prompt and the instruction template shared one
    variable, so the template was only used when ``prompt`` was empty and was
    then rendered into itself.

    :param prompt: user request.
    :param prompt_template: not read by this method (unchanged).
    :param json_example: not read by this method (unchanged).
    :param model_speed: not read by this method.
    :return: list of dictionaries produced by ``extract_info``.
    """
    import re  # local import so the block does not rely on a module-level one

    restaurant_template = (
        "\n"
        "Based on the following prompt {{prompt}} and all the history and information of this user,\n"
        "Determine the type of restaurant you should offer to a customer. Make the recomendation very short "
        "and to a point, as if it is something you would type on google maps\n"
    )
    self.init_pinecone(index_name=self.index)
    agent_summary = self._fetch_memories("Users core summary", namespace="SUMMARY")
    rendered = Template(restaurant_template).render(prompt=prompt or "")

    # The summary (often the repr of a dict) and the user text may contain
    # braces; double them so the format-style PromptTemplate keeps them literal
    # instead of reading them as input variables.
    complete_query = (str(agent_summary) + rendered).replace("{", "{{").replace("}", "}}")
    complete_query = PromptTemplate.from_template(complete_query)
    chain = LLMChain(llm=self.llm, prompt=complete_query, verbose=self.verbose)
    chain_result = chain.run(prompt=complete_query).strip()

    places = GooglePlacesTool()
    output = places.run(chain_result)
    # Listings are numbered "1.", "2.", ...; keep the first two.
    restaurants = re.split(r"\d+\.", output)[1:3]
    restaurant_list = [self.extract_info(r) for r in restaurants]
    print("HERE IS THE OUTPUT", restaurant_list)
    return restaurant_list
# async def run_wolt_tool(self, zipcode, chain_result):
# from food_scrapers import wolt_tool
# return wolt_tool.main(zipcode, chain_result)
async def delivery_generation(self, prompt: str, zipcode: str, model_speed: str):
    """Serves to optimize agent delivery recommendations

    The user's prompt is inserted into an instruction template, prefixed with
    the stored user summary and sent to ``self.llm``; the answer is passed to
    ``wolt_tool.main`` together with ``zipcode``.

    Previously the ``prompt`` argument was overwritten by the template text,
    so the user's request never reached the model.

    :param prompt: user request.
    :param zipcode: forwarded to ``wolt_tool.main``.
    :param model_speed: not read by this method.
    :return: whatever ``wolt_tool.main`` returns.
    """
    delivery_template = (
        "\n"
        "Based on the following prompt {{prompt}}\n"
        "Determine the type of food you would want to recommend to the user, that is commonly ordered "
        "online. It should of type of food offered on a delivery app similar to burger or pizza, but it "
        "doesn't have to be that.\n"
        "The response should be very short\n"
    )
    self.init_pinecone(index_name=self.index)
    agent_summary = self._fetch_memories("Users core summary", namespace="SUMMARY")
    rendered = Template(delivery_template).render(prompt=prompt)

    # The summary and the user text may contain braces; double them so the
    # format-style PromptTemplate keeps them literal.
    complete_query = (str(agent_summary) + rendered).replace("{", "{{").replace("}", "}}")
    complete_query = PromptTemplate.from_template(complete_query)
    chain = LLMChain(llm=self.llm, prompt=complete_query, verbose=self.verbose)
    chain_result = chain.run(prompt=complete_query).strip()

    from food_scrapers import wolt_tool
    output = await wolt_tool.main(zipcode=zipcode, prompt=chain_result)
    return output
def add_zapier_calendar_action(self, prompt_base, token, model_speed: str):
    """Turn ``prompt_base`` into a calendar request and run it through a Zapier agent.

    :param prompt_base: free-text statement describing the meeting.
    :param token: Zapier NLA OAuth access token.
    :param model_speed: not read by this method.
    :return: the output of the sequential chain (formulation step, then agent).
    """
    nla_wrapper = ZapierNLAWrapper(zapier_nla_oauth_access_token=token)
    zapier_tools = ZapierToolkit.from_zapier_nla_wrapper(nla_wrapper).get_tools()
    calendar_agent = initialize_agent(
        zapier_tools,
        self.llm_fast,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        verbose=True,
    )

    # First step: have the LLM phrase the statement as a calendar request.
    request_prompt = PromptTemplate(
        input_variables=["prompt"],
        template=""" Formulate the following statement into a calendar request containing time, title, details of the meeting: {prompt} """,
    )
    formulate_chain = LLMChain(llm=self.llm, prompt=request_prompt, verbose=self.verbose)

    # Second step: the agent acts on the formulated request.
    pipeline = SimpleSequentialChain(chains=[formulate_chain, calendar_agent], verbose=True)
    outcome = pipeline.run(prompt_base)
    print("HERE IS THE OUTCOME", outcome)
    return outcome
# Classifies a free-text query with an LLM and hands the classification to an
# OpenAI-functions agent equipped with two tools defined below (goal update and
# preference update). Returns the output of the sequential chain.
# `model_speed` is not read by this method.
def voice_text_input(self, query: str, model_speed: str):
"""Serves to generate sub goals for the user and or update the user's preferences"""
# Argument schema of the goal tool: a single free-text observation.
class GoalWrapper(BaseModel):
observation: str = Field(
description="observation we want to fetch from vectordb"
)
@tool("goal_update_wrapper", args_schema=GoalWrapper, return_direct=True)
def goal_update_wrapper(observation, args_schema=GoalWrapper):
"""Fetches data from the VectorDB and returns it as a python dictionary."""
query = self._fetch_memories(observation, "GOAL")
# The coroutine is driven to completion on the current event loop, which is
# closed afterwards.
loop = asyncio.get_event_loop()
# NOTE(review): no method named `prompt_decompose_to_meal_tree_categories` is
# visible in this part of the file (the visible one is
# `prompt_decompose_to_tree_categories`, with a different parameter list) --
# confirm this call still resolves.
res = loop.run_until_complete(
self.prompt_decompose_to_meal_tree_categories(query, "slow")
)
loop.close()
return res
# Argument schema of the preferences tool: a single free-text observation.
class UpdatePreferences(BaseModel):
observation: str = Field(
description="observation we want to fetch from vectordb"
)
@tool("preferences_wrapper", args_schema=UpdatePreferences, return_direct=True)
def preferences_wrapper(observation, args_schema=UpdatePreferences):
"""Updates user preferences in the VectorDB."""
return self._update_memories(observation, "PREFERENCES")
agent = initialize_agent(
llm=self.llm_fast,
tools=[goal_update_wrapper, preferences_wrapper],
agent=AgentType.OPENAI_FUNCTIONS,
verbose=self.verbose,
)
prompt = """
Based on all the history and information of this user, classify the following query: {query} into one of the following categories:
1. Goal update , 2. Preference change, 3. Result change 4. Subgoal update If the query is not any of these, then classify it as 'Other'
Return the classification and a very short summary of the query as a python dictionary. Update or replace or remove the original factors with the new factors if it is specified.
with following python dictionary format 'Result_type': 'Goal', "Result_action": "Goal changed", "value": "Diet added", "summary": "The user is updating their goal to lose weight"
Make sure to include the factors in the summary if they are provided
"""
# NOTE(review): the text uses single braces ({query}), so this render step
# presumably leaves it unchanged and the PromptTemplate below performs the
# actual substitution -- confirm.
template = Template(prompt)
output = template.render(query=query)
complete_query = output
complete_query = PromptTemplate(
input_variables=["query"], template=complete_query
)
summary_chain = LLMChain(
llm=self.llm, prompt=complete_query, verbose=self.verbose
)
from langchain.chains import SimpleSequentialChain
# The classification produced by summary_chain becomes the agent's input.
overall_chain = SimpleSequentialChain(
chains=[summary_chain, agent], verbose=True
)
output = overall_chain.run(query)
return output
def fetch_user_summary(self, model_speed: str):
    """Return the memories stored for "Users core summary" in the SUMMARY namespace.

    :param model_speed: not read by this method.
    """
    index_name = self.index
    self.init_pinecone(index_name=index_name)
    return self._fetch_memories("Users core summary", namespace="SUMMARY")
def _retrieve_summary(self):
"""Serves to retrieve agent summary"""
self.init_pinecone(index_name=self.index)
result = self._fetch_memories("Users core prompt", "GOAL")
print(result)
return result
# Manual entry point: running the module directly only instantiates Agent;
# the example calls that follow are all commented out.
if __name__ == "__main__":
agent = Agent()
# agent.prompt_correction(prompt_source="I would like a quicko veggiea meals under 25 near me and", model_speed="slow")
# agent.goal_optimization(factors={}, model_speed="slow")
# agent._update_memories("lazy, stupid and hungry", "TRAITS")
# agent.update_agent_traits("His personality is greedy")
# agent.update_agent_preferences("Alergic to corn")
# agent.add_zapier_calendar_action("I would like to schedule 1 hour meeting tomorrow at 12 about brocolli", 'bla', 'BLA')
# agent.update_agent_summary(model_speed="slow")
# agent.solution_generation(prompt="I would like a healthy chicken meal over 125$", model_speed="slow")
# loop = asyncio.get_event_loop()
# loop.run_until_complete(agent.prompt_decompose_to_meal_tree_categories("diet=vegan;availability=cheap", "food", model_speed="slow"))
# loop.close()
# import asyncio
#
#
# async def main():
# out = await agent.prompt_to_choose_tree(prompt="I want would like a quick veggie meal Vietnamese cuisine",
# assistant_category="food", model_speed="slow")
# # Rest of your code here
#
#
# # Run the async function
# asyncio.run(main())
# print(result)
# agent._test()
# agent.update_agent_summary(model_speed="slow")
# agent.voice_text_input("Core prompt ", model_speed="slow")
================================================
FILE: pyproject.toml
================================================
# Poetry project metadata.
[tool.poetry]
name = "promethAI"
version = "0.1.0"
description = "PromethAI helps with nutrition choices and is an AI agent"
authors = ["Vasilije Markovic"]
readme = "README.md"
# Runtime dependencies: the first group is pinned to exact versions, the rest
# use caret ranges.
[tool.poetry.dependencies]
python = "^3.10"
# Previously installed from a fork at a tagged commit; kept for reference.
#langchain = {git = "https://github.com/topoteretes/langchain.git" , tag = "v0.0.209"}
langchain = "v0.0.250"
nltk = "3.8.1"
openai = "0.27.8"
pinecone-client = "2.2.2"
python-dotenv = "1.0.0"
pyyaml = "6.0"
fastapi = "0.98.0"
uvicorn = "0.22.0"
googlemaps = "4.10.0"
jinja2 = "3.1.2"
replicate = "^0.8.4"
pexpect = "^4.8.0"
selenium = "^4.9.0"
playwright = "^1.32.1"
pytest-playwright = "^0.3.3"
boto3 = "^1.26.125"
gptcache = "^0.1.22"
redis = "^4.5.5"
gunicorn = "^20.1.0"
tiktoken = "^0.4.0"
google-search-results = "^2.4.2"
spacy = "^3.5.3"
python-jose = "^3.3.0"
pypdf = "^3.12.0"
fastjsonschema = "^2.18.0"
# Build backend used to build the package.
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
================================================
FILE: test_api.py
================================================
import pytest
from fastapi.testclient import TestClient
from llm_chains.chains import Agent
from api import app
# Module-level test client wrapping the FastAPI app; every test below issues
# its requests through it.
client = TestClient(app)
@pytest.fixture(scope="session")
def agent():
    """Provide a single ``Agent`` instance shared across the test session."""
    shared_agent = Agent()
    yield shared_agent
class TestRoutes:
    """Smoke tests for the HTTP routes exposed by the FastAPI application.

    All tests issue requests through the module-level ``client``. The class
    is declared exactly once: declaring ``TestRoutes`` twice in the same
    module makes the later ``class`` statement rebind the name, so the
    earlier body is silently discarded.
    """

    def test_root(self):
        """The root route answers 200 with the liveness greeting."""
        response = client.get("/")
        assert response.status_code == 200
        assert response.json() == {"message": "Hello, World, I am alive!"}

    def test_health_check(self):
        """The health route answers 200 with an OK status payload."""
        response = client.get("/health")
        assert response.status_code == 200
        assert response.json() == {"status": "OK"}

    def test_prompt_to_choose_meal_tree(self):
        """A free-text prompt yields a response envelope containing results."""
        payload = {
            "payload": {
                "user_id": "657",
                "session_id": "456",
                "model_speed": "slow",
                "prompt": "I want to eat healthy",
            }
        }
        response = client.post("/prompt-to-choose-meal-tree", json=payload)
        assert response.status_code == 200

        response_body = response.json()

        # Check that the response structure is correct
        assert "response" in response_body
        assert "results" in response_body["response"]

    def test_prompt_to_decompose_meal_tree_categories(self):
        """A structured prompt is decomposed into a nested category tree."""
        payload = {
            "payload": {
                "user_id": "659",
                "session_id": "458",
                "model_speed": "slow",
                "prompt_struct": "taste=Helsinki;health=Helsinki;cost=Helsinki",
            }
        }
        response = client.post(
            "/prompt-to-decompose-meal-tree-categories", json=payload
        )
        assert response.status_code == 200

        response_body = response.json()

        # Check that the response structure is correct
        assert "response" in response_body
        assert "category" in response_body["response"]
        assert "options" in response_body["response"]

        # Check that the main category is 'location'
        assert response_body["response"]["category"] == "location"

        # Check that the options are correct
        options = response_body["response"]["options"]
        assert len(options) == 3  # There should be 3 options

        # Check that each option has a 'category' and 'options'
        for option in options:
            assert "category" in option
            assert "options" in option

            # Check that each sub-option has a 'category'
            for sub_option in option["options"]:
                assert "category" in sub_option
# When this file is executed directly (rather than collected by pytest),
# hand control to pytest's own entry point.
if __name__ == "__main__":
pytest.main()
================================================
FILE: utils/load_prod_redis.py
================================================
import requests
import json
from itertools import combinations
# URL of the category-decomposition endpoint that requests are sent to.
endpoint_url = "http://0.0.0.0:8000/prompt-to-decompose-meal-tree-categories"

# Meal choice factors, stored as an alphabetically sorted list.
meal_choice_factors = sorted(
    (
        "taste",
        "health",
        "cost",
        "cuisine",
        "hunger",
        "availability",
        "diet",
        "allergies",
        "time",
        "mood",
        "calories",
    )
)

print("Factors used for sorting: ", meal_choice_factors)
# Define the payload template
payload_template =
gitextract_hp6s2jka/
├── .github/
│ ├── actions/
│ │ └── image_builder/
│ │ └── action.yaml
│ └── workflows/
│ ├── cd.yaml
│ ├── cd_prd.yaml
│ └── ci.yaml
├── .gitignore
├── CODE_OF_CONDUCT.md
├── Dockerfile
├── LICENSE.md
├── README.md
├── act.env.example
├── agent.py
├── api.py
├── assistant_templates.yaml
├── auth/
│ ├── auth.py
│ ├── auth_utils.py
│ └── cognito/
│ └── JWTBearer.py
├── bin/
│ └── dockerize
├── bots/
│ ├── __init__.py
│ ├── bot_extension.py
│ ├── bot_loading_util.py
│ ├── ff.yaml
│ └── mam.tf
├── docker-compose.yml
├── entrypoint.sh
├── examples/
│ ├── level_1/
│ │ ├── level_1_pdf_vectorstore_dlt_etl.py
│ │ └── ticket_schema.json
│ ├── level_2/
│ │ ├── Dockerfile
│ │ ├── Readme.md
│ │ ├── api.py
│ │ ├── docker-compose.yml
│ │ ├── entrypoint.sh
│ │ ├── level_2_pdf_vectorstore__dlt_contracts.py
│ │ ├── pyproject.toml
│ │ └── schema_registry/
│ │ └── ticket_schema.json
│ └── simple_ETLs.py
├── extensions/
│ ├── __init__.py
│ ├── argparseext.py
│ └── dotenvext.py
├── fetch_secret.py
├── fixtures/
│ ├── choose_meal_tree_response.json
│ ├── goal_response.json
│ ├── recipe_response.json
│ ├── subgoal_response.json
│ └── update_meal_tree_response.json
├── food_scrapers/
│ └── wolt_tool.py
├── heuristic_experience_orchestrator/
│ ├── README.md
│ └── task_identification.py
├── initdb/
│ └── init.sql
├── llm_chains/
│ ├── __init__.py
│ └── chains.py
├── pyproject.toml
├── test_api.py
├── utils/
│ ├── load_prod_redis.py
│ └── utils.py
└── validations/
├── defaults/
│ ├── categories_defaults.json
│ └── categories_input_defaults.json
└── schema/
├── decompose_categories.json
└── decompose_categories_input.json
SYMBOL INDEX (128 symbols across 20 files)
FILE: agent.py
function get_ada_embedding (line 39) | def get_ada_embedding(text):
FILE: api.py
class Payload (line 38) | class Payload(BaseModel):
class ImageResponse (line 40) | class ImageResponse(BaseModel):
function str_to_bool (line 46) | def str_to_bool(s):
function root (line 55) | async def root():
function health_check (line 62) | def health_check():
function clear_cache (line 93) | async def clear_cache(request_data: Payload) -> dict:
function prompt_to_correct_grammar (line 113) | async def prompt_to_correct_grammar(request_data: Payload) -> dict:
function prompt_to_choose_meal_tree (line 145) | async def prompt_to_choose_meal_tree(request_data: Payload) -> dict:
function create_endpoint (line 181) | def create_endpoint(category: str, solution_type: str, prompt: str, json...
function prompt_to_decompose_meal_tree_categories (line 270) | async def prompt_to_decompose_meal_tree_categories(request_data: Payload...
function prompt_to_correct_grammar (line 296) | async def prompt_to_correct_grammar(request_data: Payload) -> dict:
function fetch_user_summary (line 308) | async def fetch_user_summary(request_data: Payload) -> dict:
function recipe_request (line 318) | async def recipe_request(request_data: Payload) -> dict:
function solution_name_request (line 336) | async def solution_name_request(request_data: Payload) -> dict:
function start_api_server (line 372) | def start_api_server(host: str = "0.0.0.0", port: int = 8000):
FILE: auth/auth.py
function get_current_user (line 29) | async def get_current_user(
FILE: auth/auth_utils.py
function authenticate_and_get_token (line 43) | def authenticate_and_get_token(username: str, password: str,
FILE: auth/cognito/JWTBearer.py
class JWKS (line 14) | class JWKS(BaseModel):
class JWTAuthorizationCredentials (line 18) | class JWTAuthorizationCredentials(BaseModel):
class JWTBearer (line 26) | class JWTBearer(HTTPBearer):
method __init__ (line 27) | def __init__(self, jwks: JWKS, auto_error: bool = True):
method verify_jwk_token (line 32) | def verify_jwk_token(self, jwt_credentials: JWTAuthorizationCredential...
method __call__ (line 45) | async def __call__(self, request: Request) -> Optional[JWTAuthorizatio...
FILE: bots/bot_extension.py
class AppAgent (line 10) | class AppAgent(App, Agent):
method __init__ (line 11) | def __init__(self, db=None, ef=None, table_name=None, user_id: Optiona...
FILE: bots/bot_loading_util.py
function _load_extension (line 8) | def _load_extension( object_type:str, object_value:str):
FILE: examples/level_1/level_1_pdf_vectorstore_dlt_etl.py
function _convert_pdf_to_document (line 31) | def _convert_pdf_to_document(path: str = None):
function _init_weaviate (line 68) | def _init_weaviate():
function load_to_weaviate (line 89) | def load_to_weaviate(document_path=None):
function get_from_weaviate (line 98) | def get_from_weaviate(query=None, path=None, operator=None, valueText=No...
function delete_from_weaviate (line 138) | def delete_from_weaviate(query=None, filters=None):
function infer_schema_from_text (line 175) | def infer_schema_from_text(text: str):
function set_data_contract (line 194) | def set_data_contract(data, version, date, agreement_id=None, privacy_po...
function create_id_dict (line 217) | def create_id_dict(memory_id=None, st_memory_id=None, buffer_id=None):
function init_buffer (line 238) | def init_buffer(data, version, date, memory_id=None, st_memory_id=None, ...
function infer_properties_from_text (line 260) | def infer_properties_from_text(text: str):
function load_json_or_infer_schema (line 281) | def load_json_or_infer_schema(file_path, document_path):
function ai_function (line 297) | def ai_function(prompt=None, json_schema=None):
function higher_level_thinking (line 321) | def higher_level_thinking():
function process_higher_level_thinking (line 333) | def process_higher_level_thinking(result=None):
function main (line 346) | def main(raw_loading, processed_loading,document_paths):
FILE: examples/level_2/api.py
class Payload (line 39) | class Payload(BaseModel):
class ImageResponse (line 41) | class ImageResponse(BaseModel):
function root (line 49) | async def root():
function health_check (line 56) | def health_check():
function upload_pdf_and_payload (line 70) | async def upload_pdf_and_payload(
function start_api_server (line 157) | def start_api_server(host: str = "0.0.0.0", port: int = 8000):
FILE: examples/level_2/level_2_pdf_vectorstore__dlt_contracts.py
class VectorDB (line 49) | class VectorDB:
method __init__ (line 50) | def __init__(self, user_id: str, index_name: str, memory_id:str, ltm_m...
method init_pinecone (line 66) | def init_pinecone(self, index_name):
method init_weaviate (line 80) | def init_weaviate(self, namespace:str):
method add_memories (line 101) | def add_memories(self, observation: str, page: str = "", source: str ...
method fetch_memories (line 151) | def fetch_memories(self, observation: str, params = None):
method delete_memories (line 183) | def delete_memories(self, params: None):
method update_memories (line 199) | def update_memories(self):
class SemanticMemory (line 204) | class SemanticMemory:
method __init__ (line 205) | def __init__(self, user_id: str, memory_id:str, ltm_memory_id:str, ind...
method _update_memories (line 218) | def _update_memories(self ,memory_id:str="None", semantic_memory: str=...
method _fetch_memories (line 228) | def _fetch_memories(self, observation: str,params) -> dict[str, str] |...
class LongTermMemory (line 240) | class LongTermMemory:
method __init__ (line 241) | def __init__(self, user_id: str = "676", memory_id:str=None, index_nam...
class ShortTermMemory (line 251) | class ShortTermMemory:
method __init__ (line 252) | def __init__(self, user_id: str = "676", memory_id:str=None, index_nam...
class EpisodicBuffer (line 264) | class EpisodicBuffer:
method __init__ (line 265) | def __init__(self, user_id: str = "676", memory_id:str=None, index_nam...
method infer_schema_from_text (line 300) | async def infer_schema_from_text(self, text: str):
method main_buffer (line 318) | def main_buffer(self, user_input=None):
class Memory (line 452) | class Memory:
method __init__ (line 455) | def __init__(self, user_id: str = "676", index_name: str = None, knowl...
method _update_semantic_memory (line 468) | def _update_semantic_memory(self, semantic_memory:str):
method _fetch_semantic_memory (line 475) | def _fetch_semantic_memory(self, observation, params):
method _run_buffer (line 483) | def _run_buffer(self, user_input:str):
FILE: examples/simple_ETLs.py
function ai_function (line 26) | def ai_function():
FILE: extensions/argparseext.py
function parse_dotenv_extensions (line 8) | def parse_dotenv_extensions(argv):
function parse_arguments (line 33) | def parse_arguments():
FILE: extensions/dotenvext.py
function load_dotenv_extensions (line 4) | def load_dotenv_extensions(dotenv_files):
FILE: fetch_secret.py
function fetch_secret (line 12) | def fetch_secret(secret_name, region_name, env_file_path):
FILE: food_scrapers/wolt_tool.py
function find_and_click_by_attributes (line 4) | async def find_and_click_by_attributes(page, attributes):
function enter_zipcode_and_press_enter (line 12) | async def enter_zipcode_and_press_enter(page, zipcode):
function run (line 19) | async def run(playwright, zipcode: str, prompt: str):
function main (line 53) | async def main(prompt: str, zipcode: str):
FILE: heuristic_experience_orchestrator/task_identification.py
class TaskIdentificationChain (line 5) | class TaskIdentificationChain(LLMChain):
method from_llm (line 9) | def from_llm(
FILE: initdb/init.sql
type pinecone_id (line 1) | CREATE TABLE pinecone_id (
FILE: llm_chains/chains.py
class Agent (line 81) | class Agent:
method __init__ (line 94) | def __init__(
method clear_cache (line 147) | def clear_cache(self):
method set_user_session (line 150) | def set_user_session(self, user_id: str, session_id: str) -> None:
method get_ada_embedding (line 154) | def get_ada_embedding(self, text):
method init_pinecone (line 160) | def init_pinecone(self, index_name):
method _simple_test (line 167) | def _simple_test(self):
method tiktoken_len (line 180) | def tiktoken_len(self, text):
method _update_memories (line 207) | def _update_memories(self, observation: str, namespace: str, page: str...
class FetchMemories (line 234) | class FetchMemories(BaseModel):
method _fetch_memories (line 239) | def _fetch_memories(self, observation: str, namespace: str) -> dict[st...
method _compute_agent_summary (line 264) | def _compute_agent_summary(self, model_speed: str):
method update_agent_preferences (line 301) | def update_agent_preferences(self, preferences: str):
method update_agent_taboos (line 327) | def update_agent_taboos(self, dislikes: str):
method update_agent_traits (line 350) | def update_agent_traits(self, traits: str):
method update_agent_summary (line 370) | def update_agent_summary(self, model_speed):
method prompt_correction (line 375) | def prompt_correction(self, prompt_source: str, model_speed: str):
method solution_generation (line 390) | async def solution_generation(self, prompt: str, prompt_template: str ...
method solution_name_generation (line 447) | async def solution_name_generation(self, prompt: str, prompt_template:...
method extract_json (line 487) | def extract_json(self, data):
method async_generate (line 496) | async def async_generate(
method generate_concurrently (line 576) | async def generate_concurrently(self, base_prompt, assistant_category,...
method _loader (line 657) | def _loader(self, path: str, namespace: str):
method _process_pref (line 669) | def _process_pref(self, data):
method prompt_to_choose_tree (line 697) | def prompt_to_choose_tree(self, prompt: str, model_speed: str, assista...
method prompt_decompose_to_tree_categories (line 945) | async def prompt_decompose_to_tree_categories(
method prompt_to_update_meal_tree (line 964) | def prompt_to_update_meal_tree(
method extract_info (line 1017) | def extract_info(self, s: str):
method restaurant_generation (line 1030) | async def restaurant_generation(self, prompt: str, prompt_template: st...
method delivery_generation (line 1061) | async def delivery_generation(self, prompt: str, zipcode: str, model_s...
method add_zapier_calendar_action (line 1083) | def add_zapier_calendar_action(self, prompt_base, token, model_speed: ...
method voice_text_input (line 1113) | def voice_text_input(self, query: str, model_speed: str):
method fetch_user_summary (line 1175) | def fetch_user_summary(self, model_speed: str):
method _retrieve_summary (line 1181) | def _retrieve_summary(self):
FILE: test_api.py
function agent (line 10) | def agent():
class TestRoutes (line 14) | class TestRoutes:
method test_root (line 15) | def test_root(self):
class TestRoutes (line 20) | class TestRoutes:
method test_root (line 21) | def test_root(self):
method test_health_check (line 26) | def test_health_check(self):
method test_prompt_to_choose_meal_tree (line 31) | def test_prompt_to_choose_meal_tree(self):
method test_prompt_to_decompose_meal_tree_categories (line 48) | def test_prompt_to_decompose_meal_tree_categories(self):
FILE: utils/utils.py
function openai_call (line 6) | def openai_call(
Condensed preview — 58 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (220K chars).
[
{
"path": ".github/actions/image_builder/action.yaml",
"chars": 1511,
"preview": "name: 'Build Docker images for PromethAI'\ndescription: 'Build PromethAI-related Docker images and push to the Docker reg"
},
{
"path": ".github/workflows/cd.yaml",
"chars": 2527,
"preview": "name: Publishing promethai-backend Docker image\n\non:\n push:\n branches:\n - dev\n - feature/*\n paths-ignor"
},
{
"path": ".github/workflows/cd_prd.yaml",
"chars": 3522,
"preview": "on:\n push:\n branches:\n - main\n paths-ignore:\n - '**.md'\n - 'examples/**'\nname: Publishing promet"
},
{
"path": ".github/workflows/ci.yaml",
"chars": 870,
"preview": "name: Test build docker image for PromethAI backend app\n\non: pull_request\n\nenv:\n AWS_ACCOUNT_ID_DEV: \"463722570299\"\n\njo"
},
{
"path": ".gitignore",
"chars": 29,
"preview": ".env\n__pycache__\ntst.py\n.idea"
},
{
"path": "CODE_OF_CONDUCT.md",
"chars": 1804,
"preview": "# Code of Conduct for Auto-GPT\n\n## 1. Purpose\n\nThe purpose of this Code of Conduct is to provide guidelines for contribu"
},
{
"path": "Dockerfile",
"chars": 886,
"preview": "\nFROM python:3.11-slim\n\n# Set build argument\nARG API_ENABLED\n\n# Set environment variable based on the build argument\nENV"
},
{
"path": "LICENSE.md",
"chars": 1068,
"preview": "MIT License\n\nCopyright (c) 2023 topoteretes\n\nPermission is hereby granted, free of charge, to any person obtaining a cop"
},
{
"path": "README.md",
"chars": 11416,
"preview": "# PromethAI\n\n\n<p align=\"center\">\n <a href=\"https://prometh.ai//#gh-light-mode-only\">\n <img src=\"assets/topoteretes_l"
},
{
"path": "act.env.example",
"chars": 71,
"preview": "AWS_ACCESS_KEY_ID=\nAWS_SECRET_ACCESS_KEY=\nAWS_DEFAULT_REGION=eu-west-1\n"
},
{
"path": "agent.py",
"chars": 1077,
"preview": "import openai\nimport os\nimport pinecone\n\nfrom dotenv import load_dotenv\nimport nltk\nfrom langchain.text_splitter import "
},
{
"path": "api.py",
"chars": 14778,
"preview": "from llm_chains.chains import Agent\nfrom fastapi import FastAPI\nfrom fastapi.responses import JSONResponse\nfrom pydantic"
},
{
"path": "assistant_templates.yaml",
"chars": 3089,
"preview": "assistant:\n categories:\n - name: food\n solution_types:\n - name: recipe\n prompt: |-\n "
},
{
"path": "auth/auth.py",
"chars": 1135,
"preview": "import os\n\nimport requests\nfrom dotenv import load_dotenv\nfrom fastapi import Depends, HTTPException\nfrom starlette.stat"
},
{
"path": "auth/auth_utils.py",
"chars": 1567,
"preview": "\nfrom cognito.JWTBearer import JWKS, JWTBearer, JWTAuthorizationCredentials\n\nimport requests\n\nregion = \"eu-west-1\"\nuser_"
},
{
"path": "auth/cognito/JWTBearer.py",
"chars": 2451,
"preview": "from typing import Dict, Optional, List\n\nfrom fastapi import HTTPException\nfrom fastapi.security import HTTPBearer, HTTP"
},
{
"path": "bin/dockerize",
"chars": 1294,
"preview": "set -euo pipefail\n\nAWS_REGION=${region:-eu-west-1}\nAWS_DEPLOYMENT_ACCOUNT=${account:-463722570299}\nAWS_REPOSITORY=${repo"
},
{
"path": "bots/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "bots/bot_extension.py",
"chars": 818,
"preview": "import sys\nfrom typing import Optional\n\nsys.path.append('../llm_chains')\n# from embedchain import EmbedChain\n\nfrom llm_c"
},
{
"path": "bots/bot_loading_util.py",
"chars": 659,
"preview": "from bot_extension import AppAgent\nimport yaml\nimport sys\nsys.path.append('../')\nwith open('../assistant_templates.yaml"
},
{
"path": "bots/ff.yaml",
"chars": 6617,
"preview": "Resources:\n ChromaInstance:\n Type: 'AWS::AccessAnalyzer::Analyzer'\n Properties:\n ImageId: !FindInMap\n "
},
{
"path": "bots/mam.tf",
"chars": 104,
"preview": "// Existing Terraform src code found at /var/folders/d_/1x0yyl7n5g5cc8vlgchdr06m0000gn/T/terraform_src.\n"
},
{
"path": "docker-compose.yml",
"chars": 846,
"preview": "version: \"3.9\"\n\nservices:\n promethai:\n networks:\n - agi_backend\n depends_on:\n - db_agi\n - redis\n "
},
{
"path": "entrypoint.sh",
"chars": 191,
"preview": "#!/bin/bash\nexport ENVIRONMENT\npython fetch_secret.py\n\n# Start Gunicorn\ngunicorn -w 2 -k uvicorn.workers.UvicornWorker -"
},
{
"path": "examples/level_1/level_1_pdf_vectorstore_dlt_etl.py",
"chars": 13566,
"preview": "#Make sure to install the following packages: dlt, langchain, duckdb, python-dotenv, openai, weaviate-client\n\nimport dlt"
},
{
"path": "examples/level_1/ticket_schema.json",
"chars": 4331,
"preview": "{\n \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n \"type\": \"object\",\n \"properties\": {\n \"ticketType\": {\n "
},
{
"path": "examples/level_2/Dockerfile",
"chars": 885,
"preview": "\nFROM python:3.11-slim\n\n# Set build argument\nARG API_ENABLED\n\n# Set environment variable based on the build argument\nENV"
},
{
"path": "examples/level_2/Readme.md",
"chars": 923,
"preview": "## PromethAI Memory Manager\n\n\n\n### Description\n\n\nInitial code lets you do three operations:\n\n1. Add to memory\n2. Retriev"
},
{
"path": "examples/level_2/api.py",
"chars": 5174,
"preview": "from langchain.document_loaders import PyPDFLoader\n\nfrom level_2_pdf_vectorstore__dlt_contracts import ShortTermMemory\nf"
},
{
"path": "examples/level_2/docker-compose.yml",
"chars": 402,
"preview": "version: \"3.9\"\n\nservices:\n promethai_mem:\n networks:\n - promethai_mem_backend\n build:\n context: ./\n "
},
{
"path": "examples/level_2/entrypoint.sh",
"chars": 192,
"preview": "#!/bin/bash\nexport ENVIRONMENT\n#python fetch_secret.py\n\n# Start Gunicorn\ngunicorn -w 2 -k uvicorn.workers.UvicornWorker "
},
{
"path": "examples/level_2/level_2_pdf_vectorstore__dlt_contracts.py",
"chars": 20225,
"preview": "#Make sure to install the following packages: dlt, langchain, duckdb, python-dotenv, openai, weaviate-client\n\nimport dlt"
},
{
"path": "examples/level_2/pyproject.toml",
"chars": 990,
"preview": "[tool.poetry]\nname = \"PromethAI_memory\"\nversion = \"0.1.0\"\ndescription = \"PromethAI memory manager\"\nauthors = [\"Vasilije "
},
{
"path": "examples/level_2/schema_registry/ticket_schema.json",
"chars": 4331,
"preview": "{\n \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n \"type\": \"object\",\n \"properties\": {\n \"ticketType\": {\n "
},
{
"path": "examples/simple_ETLs.py",
"chars": 2193,
"preview": "#note, you need to install dlt, langchain, and duckdb\n#pip install dlt\n#pip install langchain\n#pip install duckdb\n#pip i"
},
{
"path": "extensions/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "extensions/argparseext.py",
"chars": 3640,
"preview": "import os\nimport sys\nimport argparse\n\n\n# Extract the env filenames in the -e flag only\n# Ignore any other arguments\ndef "
},
{
"path": "extensions/dotenvext.py",
"chars": 145,
"preview": "from dotenv import load_dotenv\n\n\ndef load_dotenv_extensions(dotenv_files):\n for dotenv_file in dotenv_files:\n "
},
{
"path": "fetch_secret.py",
"chars": 1929,
"preview": "import os\nfrom dotenv import load_dotenv\nfrom api import start_api_server\n\n# API_ENABLED = os.environ.get(\"API_ENABLED\","
},
{
"path": "fixtures/choose_meal_tree_response.json",
"chars": 4863,
"preview": "{\n \"prompt\": \"I’d like a quick veggie meal under $25 near me. No peanuts, I’m allergic.\",\n \"tree\": [\n {"
},
{
"path": "fixtures/goal_response.json",
"chars": 96,
"preview": "{\n \"goals\": [\n \"Cuisine\",\n \"Healthy\",\n \"Budget\",\n \"Taste\"\n ]\n}"
},
{
"path": "fixtures/recipe_response.json",
"chars": 2380,
"preview": "{\n \"recipes\": [\n {\n \"title\": \"Vegetable Stir Fry\",\n \"rating\": 90,\n \"prep_time"
},
{
"path": "fixtures/subgoal_response.json",
"chars": 1466,
"preview": "{\n \"sub_goals\": [\n {\n \"goal_name\": \"Budget\",\n \"sub_goals\": [\n {\n \"name\": \"Cheap\",\n "
},
{
"path": "fixtures/update_meal_tree_response.json",
"chars": 1463,
"preview": "\"{'prompt': 'I would like a quick veggie meal under 25$ near me.', 'tree': [{'category': 'price', 'options': [{'category"
},
{
"path": "food_scrapers/wolt_tool.py",
"chars": 2101,
"preview": "from playwright.async_api import async_playwright, Playwright\n\n\nasync def find_and_click_by_attributes(page, attributes)"
},
{
"path": "heuristic_experience_orchestrator/README.md",
"chars": 1641,
"preview": "# Heuristic Orchestration Chain\n\nThe chain is meant to operate various agents that have a predetermined set of goals the"
},
{
"path": "heuristic_experience_orchestrator/task_identification.py",
"chars": 1728,
"preview": "from langchain import LLMChain, PromptTemplate\nfrom langchain.llms import BaseLLM\n\n\nclass TaskIdentificationChain(LLMCha"
},
{
"path": "initdb/init.sql",
"chars": 81,
"preview": "CREATE TABLE pinecone_id (\n id SERIAL PRIMARY KEY,\n user_id INTEGER NOT NULL\n);"
},
{
"path": "llm_chains/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "llm_chains/chains.py",
"chars": 55683,
"preview": "from langchain.document_loaders import PyPDFLoader\n\n\nimport pinecone\nfrom datetime import datetime, timedelta\nfrom typin"
},
{
"path": "pyproject.toml",
"chars": 927,
"preview": "[tool.poetry]\nname = \"promethAI\"\nversion = \"0.1.0\"\ndescription = \"PromethAI helps with nutrition choices and is an AI ag"
},
{
"path": "test_api.py",
"chars": 2841,
"preview": "import pytest\nfrom fastapi.testclient import TestClient\nfrom llm_chains.chains import Agent\nfrom api import app\n\nclient "
},
{
"path": "utils/load_prod_redis.py",
"chars": 1630,
"preview": "import requests\nimport json\nfrom itertools import combinations\n\n# Define the endpoint URL\n\nendpoint_url = \"http://0.0.0."
},
{
"path": "utils/utils.py",
"chars": 1886,
"preview": "import time\nimport subprocess\nimport openai\n\n\ndef openai_call(\n self,\n prompt: str,\n model: str = None,\n tem"
},
{
"path": "validations/defaults/categories_defaults.json",
"chars": 4950,
"preview": " {\"response\":{\n \"results\": [\n {\n \"category\": \"Nutritional Content\",\n \"op"
},
{
"path": "validations/defaults/categories_input_defaults.json",
"chars": 2184,
"preview": "{\n \"response\": {\n \"results\": [\n {\n \"category\": \"Nutritional Content\",\n "
},
{
"path": "validations/schema/decompose_categories.json",
"chars": 918,
"preview": "{\n \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n \"type\": \"object\",\n \"properties\": {\n \"response\""
},
{
"path": "validations/schema/decompose_categories_input.json",
"chars": 1564,
"preview": "{\n \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n \"type\": \"object\",\n \"properties\": {\n \"response\""
}
]
About this extraction
This page contains the full source code of the topoteretes/PromethAI-Backend GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 58 files (200.8 KB), approximately 46.6k tokens, and a symbol index with 128 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.